From 410e9d8f9ce962923b52096d40781a569803c760 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 9 Jan 2009 13:00:58 +0000 Subject: atm: br2684 internal stats Now that stats are in net_device, use them. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/atm/br2684.c | 38 +++++++++++++++----------------------- 1 file changed, 15 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/atm/br2684.c b/net/atm/br2684.c index ea9438fc6855..993cbf6078c2 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -83,7 +83,6 @@ struct br2684_dev { struct list_head br2684_devs; int number; struct list_head brvccs; /* one device <=> one vcc (before xmas) */ - struct net_device_stats stats; int mac_was_set; enum br2684_payload payload; }; @@ -148,9 +147,10 @@ static struct net_device *br2684_find_dev(const struct br2684_if_spec *s) * the way for multiple vcc's per itf. Returns true if we can send, * otherwise false */ -static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev, +static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev, struct br2684_vcc *brvcc) { + struct br2684_dev *brdev = BRPRIV(dev); struct atm_vcc *atmvcc; int minheadroom = (brvcc->encaps == e_llc) ? 10 : 2; @@ -211,8 +211,8 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct br2684_dev *brdev, } atomic_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc); ATM_SKB(skb)->atm_options = atmvcc->atm_options; - brdev->stats.tx_packets++; - brdev->stats.tx_bytes += skb->len; + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; atmvcc->send(atmvcc, skb); return 1; } @@ -233,14 +233,14 @@ static int br2684_start_xmit(struct sk_buff *skb, struct net_device *dev) brvcc = pick_outgoing_vcc(skb, brdev); if (brvcc == NULL) { pr_debug("no vcc attached to dev %s\n", dev->name); - brdev->stats.tx_errors++; - brdev->stats.tx_carrier_errors++; + dev->stats.tx_errors++; + dev->stats.tx_carrier_errors++; /* netif_stop_queue(dev); */ dev_kfree_skb(skb); read_unlock(&devs_lock); return 0; } - if (!br2684_xmit_vcc(skb, brdev, brvcc)) { + if (!br2684_xmit_vcc(skb, dev, brvcc)) { /* * We should probably use netif_*_queue() here, but that * involves added complication. We need to walk before @@ -248,19 +248,13 @@ static int br2684_start_xmit(struct sk_buff *skb, struct net_device *dev) * * Don't free here! this pointer might be no longer valid! */ - brdev->stats.tx_errors++; - brdev->stats.tx_fifo_errors++; + dev->stats.tx_errors++; + dev->stats.tx_fifo_errors++; } read_unlock(&devs_lock); return 0; } -static struct net_device_stats *br2684_get_stats(struct net_device *dev) -{ - pr_debug("br2684_get_stats\n"); - return &BRPRIV(dev)->stats; -} - /* * We remember when the MAC gets set, so we don't override it later with * the ESI of the ATM card of the first VC @@ -430,17 +424,17 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) /* sigh, interface is down? */ if (unlikely(!(net_dev->flags & IFF_UP))) goto dropped; - brdev->stats.rx_packets++; - brdev->stats.rx_bytes += skb->len; + net_dev->stats.rx_packets++; + net_dev->stats.rx_bytes += skb->len; memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data)); netif_rx(skb); return; dropped: - brdev->stats.rx_dropped++; + net_dev->stats.rx_dropped++; goto free_skb; error: - brdev->stats.rx_errors++; + net_dev->stats.rx_errors++; free_skb: dev_kfree_skb(skb); return; @@ -531,8 +525,8 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg) skb->next = skb->prev = NULL; br2684_push(atmvcc, skb); - BRPRIV(skb->dev)->stats.rx_bytes -= skb->len; - BRPRIV(skb->dev)->stats.rx_packets--; + skb->dev->stats.rx_bytes -= skb->len; + skb->dev->stats.rx_packets--; skb = next; } @@ -554,7 +548,6 @@ static void br2684_setup(struct net_device *netdev) my_eth_mac_addr = netdev->set_mac_address; netdev->set_mac_address = br2684_mac_addr; netdev->hard_start_xmit = br2684_start_xmit; - netdev->get_stats = br2684_get_stats; INIT_LIST_HEAD(&brdev->brvccs); } @@ -568,7 +561,6 @@ static void br2684_setup_routed(struct net_device *netdev) my_eth_mac_addr = netdev->set_mac_address; netdev->set_mac_address = br2684_mac_addr; netdev->hard_start_xmit = br2684_start_xmit; - netdev->get_stats = br2684_get_stats; netdev->addr_len = 0; netdev->mtu = 1500; netdev->type = ARPHRD_PPP; -- cgit v1.2.3 From 0ba25ff4c669e5395110ba6ab4958a97a9f96922 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 9 Jan 2009 13:00:59 +0000 Subject: br2684: convert to net_device_ops Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/atm/br2684.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 993cbf6078c2..334fcd4a4ea4 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -259,10 +259,9 @@ static int br2684_start_xmit(struct sk_buff *skb, struct net_device *dev) * We remember when the MAC gets set, so we don't override it later with * the ESI of the ATM card of the first VC */ -static int (*my_eth_mac_addr) (struct net_device *, void *); static int br2684_mac_addr(struct net_device *dev, void *p) { - int err = my_eth_mac_addr(dev, p); + int err = eth_mac_addr(dev, p); if (!err) BRPRIV(dev)->mac_was_set = 1; return err; @@ -538,16 +537,20 @@ static int br2684_regvcc(struct atm_vcc *atmvcc, void __user * arg) return err; } +static const struct net_device_ops br2684_netdev_ops = { + .ndo_start_xmit = br2684_start_xmit, + .ndo_set_mac_address = br2684_mac_addr, + .ndo_change_mtu = eth_change_mtu, + .ndo_validate_addr = eth_validate_addr, +}; + static void br2684_setup(struct net_device *netdev) { struct br2684_dev *brdev = BRPRIV(netdev); ether_setup(netdev); - brdev->net_dev = netdev; - my_eth_mac_addr = netdev->set_mac_address; - netdev->set_mac_address = br2684_mac_addr; - netdev->hard_start_xmit = br2684_start_xmit; + netdev->netdev_ops = &br2684_netdev_ops; INIT_LIST_HEAD(&brdev->brvccs); } @@ -558,9 +561,8 @@ static void br2684_setup_routed(struct net_device *netdev) brdev->net_dev = netdev; netdev->hard_header_len = 0; - my_eth_mac_addr = netdev->set_mac_address; - netdev->set_mac_address = br2684_mac_addr; - netdev->hard_start_xmit = br2684_start_xmit; + + netdev->netdev_ops = &br2684_netdev_ops; netdev->addr_len = 0; netdev->mtu = 1500; netdev->type = ARPHRD_PPP; -- cgit v1.2.3 From 1a6afe8a733a3edaa1816c10ec2a7353ae0ff47b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 9 Jan 2009 13:01:00 +0000 Subject: clip: convert to internal network_device_stats Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/atm/clip.c | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/atm/clip.c b/net/atm/clip.c index 2d33a83be799..da42fd06b61f 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -214,15 +214,15 @@ static void clip_push(struct atm_vcc *vcc, struct sk_buff *skb) skb->protocol = ((__be16 *) skb->data)[3]; skb_pull(skb, RFC1483LLC_LEN); if (skb->protocol == htons(ETH_P_ARP)) { - PRIV(skb->dev)->stats.rx_packets++; - PRIV(skb->dev)->stats.rx_bytes += skb->len; + skb->dev->stats.rx_packets++; + skb->dev->stats.rx_bytes += skb->len; clip_arp_rcv(skb); return; } } clip_vcc->last_use = jiffies; - PRIV(skb->dev)->stats.rx_packets++; - PRIV(skb->dev)->stats.rx_bytes += skb->len; + skb->dev->stats.rx_packets++; + skb->dev->stats.rx_bytes += skb->len; memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data)); netif_rx(skb); } @@ -372,7 +372,7 @@ static int clip_start_xmit(struct sk_buff *skb, struct net_device *dev) if (!skb->dst) { printk(KERN_ERR "clip_start_xmit: skb->dst == NULL\n"); dev_kfree_skb(skb); - clip_priv->stats.tx_dropped++; + dev->stats.tx_dropped++; return 0; } if (!skb->dst->neighbour) { @@ -380,13 +380,13 @@ static int clip_start_xmit(struct sk_buff *skb, struct net_device *dev) skb->dst->neighbour = clip_find_neighbour(skb->dst, 1); if (!skb->dst->neighbour) { dev_kfree_skb(skb); /* lost that one */ - clip_priv->stats.tx_dropped++; + dev->stats.tx_dropped++; return 0; } #endif printk(KERN_ERR "clip_start_xmit: NO NEIGHBOUR !\n"); dev_kfree_skb(skb); - clip_priv->stats.tx_dropped++; + dev->stats.tx_dropped++; return 0; } entry = NEIGH2ENTRY(skb->dst->neighbour); @@ -400,7 +400,7 @@ static int clip_start_xmit(struct sk_buff *skb, struct net_device *dev) skb_queue_tail(&entry->neigh->arp_queue, skb); else { dev_kfree_skb(skb); - clip_priv->stats.tx_dropped++; + dev->stats.tx_dropped++; } return 0; } @@ -423,8 +423,8 @@ static int clip_start_xmit(struct sk_buff *skb, struct net_device *dev) printk(KERN_WARNING "clip_start_xmit: XOFF->XOFF transition\n"); return 0; } - clip_priv->stats.tx_packets++; - clip_priv->stats.tx_bytes += skb->len; + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; vcc->send(vcc, skb); if (atm_may_send(vcc, 0)) { entry->vccs->xoff = 0; @@ -443,11 +443,6 @@ static int clip_start_xmit(struct sk_buff *skb, struct net_device *dev) return 0; } -static struct net_device_stats *clip_get_stats(struct net_device *dev) -{ - return &PRIV(dev)->stats; -} - static int clip_mkip(struct atm_vcc *vcc, int timeout) { struct clip_vcc *clip_vcc; @@ -501,8 +496,8 @@ static int clip_mkip(struct atm_vcc *vcc, int timeout) skb_get(skb); clip_push(vcc, skb); - PRIV(skb->dev)->stats.rx_packets--; - PRIV(skb->dev)->stats.rx_bytes -= len; + skb->dev->stats.rx_packets--; + skb->dev->stats.rx_bytes -= len; kfree_skb(skb); } @@ -561,7 +556,6 @@ static void clip_setup(struct net_device *dev) { dev->hard_start_xmit = clip_start_xmit; /* sg_xmit ... */ - dev->get_stats = clip_get_stats; dev->type = ARPHRD_ATM; dev->hard_header_len = RFC1483LLC_LEN; dev->mtu = RFC1626_MTU; -- cgit v1.2.3 From 162619e59ab456aa689080726cb2ada24c1dfddd Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 9 Jan 2009 13:01:01 +0000 Subject: lec: convert to internal network_device_stats Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/atm/lec.c | 44 +++++++++++++++++--------------------------- net/atm/lec.h | 1 - 2 files changed, 17 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/atm/lec.c b/net/atm/lec.c index e5e301550e8a..63ff8b9a85ba 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -62,7 +62,6 @@ static unsigned char bridge_ula_lec[] = { 0x01, 0x80, 0xc2, 0x00, 0x00 }; static int lec_open(struct net_device *dev); static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev); static int lec_close(struct net_device *dev); -static struct net_device_stats *lec_get_stats(struct net_device *dev); static void lec_init(struct net_device *dev); static struct lec_arp_table *lec_arp_find(struct lec_priv *priv, const unsigned char *mac_addr); @@ -218,28 +217,28 @@ static unsigned char *get_tr_dst(unsigned char *packet, unsigned char *rdesc) static int lec_open(struct net_device *dev) { - struct lec_priv *priv = netdev_priv(dev); - netif_start_queue(dev); - memset(&priv->stats, 0, sizeof(struct net_device_stats)); + memset(&dev->stats, 0, sizeof(struct net_device_stats)); return 0; } -static __inline__ void -lec_send(struct atm_vcc *vcc, struct sk_buff *skb, struct lec_priv *priv) +static void +lec_send(struct atm_vcc *vcc, struct sk_buff *skb) { + struct net_device *dev = skb->dev; + ATM_SKB(skb)->vcc = vcc; ATM_SKB(skb)->atm_options = vcc->atm_options; atomic_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc); if (vcc->send(vcc, skb) < 0) { - priv->stats.tx_dropped++; + dev->stats.tx_dropped++; return; } - priv->stats.tx_packets++; - priv->stats.tx_bytes += skb->len; + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; } static void lec_tx_timeout(struct net_device *dev) @@ -270,7 +269,7 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev) pr_debug("lec_start_xmit called\n"); if (!priv->lecd) { printk("%s:No lecd attached\n", dev->name); - priv->stats.tx_errors++; + dev->stats.tx_errors++; netif_stop_queue(dev); return -EUNATCH; } @@ -345,7 +344,7 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev) GFP_ATOMIC); dev_kfree_skb(skb); if (skb2 == NULL) { - priv->stats.tx_dropped++; + dev->stats.tx_dropped++; return 0; } skb = skb2; @@ -380,7 +379,7 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev) ("%s:lec_start_xmit: tx queue full or no arp entry, dropping, ", dev->name); pr_debug("MAC address %pM\n", lec_h->h_dest); - priv->stats.tx_dropped++; + dev->stats.tx_dropped++; dev_kfree_skb(skb); } goto out; @@ -392,10 +391,10 @@ static int lec_start_xmit(struct sk_buff *skb, struct net_device *dev) while (entry && (skb2 = skb_dequeue(&entry->tx_wait))) { pr_debug("lec.c: emptying tx queue, "); pr_debug("MAC address %pM\n", lec_h->h_dest); - lec_send(vcc, skb2, priv); + lec_send(vcc, skb2); } - lec_send(vcc, skb, priv); + lec_send(vcc, skb); if (!atm_may_send(vcc, 0)) { struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc); @@ -427,15 +426,6 @@ static int lec_close(struct net_device *dev) return 0; } -/* - * Get the current statistics. - * This may be called with the card open or closed. - */ -static struct net_device_stats *lec_get_stats(struct net_device *dev) -{ - return &((struct lec_priv *)netdev_priv(dev))->stats; -} - static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) { unsigned long flags; @@ -810,8 +800,8 @@ static void lec_push(struct atm_vcc *vcc, struct sk_buff *skb) else #endif skb->protocol = eth_type_trans(skb, dev); - priv->stats.rx_packets++; - priv->stats.rx_bytes += skb->len; + dev->stats.rx_packets++; + dev->stats.rx_bytes += skb->len; memset(ATM_SKB(skb), 0, sizeof(struct atm_skb_data)); netif_rx(skb); } @@ -1887,7 +1877,7 @@ restart: lec_arp_hold(entry); spin_unlock_irqrestore(&priv->lec_arp_lock, flags); while ((skb = skb_dequeue(&entry->tx_wait)) != NULL) - lec_send(vcc, skb, entry->priv); + lec_send(vcc, skb); entry->last_used = jiffies; entry->status = ESI_FORWARD_DIRECT; lec_arp_put(entry); @@ -2305,7 +2295,7 @@ restart: lec_arp_hold(entry); spin_unlock_irqrestore(&priv->lec_arp_lock, flags); while ((skb = skb_dequeue(&entry->tx_wait)) != NULL) - lec_send(vcc, skb, entry->priv); + lec_send(vcc, skb); entry->last_used = jiffies; entry->status = ESI_FORWARD_DIRECT; lec_arp_put(entry); diff --git a/net/atm/lec.h b/net/atm/lec.h index 0d376682c1a3..9d14d196cc1d 100644 --- a/net/atm/lec.h +++ b/net/atm/lec.h @@ -69,7 +69,6 @@ struct lane2_ops { #define LEC_ARP_TABLE_SIZE 16 struct lec_priv { - struct net_device_stats stats; unsigned short lecid; /* Lecid of this client */ struct hlist_head lec_arp_empty_ones; /* Used for storing VCC's that don't have a MAC address attached yet */ -- cgit v1.2.3 From 004b3225c016efc90cbfe43cdf69c6331462bc56 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 9 Jan 2009 13:01:02 +0000 Subject: lec: convert to net_device_ops Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/atm/lec.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/atm/lec.c b/net/atm/lec.c index 63ff8b9a85ba..c0cba9a037e8 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -667,17 +667,19 @@ static void lec_set_multicast_list(struct net_device *dev) return; } +static const struct net_device_ops lec_netdev_ops = { + .ndo_open = lec_open, + .ndo_stop = lec_close, + .ndo_start_xmit = lec_start_xmit, + .ndo_change_mtu = lec_change_mtu, + .ndo_tx_timeout = lec_tx_timeout, + .ndo_set_multicast_list = lec_set_multicast_list, +}; + + static void lec_init(struct net_device *dev) { - dev->change_mtu = lec_change_mtu; - dev->open = lec_open; - dev->stop = lec_close; - dev->hard_start_xmit = lec_start_xmit; - dev->tx_timeout = lec_tx_timeout; - - dev->get_stats = lec_get_stats; - dev->set_multicast_list = lec_set_multicast_list; - dev->do_ioctl = NULL; + dev->netdev_ops = &lec_netdev_ops; printk("%s: Initialized!\n", dev->name); } -- cgit v1.2.3 From b51414b69148433a79af5dc93463a0489492a788 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 9 Jan 2009 13:01:03 +0000 Subject: netrom: convert to internal net_device_stats Signed-off-by: Stephen Hemminger Acked-by: Ralf Baechle Signed-off-by: David S. Miller --- net/netrom/af_netrom.c | 2 +- net/netrom/nr_dev.c | 14 ++------------ 2 files changed, 3 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index e9c05b8f4f45..cba7849de98e 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1432,7 +1432,7 @@ static int __init nr_proto_init(void) struct net_device *dev; sprintf(name, "nr%d", i); - dev = alloc_netdev(sizeof(struct nr_private), name, nr_setup); + dev = alloc_netdev(0, name, nr_setup); if (!dev) { printk(KERN_ERR "NET/ROM: nr_proto_init - unable to allocate device structure\n"); goto fail; diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 6caf459665f2..5b9a31a6e685 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -42,7 +42,7 @@ int nr_rx_ip(struct sk_buff *skb, struct net_device *dev) { - struct net_device_stats *stats = netdev_priv(dev); + struct net_device_stats *stats = &dev->stats; if (!netif_running(dev)) { stats->rx_dropped++; @@ -171,8 +171,7 @@ static int nr_close(struct net_device *dev) static int nr_xmit(struct sk_buff *skb, struct net_device *dev) { - struct nr_private *nr = netdev_priv(dev); - struct net_device_stats *stats = &nr->stats; + struct net_device_stats *stats = &dev->stats; unsigned int len = skb->len; if (!nr_route_frame(skb, NULL)) { @@ -187,13 +186,6 @@ static int nr_xmit(struct sk_buff *skb, struct net_device *dev) return 0; } -static struct net_device_stats *nr_get_stats(struct net_device *dev) -{ - struct nr_private *nr = netdev_priv(dev); - - return &nr->stats; -} - static const struct header_ops nr_header_ops = { .create = nr_header, .rebuild= nr_rebuild_header, @@ -215,6 +207,4 @@ void nr_setup(struct net_device *dev) /* New-style flags. */ dev->flags = IFF_NOARP; - - dev->get_stats = nr_get_stats; } -- cgit v1.2.3 From 0f6c5c8e79781974c0e660fd8bfc659b101b44fd Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 9 Jan 2009 13:01:04 +0000 Subject: netrom: convert to net_device_ops Signed-off-by: Stephen Hemminger Acked-by: Ralf Baechle Signed-off-by: David S. Miller --- net/netrom/nr_dev.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netrom/nr_dev.c b/net/netrom/nr_dev.c index 5b9a31a6e685..351372463fed 100644 --- a/net/netrom/nr_dev.c +++ b/net/netrom/nr_dev.c @@ -191,19 +191,21 @@ static const struct header_ops nr_header_ops = { .rebuild= nr_rebuild_header, }; +static const struct net_device_ops nr_netdev_ops = { + .ndo_open = nr_open, + .ndo_stop = nr_close, + .ndo_start_xmit = nr_xmit, + .ndo_set_mac_address = nr_set_mac_address, +}; void nr_setup(struct net_device *dev) { dev->mtu = NR_MAX_PACKET_SIZE; - dev->hard_start_xmit = nr_xmit; - dev->open = nr_open; - dev->stop = nr_close; - + dev->netdev_ops = &nr_netdev_ops; dev->header_ops = &nr_header_ops; dev->hard_header_len = NR_NETWORK_LEN + NR_TRANSPORT_LEN; dev->addr_len = AX25_ADDR_LEN; dev->type = ARPHRD_NETROM; - dev->set_mac_address = nr_set_mac_address; /* New-style flags. */ dev->flags = IFF_NOARP; -- cgit v1.2.3 From d289d120b46d9b6c68448b1d1c6d3edb94cdbde6 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 9 Jan 2009 13:01:05 +0000 Subject: rose: convert to internal net_device_stats Signed-off-by: Stephen Hemminger Acked-by: Ralf Baechle Signed-off-by: David S. Miller --- net/rose/af_rose.c | 3 +-- net/rose/rose_dev.c | 10 ++-------- 2 files changed, 3 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 01392649b462..650139626581 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1587,8 +1587,7 @@ static int __init rose_proto_init(void) char name[IFNAMSIZ]; sprintf(name, "rose%d", i); - dev = alloc_netdev(sizeof(struct net_device_stats), - name, rose_setup); + dev = alloc_netdev(0, name, rose_setup); if (!dev) { printk(KERN_ERR "ROSE: rose_proto_init - unable to allocate memory\n"); rc = -ENOMEM; diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c index 12cfcf09556b..ddb566707184 100644 --- a/net/rose/rose_dev.c +++ b/net/rose/rose_dev.c @@ -57,7 +57,7 @@ static int rose_rebuild_header(struct sk_buff *skb) { #ifdef CONFIG_INET struct net_device *dev = skb->dev; - struct net_device_stats *stats = netdev_priv(dev); + struct net_device_stats *stats = &dev->stats; unsigned char *bp = (unsigned char *)skb->data; struct sk_buff *skbn; unsigned int len; @@ -133,7 +133,7 @@ static int rose_close(struct net_device *dev) static int rose_xmit(struct sk_buff *skb, struct net_device *dev) { - struct net_device_stats *stats = netdev_priv(dev); + struct net_device_stats *stats = &dev->stats; if (!netif_running(dev)) { printk(KERN_ERR "ROSE: rose_xmit - called when iface is down\n"); @@ -144,11 +144,6 @@ static int rose_xmit(struct sk_buff *skb, struct net_device *dev) return 0; } -static struct net_device_stats *rose_get_stats(struct net_device *dev) -{ - return netdev_priv(dev); -} - static const struct header_ops rose_header_ops = { .create = rose_header, .rebuild= rose_rebuild_header, @@ -169,5 +164,4 @@ void rose_setup(struct net_device *dev) /* New-style flags. */ dev->flags = IFF_NOARP; - dev->get_stats = rose_get_stats; } -- cgit v1.2.3 From 3170c6568776a58e1eeec8ff949a65f5cf5d7ceb Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 9 Jan 2009 13:01:06 +0000 Subject: rose: convert to network_device_ops Signed-off-by: Stephen Hemminger Acked-by: Ralf Baechle Signed-off-by: David S. Miller --- net/rose/rose_dev.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/rose/rose_dev.c b/net/rose/rose_dev.c index ddb566707184..7dcf2569613b 100644 --- a/net/rose/rose_dev.c +++ b/net/rose/rose_dev.c @@ -149,18 +149,22 @@ static const struct header_ops rose_header_ops = { .rebuild= rose_rebuild_header, }; +static const struct net_device_ops rose_netdev_ops = { + .ndo_open = rose_open, + .ndo_stop = rose_close, + .ndo_start_xmit = rose_xmit, + .ndo_set_mac_address = rose_set_mac_address, +}; + void rose_setup(struct net_device *dev) { dev->mtu = ROSE_MAX_PACKET_SIZE - 2; - dev->hard_start_xmit = rose_xmit; - dev->open = rose_open; - dev->stop = rose_close; + dev->netdev_ops = &rose_netdev_ops; dev->header_ops = &rose_header_ops; dev->hard_header_len = AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN; dev->addr_len = ROSE_ADDR_LEN; dev->type = ARPHRD_ROSE; - dev->set_mac_address = rose_set_mac_address; /* New-style flags. */ dev->flags = IFF_NOARP; -- cgit v1.2.3 From 60961ce4d09db7c1ba49da3375123a18845ec864 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 9 Jan 2009 13:01:07 +0000 Subject: appletalk: remove unneeded stubs With net_device_ops if set_mac_address is null, then error is -EOPNOTSUPPORTED. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/appletalk/dev.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/appletalk/dev.c b/net/appletalk/dev.c index d856a62ab50f..72277d70c980 100644 --- a/net/appletalk/dev.c +++ b/net/appletalk/dev.c @@ -9,22 +9,20 @@ #include #include +#ifdef CONFIG_COMPAT_NET_DEV_OPS static int ltalk_change_mtu(struct net_device *dev, int mtu) { return -EINVAL; } - -static int ltalk_mac_addr(struct net_device *dev, void *addr) -{ - return -EINVAL; -} +#endif static void ltalk_setup(struct net_device *dev) { /* Fill in the fields of the device structure with localtalk-generic values. */ +#ifdef CONFIG_COMPAT_NET_DEV_OPS dev->change_mtu = ltalk_change_mtu; - dev->set_mac_address = ltalk_mac_addr; +#endif dev->type = ARPHRD_LOCALTLK; dev->hard_header_len = LTALK_HLEN; -- cgit v1.2.3 From f90f92eed74251034f251e3cdf4fa5c4c1f09df0 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Fri, 16 Jan 2009 23:36:30 +0000 Subject: dccp: Initialisation framework for feature negotiation This initialises feature negotiation from two tables, which are in turn are initialised from sysctls. As a novel feature, specifics of the implementation (e.g. that short seqnos and ECN are not yet available) are advertised for robustness. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: David S. Miller --- net/dccp/feat.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++-------- net/dccp/feat.h | 2 +- 2 files changed, 57 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 4152308958ab..67ffac9905f8 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -1115,23 +1115,70 @@ int dccp_feat_parse_options(struct sock *sk, struct dccp_request_sock *dreq, return 0; /* ignore FN options in all other states */ } +/** + * dccp_feat_init - Seed feature negotiation with host-specific defaults + * This initialises global defaults, depending on the value of the sysctls. + * These can later be overridden by registering changes via setsockopt calls. + * The last link in the chain is finalise_settings, to make sure that between + * here and the start of actual feature negotiation no inconsistencies enter. + * + * All features not appearing below use either defaults or are otherwise + * later adjusted through dccp_feat_finalise_settings(). + */ int dccp_feat_init(struct sock *sk) { - struct dccp_sock *dp = dccp_sk(sk); - struct dccp_minisock *dmsk = dccp_msk(sk); + struct list_head *fn = &dccp_sk(sk)->dccps_featneg; + u8 on = 1, off = 0; int rc; + struct { + u8 *val; + u8 len; + } tx, rx; + + /* Non-negotiable (NN) features */ + rc = __feat_register_nn(fn, DCCPF_SEQUENCE_WINDOW, 0, + sysctl_dccp_feat_sequence_window); + if (rc) + return rc; + + /* Server-priority (SP) features */ + + /* Advertise that short seqnos are not supported (7.6.1) */ + rc = __feat_register_sp(fn, DCCPF_SHORT_SEQNOS, true, true, &off, 1); + if (rc) + return rc; - INIT_LIST_HEAD(&dmsk->dccpms_pending); /* XXX no longer used */ - INIT_LIST_HEAD(&dmsk->dccpms_conf); /* XXX no longer used */ + /* RFC 4340 12.1: "If a DCCP is not ECN capable, ..." */ + rc = __feat_register_sp(fn, DCCPF_ECN_INCAPABLE, true, true, &on, 1); + if (rc) + return rc; + + /* + * We advertise the available list of CCIDs and reorder according to + * preferences, to avoid failure resulting from negotiating different + * singleton values (which always leads to failure). + * These settings can still (later) be overridden via sockopts. + */ + if (ccid_get_builtin_ccids(&tx.val, &tx.len) || + ccid_get_builtin_ccids(&rx.val, &rx.len)) + return -ENOBUFS; - /* Ack ratio */ - rc = __feat_register_nn(&dp->dccps_featneg, DCCPF_ACK_RATIO, 0, - dp->dccps_l_ack_ratio); + if (!dccp_feat_prefer(sysctl_dccp_feat_tx_ccid, tx.val, tx.len) || + !dccp_feat_prefer(sysctl_dccp_feat_rx_ccid, rx.val, rx.len)) + goto free_ccid_lists; + + rc = __feat_register_sp(fn, DCCPF_CCID, true, false, tx.val, tx.len); + if (rc) + goto free_ccid_lists; + + rc = __feat_register_sp(fn, DCCPF_CCID, false, false, rx.val, rx.len); + +free_ccid_lists: + kfree(tx.val); + kfree(rx.val); return rc; } -EXPORT_SYMBOL_GPL(dccp_feat_init); - int dccp_feat_activate_values(struct sock *sk, struct list_head *fn_list) { struct dccp_sock *dp = dccp_sk(sk); diff --git a/net/dccp/feat.h b/net/dccp/feat.h index 9b46e2a7866e..5e7b8481cd04 100644 --- a/net/dccp/feat.h +++ b/net/dccp/feat.h @@ -113,13 +113,13 @@ static inline void dccp_feat_debug(const u8 type, const u8 feat, const u8 val) #define dccp_feat_debug(type, feat, val) #endif /* CONFIG_IP_DCCP_DEBUG */ +extern int dccp_feat_init(struct sock *sk); extern int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, u8 const *list, u8 len); extern int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val); extern int dccp_feat_parse_options(struct sock *, struct dccp_request_sock *, u8 mand, u8 opt, u8 feat, u8 *val, u8 len); extern int dccp_feat_clone_list(struct list_head const *, struct list_head *); -extern int dccp_feat_init(struct sock *sk); /* * Encoding variable-length options and their maximum length. -- cgit v1.2.3 From 792b48780e8b6435d017cef4b5c304876a48653e Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Fri, 16 Jan 2009 23:36:31 +0000 Subject: dccp: Implement both feature-local and feature-remote Sequence Window feature This adds full support for local/remote Sequence Window feature, from which the * sequence-number-validity (W) and * acknowledgment-number-validity (W') windows derive as specified in RFC 4340, 7.5.3. Specifically, the following is contained in this patch: * integrated new socket fields into dccp_sk; * updated the update_gsr/gss routines with regard to these fields; * updated handler code: the Sequence Window feature is located at the TX side, so the local feature is meant if the handler-rx flag is false; * the initialisation of `rcv_wnd' in reqsk is removed, since - rcv_wnd is not used by the code anywhere; - sequence number checks are not done in the LISTEN state (cf. 7.5.3); - dccp_check_req checks the Ack number validity more rigorously; * the `struct dccp_minisock' became empty and is now removed. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: David S. Miller --- net/dccp/dccp.h | 16 +++++++--------- net/dccp/feat.c | 13 +++++++++++-- net/dccp/minisocks.c | 11 ----------- net/dccp/proto.c | 2 -- 4 files changed, 18 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index f2230fc168e1..04ae91898a68 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -409,23 +409,21 @@ static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack, static inline void dccp_update_gsr(struct sock *sk, u64 seq) { struct dccp_sock *dp = dccp_sk(sk); - const struct dccp_minisock *dmsk = dccp_msk(sk); dp->dccps_gsr = seq; - dccp_set_seqno(&dp->dccps_swl, - dp->dccps_gsr + 1 - (dmsk->dccpms_sequence_window / 4)); - dccp_set_seqno(&dp->dccps_swh, - dp->dccps_gsr + (3 * dmsk->dccpms_sequence_window) / 4); + /* Sequence validity window depends on remote Sequence Window (7.5.1) */ + dp->dccps_swl = SUB48(ADD48(dp->dccps_gsr, 1), dp->dccps_r_seq_win / 4); + dp->dccps_swh = ADD48(dp->dccps_gsr, (3 * dp->dccps_r_seq_win) / 4); } static inline void dccp_update_gss(struct sock *sk, u64 seq) { struct dccp_sock *dp = dccp_sk(sk); - dp->dccps_awh = dp->dccps_gss = seq; - dccp_set_seqno(&dp->dccps_awl, - (dp->dccps_gss - - dccp_msk(sk)->dccpms_sequence_window + 1)); + dp->dccps_gss = seq; + /* Ack validity window depends on local Sequence Window value (7.5.1) */ + dp->dccps_awl = SUB48(ADD48(dp->dccps_gss, 1), dp->dccps_l_seq_win); + dp->dccps_awh = dp->dccps_gss; } static inline int dccp_ack_pending(const struct sock *sk) diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 67ffac9905f8..7303f79705d2 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -51,8 +51,17 @@ static int dccp_hdlr_ccid(struct sock *sk, u64 ccid, bool rx) static int dccp_hdlr_seq_win(struct sock *sk, u64 seq_win, bool rx) { - if (!rx) - dccp_msk(sk)->dccpms_sequence_window = seq_win; + struct dccp_sock *dp = dccp_sk(sk); + + if (rx) { + dp->dccps_r_seq_win = seq_win; + /* propagate changes to update SWL/SWH */ + dccp_update_gsr(sk, dp->dccps_gsr); + } else { + dp->dccps_l_seq_win = seq_win; + /* propagate changes to update AWL */ + dccp_update_gss(sk, dp->dccps_gss); + } return 0; } diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 6821ae33dd37..5ca49cec95f5 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -42,11 +42,6 @@ struct inet_timewait_death_row dccp_death_row = { EXPORT_SYMBOL_GPL(dccp_death_row); -void dccp_minisock_init(struct dccp_minisock *dmsk) -{ - dmsk->dccpms_sequence_window = sysctl_dccp_feat_sequence_window; -} - void dccp_time_wait(struct sock *sk, int state, int timeo) { struct inet_timewait_sock *tw = NULL; @@ -110,7 +105,6 @@ struct sock *dccp_create_openreq_child(struct sock *sk, struct dccp_request_sock *dreq = dccp_rsk(req); struct inet_connection_sock *newicsk = inet_csk(newsk); struct dccp_sock *newdp = dccp_sk(newsk); - struct dccp_minisock *newdmsk = dccp_msk(newsk); newdp->dccps_role = DCCP_ROLE_SERVER; newdp->dccps_hc_rx_ackvec = NULL; @@ -128,10 +122,6 @@ struct sock *dccp_create_openreq_child(struct sock *sk, * Initialize S.GAR := S.ISS * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies */ - - /* See dccp_v4_conn_request */ - newdmsk->dccpms_sequence_window = req->rcv_wnd; - newdp->dccps_gar = newdp->dccps_iss = dreq->dreq_iss; dccp_update_gss(newsk, dreq->dreq_iss); @@ -290,7 +280,6 @@ int dccp_reqsk_init(struct request_sock *req, inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport; inet_rsk(req)->loc_port = dccp_hdr(skb)->dccph_dport; inet_rsk(req)->acked = 0; - req->rcv_wnd = sysctl_dccp_feat_sequence_window; dreq->dreq_timestamp_echo = 0; /* inherit feature negotiation options from listening socket */ diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 945b4d5d23b3..314a1b5c033c 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -174,8 +174,6 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) struct dccp_sock *dp = dccp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); - dccp_minisock_init(&dp->dccps_minisock); - icsk->icsk_rto = DCCP_TIMEOUT_INIT; icsk->icsk_syn_retries = sysctl_dccp_request_retries; sk->sk_state = DCCP_CLOSED; -- cgit v1.2.3 From 883ca833e5fb814fb03426c9d35e5489ce43e8da Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Fri, 16 Jan 2009 23:36:32 +0000 Subject: dccp: Initialisation and type-checking of feature sysctls This patch takes care of initialising and type-checking sysctls related to feature negotiation. Type checking is important since some of the sysctls now directly impact the feature-negotiation process. The sysctls are initialised with the known default values for each feature. For the type-checking the value constraints from RFC 4340 are used: * Sequence Window uses the specified Wmin=32, the maximum is ulong (4 bytes), tested and confirmed that it works up to 4294967295 - for Gbps speed; * Ack Ratio is between 0 .. 0xffff (2-byte unsigned integer); * CCIDs are between 0 .. 255; * request_retries, retries1, retries2 also between 0..255 for good measure; * tx_qlen is checked to be non-negative; * sync_ratelimit remains as before. Notes: ------ 1. Die s@sysctl_dccp_feat@sysctl_dccp@g since the sysctls are now in feat.c. 2. As pointed out by Arnaldo, the pattern of type-checking repeats itself in other places, sometimes with exactly the same kind of definitions (e.g. "static int zero;"). It may be a good idea (kernel janitors?) to consolidate type checking. For the sake of keeping the changeset small and in order not to affect other subsystems, I have not strived to generalise here. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: David S. Miller --- net/dccp/dccp.h | 3 --- net/dccp/feat.c | 11 ++++++++--- net/dccp/feat.h | 8 ++++++++ net/dccp/options.c | 4 ---- net/dccp/sysctl.c | 43 ++++++++++++++++++++++++++++++------------- 5 files changed, 46 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 04ae91898a68..44a5bc6f6785 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -95,9 +95,6 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo); extern int sysctl_dccp_request_retries; extern int sysctl_dccp_retries1; extern int sysctl_dccp_retries2; -extern int sysctl_dccp_feat_sequence_window; -extern int sysctl_dccp_feat_rx_ccid; -extern int sysctl_dccp_feat_tx_ccid; extern int sysctl_dccp_tx_qlen; extern int sysctl_dccp_sync_ratelimit; diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 7303f79705d2..12006e9b2472 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -25,6 +25,11 @@ #include "ccid.h" #include "feat.h" +/* feature-specific sysctls - initialised to the defaults from RFC 4340, 6.4 */ +unsigned long sysctl_dccp_sequence_window __read_mostly = 100; +int sysctl_dccp_rx_ccid __read_mostly = 2, + sysctl_dccp_tx_ccid __read_mostly = 2; + /* * Feature activation handlers. * @@ -1146,7 +1151,7 @@ int dccp_feat_init(struct sock *sk) /* Non-negotiable (NN) features */ rc = __feat_register_nn(fn, DCCPF_SEQUENCE_WINDOW, 0, - sysctl_dccp_feat_sequence_window); + sysctl_dccp_sequence_window); if (rc) return rc; @@ -1172,8 +1177,8 @@ int dccp_feat_init(struct sock *sk) ccid_get_builtin_ccids(&rx.val, &rx.len)) return -ENOBUFS; - if (!dccp_feat_prefer(sysctl_dccp_feat_tx_ccid, tx.val, tx.len) || - !dccp_feat_prefer(sysctl_dccp_feat_rx_ccid, rx.val, rx.len)) + if (!dccp_feat_prefer(sysctl_dccp_tx_ccid, tx.val, tx.len) || + !dccp_feat_prefer(sysctl_dccp_rx_ccid, rx.val, rx.len)) goto free_ccid_lists; rc = __feat_register_sp(fn, DCCPF_CCID, true, false, tx.val, tx.len); diff --git a/net/dccp/feat.h b/net/dccp/feat.h index 5e7b8481cd04..40aa7a10bd5f 100644 --- a/net/dccp/feat.h +++ b/net/dccp/feat.h @@ -100,6 +100,13 @@ struct ccid_dependency { u8 val; }; +/* + * Sysctls to seed defaults for feature negotiation + */ +extern unsigned long sysctl_dccp_sequence_window; +extern int sysctl_dccp_rx_ccid; +extern int sysctl_dccp_tx_ccid; + #ifdef CONFIG_IP_DCCP_DEBUG extern const char *dccp_feat_typename(const u8 type); extern const char *dccp_feat_name(const u8 feat); @@ -114,6 +121,7 @@ static inline void dccp_feat_debug(const u8 type, const u8 feat, const u8 val) #endif /* CONFIG_IP_DCCP_DEBUG */ extern int dccp_feat_init(struct sock *sk); +extern void dccp_feat_initialise_sysctls(void); extern int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, u8 const *list, u8 len); extern int dccp_feat_register_nn(struct sock *sk, u8 feat, u64 val); diff --git a/net/dccp/options.c b/net/dccp/options.c index 7b1165c21f51..3e2726c7182d 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -23,10 +23,6 @@ #include "dccp.h" #include "feat.h" -int sysctl_dccp_feat_sequence_window = DCCPF_INITIAL_SEQUENCE_WINDOW; -int sysctl_dccp_feat_rx_ccid = DCCPF_INITIAL_CCID; -int sysctl_dccp_feat_tx_ccid = DCCPF_INITIAL_CCID; - u64 dccp_decode_value_var(const u8 *bf, const u8 len) { u64 value = 0; diff --git a/net/dccp/sysctl.c b/net/dccp/sysctl.c index 018e210875e1..a5a1856234e7 100644 --- a/net/dccp/sysctl.c +++ b/net/dccp/sysctl.c @@ -18,55 +18,72 @@ #error This file should not be compiled without CONFIG_SYSCTL defined #endif +/* Boundary values */ +static int zero = 0, + u8_max = 0xFF; +static unsigned long seqw_min = 32; + static struct ctl_table dccp_default_table[] = { { .procname = "seq_window", - .data = &sysctl_dccp_feat_sequence_window, - .maxlen = sizeof(sysctl_dccp_feat_sequence_window), + .data = &sysctl_dccp_sequence_window, + .maxlen = sizeof(sysctl_dccp_sequence_window), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_doulongvec_minmax, + .extra1 = &seqw_min, /* RFC 4340, 7.5.2 */ }, { .procname = "rx_ccid", - .data = &sysctl_dccp_feat_rx_ccid, - .maxlen = sizeof(sysctl_dccp_feat_rx_ccid), + .data = &sysctl_dccp_rx_ccid, + .maxlen = sizeof(sysctl_dccp_rx_ccid), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &u8_max, /* RFC 4340, 10. */ }, { .procname = "tx_ccid", - .data = &sysctl_dccp_feat_tx_ccid, - .maxlen = sizeof(sysctl_dccp_feat_tx_ccid), + .data = &sysctl_dccp_tx_ccid, + .maxlen = sizeof(sysctl_dccp_tx_ccid), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &u8_max, /* RFC 4340, 10. */ }, { .procname = "request_retries", .data = &sysctl_dccp_request_retries, .maxlen = sizeof(sysctl_dccp_request_retries), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &u8_max, }, { .procname = "retries1", .data = &sysctl_dccp_retries1, .maxlen = sizeof(sysctl_dccp_retries1), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &u8_max, }, { .procname = "retries2", .data = &sysctl_dccp_retries2, .maxlen = sizeof(sysctl_dccp_retries2), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &u8_max, }, { .procname = "tx_qlen", .data = &sysctl_dccp_tx_qlen, .maxlen = sizeof(sysctl_dccp_tx_qlen), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, }, { .procname = "sync_ratelimit", -- cgit v1.2.3 From f3f3abb62ccb1a1c77bcce855c04e12356e6ac95 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Fri, 16 Jan 2009 23:36:33 +0000 Subject: dccp: Debugging functions for feature negotiation Since all feature-negotiation processing now takes place in feat.c, functions for producing verbose debugging output are concentrated there. New functions to print out values, entry records, and options are provided, and also a macro is defined to not always have the function name in the output line. Thanks a lot to Wei Yongjun and Giuseppe Galeota for help and discussion with an earlier revision of this patch. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: David S. Miller --- net/dccp/dccp.h | 2 + net/dccp/feat.c | 149 ++++++++++++++++++++++++++++++++++++++--------------- net/dccp/feat.h | 13 ----- net/dccp/options.c | 4 -- 4 files changed, 109 insertions(+), 59 deletions(-) (limited to 'net') diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 44a5bc6f6785..08a569ff02d1 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -42,9 +42,11 @@ extern int dccp_debug; #define dccp_pr_debug(format, a...) DCCP_PR_DEBUG(dccp_debug, format, ##a) #define dccp_pr_debug_cat(format, a...) DCCP_PRINTK(dccp_debug, format, ##a) +#define dccp_debug(fmt, a...) dccp_pr_debug_cat(KERN_DEBUG fmt, ##a) #else #define dccp_pr_debug(format, a...) #define dccp_pr_debug_cat(format, a...) +#define dccp_debug(format, a...) #endif extern struct inet_hashinfo dccp_hashinfo; diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 12006e9b2472..b04160a2eea5 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -208,6 +208,100 @@ static int dccp_feat_default_value(u8 feat_num) return idx < 0 ? 0 : dccp_feat_table[idx].default_value; } +/* + * Debugging and verbose-printing section + */ +static const char *dccp_feat_fname(const u8 feat) +{ + static const char *feature_names[] = { + [DCCPF_RESERVED] = "Reserved", + [DCCPF_CCID] = "CCID", + [DCCPF_SHORT_SEQNOS] = "Allow Short Seqnos", + [DCCPF_SEQUENCE_WINDOW] = "Sequence Window", + [DCCPF_ECN_INCAPABLE] = "ECN Incapable", + [DCCPF_ACK_RATIO] = "Ack Ratio", + [DCCPF_SEND_ACK_VECTOR] = "Send ACK Vector", + [DCCPF_SEND_NDP_COUNT] = "Send NDP Count", + [DCCPF_MIN_CSUM_COVER] = "Min. Csum Coverage", + [DCCPF_DATA_CHECKSUM] = "Send Data Checksum", + }; + if (feat > DCCPF_DATA_CHECKSUM && feat < DCCPF_MIN_CCID_SPECIFIC) + return feature_names[DCCPF_RESERVED]; + + if (feat == DCCPF_SEND_LEV_RATE) + return "Send Loss Event Rate"; + if (feat >= DCCPF_MIN_CCID_SPECIFIC) + return "CCID-specific"; + + return feature_names[feat]; +} + +static const char *dccp_feat_sname[] = { "DEFAULT", "INITIALISING", "CHANGING", + "UNSTABLE", "STABLE" }; + +#ifdef CONFIG_IP_DCCP_DEBUG +static const char *dccp_feat_oname(const u8 opt) +{ + switch (opt) { + case DCCPO_CHANGE_L: return "Change_L"; + case DCCPO_CONFIRM_L: return "Confirm_L"; + case DCCPO_CHANGE_R: return "Change_R"; + case DCCPO_CONFIRM_R: return "Confirm_R"; + } + return NULL; +} + +static void dccp_feat_printval(u8 feat_num, dccp_feat_val const *val) +{ + u8 i, type = dccp_feat_type(feat_num); + + if (val == NULL || (type == FEAT_SP && val->sp.vec == NULL)) + dccp_pr_debug_cat("(NULL)"); + else if (type == FEAT_SP) + for (i = 0; i < val->sp.len; i++) + dccp_pr_debug_cat("%s%u", i ? " " : "", val->sp.vec[i]); + else if (type == FEAT_NN) + dccp_pr_debug_cat("%llu", (unsigned long long)val->nn); + else + dccp_pr_debug_cat("unknown type %u", type); +} + +static void dccp_feat_printvals(u8 feat_num, u8 *list, u8 len) +{ + u8 type = dccp_feat_type(feat_num); + dccp_feat_val fval = { .sp.vec = list, .sp.len = len }; + + if (type == FEAT_NN) + fval.nn = dccp_decode_value_var(list, len); + dccp_feat_printval(feat_num, &fval); +} + +static void dccp_feat_print_entry(struct dccp_feat_entry const *entry) +{ + dccp_debug(" * %s %s = ", entry->is_local ? "local" : "remote", + dccp_feat_fname(entry->feat_num)); + dccp_feat_printval(entry->feat_num, &entry->val); + dccp_pr_debug_cat(", state=%s %s\n", dccp_feat_sname[entry->state], + entry->needs_confirm ? "(Confirm pending)" : ""); +} + +#define dccp_feat_print_opt(opt, feat, val, len, mandatory) do { \ + dccp_pr_debug("%s(%s, ", dccp_feat_oname(opt), dccp_feat_fname(feat));\ + dccp_feat_printvals(feat, val, len); \ + dccp_pr_debug_cat(") %s\n", mandatory ? "!" : ""); } while (0) + +#define dccp_feat_print_fnlist(fn_list) { \ + const struct dccp_feat_entry *___entry; \ + \ + dccp_pr_debug("List Dump:\n"); \ + list_for_each_entry(___entry, fn_list, node) \ + dccp_feat_print_entry(___entry); \ +} +#else /* ! CONFIG_IP_DCCP_DEBUG */ +#define dccp_feat_print_opt(opt, feat, val, len, mandatory) +#define dccp_feat_print_fnlist(fn_list) +#endif + static int __dccp_feat_activate(struct sock *sk, const int idx, const bool is_local, dccp_feat_val const *fval) { @@ -240,6 +334,10 @@ static int __dccp_feat_activate(struct sock *sk, const int idx, /* Location is RX if this is a local-RX or remote-TX feature */ rx = (is_local == (dccp_feat_table[idx].rxtx == FEAT_AT_RX)); + dccp_debug(" -> activating %s %s, %sval=%llu\n", rx ? "RX" : "TX", + dccp_feat_fname(dccp_feat_table[idx].feat_num), + fval ? "" : "default ", (unsigned long long)val); + return dccp_feat_table[idx].activation_hdlr(sk, val, rx); } @@ -544,6 +642,7 @@ int dccp_feat_insert_opts(struct dccp_sock *dp, struct dccp_request_sock *dreq, return -1; } } + dccp_feat_print_opt(opt, pos->feat_num, ptr, len, 0); if (dccp_insert_fn_opt(skb, opt, pos->feat_num, ptr, len, rpt)) return -1; @@ -797,6 +896,7 @@ int dccp_feat_finalise_settings(struct dccp_sock *dp) while (i--) if (ccids[i] > 0 && dccp_feat_propagate_ccid(fn, ccids[i], i)) return -1; + dccp_feat_print_fnlist(fn); return 0; } @@ -915,6 +1015,8 @@ static u8 dccp_feat_change_recv(struct list_head *fn, u8 is_mandatory, u8 opt, if (len == 0 || type == FEAT_UNKNOWN) /* 6.1 and 6.6.8 */ goto unknown_feature_or_value; + dccp_feat_print_opt(opt, feat, val, len, is_mandatory); + /* * Negotiation of NN features: Change R is invalid, so there is no * simultaneous negotiation; hence we do not look up in the list. @@ -1020,6 +1122,8 @@ static u8 dccp_feat_confirm_recv(struct list_head *fn, u8 is_mandatory, u8 opt, const bool local = (opt == DCCPO_CONFIRM_R); struct dccp_feat_entry *entry = dccp_feat_list_lookup(fn, feat, local); + dccp_feat_print_opt(opt, feat, val, len, is_mandatory); + if (entry == NULL) { /* nothing queued: ignore or handle error */ if (is_mandatory && type == FEAT_UNKNOWN) return DCCP_RESET_CODE_MANDATORY_ERROR; @@ -1217,9 +1321,10 @@ int dccp_feat_activate_values(struct sock *sk, struct list_head *fn_list) goto activation_failed; } if (cur->state != FEAT_STABLE) { - DCCP_CRIT("Negotiation of %s %u failed in state %u", + DCCP_CRIT("Negotiation of %s %s failed in state %s", cur->is_local ? "local" : "remote", - cur->feat_num, cur->state); + dccp_feat_fname(cur->feat_num), + dccp_feat_sname[cur->state]); goto activation_failed; } fvals[idx][cur->is_local] = &cur->val; @@ -1260,43 +1365,3 @@ activation_failed: dp->dccps_hc_rx_ackvec = NULL; return -1; } - -#ifdef CONFIG_IP_DCCP_DEBUG -const char *dccp_feat_typename(const u8 type) -{ - switch(type) { - case DCCPO_CHANGE_L: return("ChangeL"); - case DCCPO_CONFIRM_L: return("ConfirmL"); - case DCCPO_CHANGE_R: return("ChangeR"); - case DCCPO_CONFIRM_R: return("ConfirmR"); - /* the following case must not appear in feature negotation */ - default: dccp_pr_debug("unknown type %d [BUG!]\n", type); - } - return NULL; -} - -const char *dccp_feat_name(const u8 feat) -{ - static const char *feature_names[] = { - [DCCPF_RESERVED] = "Reserved", - [DCCPF_CCID] = "CCID", - [DCCPF_SHORT_SEQNOS] = "Allow Short Seqnos", - [DCCPF_SEQUENCE_WINDOW] = "Sequence Window", - [DCCPF_ECN_INCAPABLE] = "ECN Incapable", - [DCCPF_ACK_RATIO] = "Ack Ratio", - [DCCPF_SEND_ACK_VECTOR] = "Send ACK Vector", - [DCCPF_SEND_NDP_COUNT] = "Send NDP Count", - [DCCPF_MIN_CSUM_COVER] = "Min. Csum Coverage", - [DCCPF_DATA_CHECKSUM] = "Send Data Checksum", - }; - if (feat > DCCPF_DATA_CHECKSUM && feat < DCCPF_MIN_CCID_SPECIFIC) - return feature_names[DCCPF_RESERVED]; - - if (feat == DCCPF_SEND_LEV_RATE) - return "Send Loss Event Rate"; - if (feat >= DCCPF_MIN_CCID_SPECIFIC) - return "CCID-specific"; - - return feature_names[feat]; -} -#endif /* CONFIG_IP_DCCP_DEBUG */ diff --git a/net/dccp/feat.h b/net/dccp/feat.h index 40aa7a10bd5f..f96721619def 100644 --- a/net/dccp/feat.h +++ b/net/dccp/feat.h @@ -107,19 +107,6 @@ extern unsigned long sysctl_dccp_sequence_window; extern int sysctl_dccp_rx_ccid; extern int sysctl_dccp_tx_ccid; -#ifdef CONFIG_IP_DCCP_DEBUG -extern const char *dccp_feat_typename(const u8 type); -extern const char *dccp_feat_name(const u8 feat); - -static inline void dccp_feat_debug(const u8 type, const u8 feat, const u8 val) -{ - dccp_pr_debug("%s(%s (%d), %d)\n", dccp_feat_typename(type), - dccp_feat_name(feat), feat, val); -} -#else -#define dccp_feat_debug(type, feat, val) -#endif /* CONFIG_IP_DCCP_DEBUG */ - extern int dccp_feat_init(struct sock *sk); extern void dccp_feat_initialise_sysctls(void); extern int dccp_feat_register_sp(struct sock *sk, u8 feat, u8 is_local, diff --git a/net/dccp/options.c b/net/dccp/options.c index 3e2726c7182d..1b08cae9c65b 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -498,10 +498,6 @@ int dccp_insert_fn_opt(struct sk_buff *skb, u8 type, u8 feat, *to++ = *val; if (len) memcpy(to, val, len); - - dccp_pr_debug("%s(%s (%d), ...), length %d\n", - dccp_feat_typename(type), - dccp_feat_name(feat), feat, len); return 0; } -- cgit v1.2.3 From a9d8f9110d7e953c2f2b521087a4179677843c2a Mon Sep 17 00:00:00 2001 From: Evgeniy Polyakov Date: Mon, 19 Jan 2009 16:46:02 -0800 Subject: inet: Allowing more than 64k connections and heavily optimize bind(0) time. With simple extension to the binding mechanism, which allows to bind more than 64k sockets (or smaller amount, depending on sysctl parameters), we have to traverse the whole bind hash table to find out empty bucket. And while it is not a problem for example for 32k connections, bind() completion time grows exponentially (since after each successful binding we have to traverse one bucket more to find empty one) even if we start each time from random offset inside the hash table. So, when hash table is full, and we want to add another socket, we have to traverse the whole table no matter what, so effectivelly this will be the worst case performance and it will be constant. Attached picture shows bind() time depending on number of already bound sockets. Green area corresponds to the usual binding to zero port process, which turns on kernel port selection as described above. Red area is the bind process, when number of reuse-bound sockets is not limited by 64k (or sysctl parameters). The same exponential growth (hidden by the green area) before number of ports reaches sysctl limit. At this time bind hash table has exactly one reuse-enbaled socket in a bucket, but it is possible that they have different addresses. Actually kernel selects the first port to try randomly, so at the beginning bind will take roughly constant time, but with time number of port to check after random start will increase. And that will have exponential growth, but because of above random selection, not every next port selection will necessary take longer time than previous. So we have to consider the area below in the graph (if you could zoom it, you could find, that there are many different times placed there), so area can hide another. Blue area corresponds to the port selection optimization. This is rather simple design approach: hashtable now maintains (unprecise and racely updated) number of currently bound sockets, and when number of such sockets becomes greater than predefined value (I use maximum port range defined by sysctls), we stop traversing the whole bind hash table and just stop at first matching bucket after random start. Above limit roughly corresponds to the case, when bind hash table is full and we turned on mechanism of allowing to bind more reuse-enabled sockets, so it does not change behaviour of other sockets. Signed-off-by: Evgeniy Polyakov Tested-by: Denys Fedoryschenko Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 41 ++++++++++++++++++++++++++++++++++------- net/ipv4/inet_hashtables.c | 11 ++++++++++- 2 files changed, 44 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f26ab38680de..df8e72f07478 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -93,24 +93,40 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) struct inet_bind_hashbucket *head; struct hlist_node *node; struct inet_bind_bucket *tb; - int ret; + int ret, attempts = 5; struct net *net = sock_net(sk); + int smallest_size = -1, smallest_rover; local_bh_disable(); if (!snum) { int remaining, rover, low, high; +again: inet_get_local_port_range(&low, &high); remaining = (high - low) + 1; - rover = net_random() % remaining + low; + smallest_rover = rover = net_random() % remaining + low; + smallest_size = -1; do { head = &hashinfo->bhash[inet_bhashfn(net, rover, hashinfo->bhash_size)]; spin_lock(&head->lock); inet_bind_bucket_for_each(tb, node, &head->chain) - if (ib_net(tb) == net && tb->port == rover) + if (ib_net(tb) == net && tb->port == rover) { + if (tb->fastreuse > 0 && + sk->sk_reuse && + sk->sk_state != TCP_LISTEN && + (tb->num_owners < smallest_size || smallest_size == -1)) { + smallest_size = tb->num_owners; + smallest_rover = rover; + if (hashinfo->bsockets > (high - low) + 1) { + spin_unlock(&head->lock); + snum = smallest_rover; + goto have_snum; + } + } goto next; + } break; next: spin_unlock(&head->lock); @@ -125,14 +141,19 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) * the top level, not from the 'break;' statement. */ ret = 1; - if (remaining <= 0) + if (remaining <= 0) { + if (smallest_size != -1) { + snum = smallest_rover; + goto have_snum; + } goto fail; - + } /* OK, here is the one we will use. HEAD is * non-NULL and we hold it's mutex. */ snum = rover; } else { +have_snum: head = &hashinfo->bhash[inet_bhashfn(net, snum, hashinfo->bhash_size)]; spin_lock(&head->lock); @@ -145,12 +166,18 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) tb_found: if (!hlist_empty(&tb->owners)) { if (tb->fastreuse > 0 && - sk->sk_reuse && sk->sk_state != TCP_LISTEN) { + sk->sk_reuse && sk->sk_state != TCP_LISTEN && + smallest_size == -1) { goto success; } else { ret = 1; - if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) + if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) { + if (sk->sk_reuse && sk->sk_state != TCP_LISTEN && --attempts >= 0) { + spin_unlock(&head->lock); + goto again; + } goto fail_unlock; + } } } tb_not_found: diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 6a1045da48d2..d7b6178bf48b 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -38,6 +38,7 @@ struct inet_bind_bucket *inet_bind_bucket_create(struct kmem_cache *cachep, write_pnet(&tb->ib_net, hold_net(net)); tb->port = snum; tb->fastreuse = 0; + tb->num_owners = 0; INIT_HLIST_HEAD(&tb->owners); hlist_add_head(&tb->node, &head->chain); } @@ -59,8 +60,13 @@ void inet_bind_bucket_destroy(struct kmem_cache *cachep, struct inet_bind_bucket void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, const unsigned short snum) { + struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; + + hashinfo->bsockets++; + inet_sk(sk)->num = snum; sk_add_bind_node(sk, &tb->owners); + tb->num_owners++; inet_csk(sk)->icsk_bind_hash = tb; } @@ -75,9 +81,12 @@ static void __inet_put_port(struct sock *sk) struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; struct inet_bind_bucket *tb; + hashinfo->bsockets--; + spin_lock(&head->lock); tb = inet_csk(sk)->icsk_bind_hash; __sk_del_bind_node(sk); + tb->num_owners--; inet_csk(sk)->icsk_bind_hash = NULL; inet_sk(sk)->num = 0; inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb); @@ -444,9 +453,9 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, */ inet_bind_bucket_for_each(tb, node, &head->chain) { if (ib_net(tb) == net && tb->port == port) { - WARN_ON(hlist_empty(&tb->owners)); if (tb->fastreuse >= 0) goto next_port; + WARN_ON(hlist_empty(&tb->owners)); if (!check_established(death_row, sk, port, &tw)) goto ok; -- cgit v1.2.3 From 749c10f931923451a4c59b4435d182aa9ae27a4f Mon Sep 17 00:00:00 2001 From: Timo Teras Date: Mon, 19 Jan 2009 17:22:12 -0800 Subject: gre: strict physical device binding Check the device on receive path and allow otherwise identical devices as long as the physical device differs. This is useful for NBMA tunnels, where you want to use different gre IP for each public IP available via different physical devices. Signed-off-by: Timo Teras Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 128 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 88 insertions(+), 40 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 0101521f366b..4a43739c9035 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -164,67 +164,113 @@ static DEFINE_RWLOCK(ipgre_lock); /* Given src, dst and key, find appropriate for input tunnel. */ -static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net, +static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, __be32 remote, __be32 local, __be32 key, __be16 gre_proto) { + struct net *net = dev_net(dev); + int link = dev->ifindex; unsigned h0 = HASH(remote); unsigned h1 = HASH(key); - struct ip_tunnel *t; - struct ip_tunnel *t2 = NULL; + struct ip_tunnel *t, *sel[4] = { NULL, NULL, NULL, NULL }; struct ipgre_net *ign = net_generic(net, ipgre_net_id); int dev_type = (gre_proto == htons(ETH_P_TEB)) ? ARPHRD_ETHER : ARPHRD_IPGRE; + int idx; for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) { - if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { - if (t->parms.i_key == key && t->dev->flags & IFF_UP) { - if (t->dev->type == dev_type) - return t; - if (t->dev->type == ARPHRD_IPGRE && !t2) - t2 = t; - } - } + if (local != t->parms.iph.saddr || + remote != t->parms.iph.daddr || + key != t->parms.i_key || + !(t->dev->flags & IFF_UP)) + continue; + + if (t->dev->type != ARPHRD_IPGRE && + t->dev->type != dev_type) + continue; + + idx = 0; + if (t->parms.link != link) + idx |= 1; + if (t->dev->type != dev_type) + idx |= 2; + if (idx == 0) + return t; + if (sel[idx] == NULL) + sel[idx] = t; } for (t = ign->tunnels_r[h0^h1]; t; t = t->next) { - if (remote == t->parms.iph.daddr) { - if (t->parms.i_key == key && t->dev->flags & IFF_UP) { - if (t->dev->type == dev_type) - return t; - if (t->dev->type == ARPHRD_IPGRE && !t2) - t2 = t; - } - } + if (remote != t->parms.iph.daddr || + key != t->parms.i_key || + !(t->dev->flags & IFF_UP)) + continue; + + if (t->dev->type != ARPHRD_IPGRE && + t->dev->type != dev_type) + continue; + + idx = 0; + if (t->parms.link != link) + idx |= 1; + if (t->dev->type != dev_type) + idx |= 2; + if (idx == 0) + return t; + if (sel[idx] == NULL) + sel[idx] = t; } for (t = ign->tunnels_l[h1]; t; t = t->next) { - if (local == t->parms.iph.saddr || - (local == t->parms.iph.daddr && - ipv4_is_multicast(local))) { - if (t->parms.i_key == key && t->dev->flags & IFF_UP) { - if (t->dev->type == dev_type) - return t; - if (t->dev->type == ARPHRD_IPGRE && !t2) - t2 = t; - } - } + if ((local != t->parms.iph.saddr && + (local != t->parms.iph.daddr || + !ipv4_is_multicast(local))) || + key != t->parms.i_key || + !(t->dev->flags & IFF_UP)) + continue; + + if (t->dev->type != ARPHRD_IPGRE && + t->dev->type != dev_type) + continue; + + idx = 0; + if (t->parms.link != link) + idx |= 1; + if (t->dev->type != dev_type) + idx |= 2; + if (idx == 0) + return t; + if (sel[idx] == NULL) + sel[idx] = t; } for (t = ign->tunnels_wc[h1]; t; t = t->next) { - if (t->parms.i_key == key && t->dev->flags & IFF_UP) { - if (t->dev->type == dev_type) - return t; - if (t->dev->type == ARPHRD_IPGRE && !t2) - t2 = t; - } + if (t->parms.i_key != key || + !(t->dev->flags & IFF_UP)) + continue; + + if (t->dev->type != ARPHRD_IPGRE && + t->dev->type != dev_type) + continue; + + idx = 0; + if (t->parms.link != link) + idx |= 1; + if (t->dev->type != dev_type) + idx |= 2; + if (idx == 0) + return t; + if (sel[idx] == NULL) + sel[idx] = t; } - if (t2) - return t2; + for (idx = 1; idx < ARRAY_SIZE(sel); idx++) + if (sel[idx] != NULL) + return sel[idx]; - if (ign->fb_tunnel_dev->flags&IFF_UP) + if (ign->fb_tunnel_dev->flags & IFF_UP) return netdev_priv(ign->fb_tunnel_dev); + return NULL; } @@ -284,6 +330,7 @@ static struct ip_tunnel *ipgre_tunnel_find(struct net *net, __be32 remote = parms->iph.daddr; __be32 local = parms->iph.saddr; __be32 key = parms->i_key; + int link = parms->link; struct ip_tunnel *t, **tp; struct ipgre_net *ign = net_generic(net, ipgre_net_id); @@ -291,6 +338,7 @@ static struct ip_tunnel *ipgre_tunnel_find(struct net *net, if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr && key == t->parms.i_key && + link == t->parms.link && type == t->dev->type) break; @@ -421,7 +469,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info) } read_lock(&ipgre_lock); - t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr, + t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr, flags & GRE_KEY ? *(((__be32 *)p) + (grehlen / 4) - 1) : 0, p[1]); @@ -518,7 +566,7 @@ static int ipgre_rcv(struct sk_buff *skb) gre_proto = *(__be16 *)(h + 2); read_lock(&ipgre_lock); - if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev), + if ((tunnel = ipgre_tunnel_lookup(skb->dev, iph->saddr, iph->daddr, key, gre_proto))) { struct net_device_stats *stats = &tunnel->dev->stats; -- cgit v1.2.3 From 4fe1d58bf56f69de68868630d222322a6b45bb55 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 22 Jan 2009 13:49:44 -0800 Subject: sctp/ipv6.c: use ipv6_addr_copy Signed-off-by: Joe Perches Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index ceaa4aa066ea..786227566696 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -97,8 +97,7 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, if (addr) { addr->a.v6.sin6_family = AF_INET6; addr->a.v6.sin6_port = 0; - memcpy(&addr->a.v6.sin6_addr, &ifa->addr, - sizeof(struct in6_addr)); + ipv6_addr_copy(&addr->a.v6.sin6_addr, &ifa->addr); addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex; addr->valid = 1; spin_lock_bh(&sctp_local_addr_lock); -- cgit v1.2.3 From 70a269e6c9c9b38b1a37dce068c59e9a912f8578 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Thu, 22 Jan 2009 04:56:15 +0000 Subject: netns: ipmr: allocate mroute_socket per-namespace. Preliminary work to make IPv4 multicast routing netns-aware. Make IPv4 multicast routing mroute_socket per-namespace, moves it into struct netns_ipv4. At the moment, mroute_socket is only referenced in init_net. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 14666449dc1c..ac324b702e8b 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -67,9 +67,6 @@ #define CONFIG_IP_PIMSM 1 #endif -static struct sock *mroute_socket; - - /* Big lock, protecting vif table, mrt cache and mroute socket state. Note that the changes are semaphored via rtnl_lock. */ @@ -658,7 +655,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) skb->transport_header = skb->network_header; } - if (mroute_socket == NULL) { + if (init_net.ipv4.mroute_sk == NULL) { kfree_skb(skb); return -EINVAL; } @@ -666,7 +663,8 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) /* * Deliver to mrouted */ - if ((ret = sock_queue_rcv_skb(mroute_socket, skb))<0) { + ret = sock_queue_rcv_skb(init_net.ipv4.mroute_sk, skb); + if (ret < 0) { if (net_ratelimit()) printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n"); kfree_skb(skb); @@ -896,11 +894,11 @@ static void mroute_clean_tables(struct sock *sk) static void mrtsock_destruct(struct sock *sk) { rtnl_lock(); - if (sk == mroute_socket) { + if (sk == init_net.ipv4.mroute_sk) { IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--; write_lock_bh(&mrt_lock); - mroute_socket = NULL; + init_net.ipv4.mroute_sk = NULL; write_unlock_bh(&mrt_lock); mroute_clean_tables(sk); @@ -922,7 +920,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int struct mfcctl mfc; if (optname != MRT_INIT) { - if (sk != mroute_socket && !capable(CAP_NET_ADMIN)) + if (sk != init_net.ipv4.mroute_sk && !capable(CAP_NET_ADMIN)) return -EACCES; } @@ -935,7 +933,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int return -ENOPROTOOPT; rtnl_lock(); - if (mroute_socket) { + if (init_net.ipv4.mroute_sk) { rtnl_unlock(); return -EADDRINUSE; } @@ -943,7 +941,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int ret = ip_ra_control(sk, 1, mrtsock_destruct); if (ret == 0) { write_lock_bh(&mrt_lock); - mroute_socket = sk; + init_net.ipv4.mroute_sk = sk; write_unlock_bh(&mrt_lock); IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++; @@ -951,7 +949,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int rtnl_unlock(); return ret; case MRT_DONE: - if (sk != mroute_socket) + if (sk != init_net.ipv4.mroute_sk) return -EACCES; return ip_ra_control(sk, 0, NULL); case MRT_ADD_VIF: @@ -964,7 +962,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int return -ENFILE; rtnl_lock(); if (optname == MRT_ADD_VIF) { - ret = vif_add(&vif, sk==mroute_socket); + ret = vif_add(&vif, sk == init_net.ipv4.mroute_sk); } else { ret = vif_delete(vif.vifc_vifi, 0); } @@ -985,7 +983,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int if (optname == MRT_DEL_MFC) ret = ipmr_mfc_delete(&mfc); else - ret = ipmr_mfc_add(&mfc, sk==mroute_socket); + ret = ipmr_mfc_add(&mfc, sk == init_net.ipv4.mroute_sk); rtnl_unlock(); return ret; /* @@ -1425,9 +1423,9 @@ int ip_mr_input(struct sk_buff *skb) that we can forward NO IGMP messages. */ read_lock(&mrt_lock); - if (mroute_socket) { + if (init_net.ipv4.mroute_sk) { nf_reset(skb); - raw_rcv(mroute_socket, skb); + raw_rcv(init_net.ipv4.mroute_sk, skb); read_unlock(&mrt_lock); return 0; } -- cgit v1.2.3 From cf958ae377ee2545ae70cf48d38e7eb4308c27ea Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Thu, 22 Jan 2009 04:56:16 +0000 Subject: netns: ipmr: dynamically allocate vif_table Preliminary work to make IPv6 multicast routing netns-aware. Dynamically allocate interface table vif_table and move it to struct netns_ipv4, and update MIF_EXISTS() macro. At the moment, vif_table is only referenced in init_net. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 109 +++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 68 insertions(+), 41 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index ac324b702e8b..75a5f79cc226 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -77,10 +77,7 @@ static DEFINE_RWLOCK(mrt_lock); * Multicast router control variables */ -static struct vif_device vif_table[MAXVIFS]; /* Devices */ -static int maxvif; - -#define VIF_EXISTS(idx) (vif_table[idx].dev != NULL) +#define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL) static int mroute_do_assert; /* Set in PIM assert */ static int mroute_do_pim; @@ -286,10 +283,10 @@ static int vif_delete(int vifi, int notify) struct net_device *dev; struct in_device *in_dev; - if (vifi < 0 || vifi >= maxvif) + if (vifi < 0 || vifi >= init_net.ipv4.maxvif) return -EADDRNOTAVAIL; - v = &vif_table[vifi]; + v = &init_net.ipv4.vif_table[vifi]; write_lock_bh(&mrt_lock); dev = v->dev; @@ -305,13 +302,13 @@ static int vif_delete(int vifi, int notify) reg_vif_num = -1; #endif - if (vifi+1 == maxvif) { + if (vifi+1 == init_net.ipv4.maxvif) { int tmp; for (tmp=vifi-1; tmp>=0; tmp--) { - if (VIF_EXISTS(tmp)) + if (VIF_EXISTS(&init_net, tmp)) break; } - maxvif = tmp+1; + init_net.ipv4.maxvif = tmp+1; } write_unlock_bh(&mrt_lock); @@ -411,8 +408,9 @@ static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls) cache->mfc_un.res.maxvif = 0; memset(cache->mfc_un.res.ttls, 255, MAXVIFS); - for (vifi=0; vifimfc_un.res.ttls[vifi] = ttls[vifi]; if (cache->mfc_un.res.minvif > vifi) cache->mfc_un.res.minvif = vifi; @@ -425,13 +423,13 @@ static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls) static int vif_add(struct vifctl *vifc, int mrtsock) { int vifi = vifc->vifc_vifi; - struct vif_device *v = &vif_table[vifi]; + struct vif_device *v = &init_net.ipv4.vif_table[vifi]; struct net_device *dev; struct in_device *in_dev; int err; /* Is vif busy ? */ - if (VIF_EXISTS(vifi)) + if (VIF_EXISTS(&init_net, vifi)) return -EADDRINUSE; switch (vifc->vifc_flags) { @@ -509,8 +507,8 @@ static int vif_add(struct vifctl *vifc, int mrtsock) if (v->flags&VIFF_REGISTER) reg_vif_num = vifi; #endif - if (vifi+1 > maxvif) - maxvif = vifi+1; + if (vifi+1 > init_net.ipv4.maxvif) + init_net.ipv4.maxvif = vifi+1; write_unlock_bh(&mrt_lock); return 0; } @@ -849,8 +847,8 @@ static void mroute_clean_tables(struct sock *sk) /* * Shut down all active vif entries */ - for (i=0; i= maxvif) + if (vr.vifi >= init_net.ipv4.maxvif) return -EINVAL; read_lock(&mrt_lock); - vif=&vif_table[vr.vifi]; - if (VIF_EXISTS(vr.vifi)) { + vif = &init_net.ipv4.vif_table[vr.vifi]; + if (VIF_EXISTS(&init_net, vr.vifi)) { vr.icount = vif->pkt_in; vr.ocount = vif->pkt_out; vr.ibytes = vif->bytes_in; @@ -1140,8 +1138,8 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; - v=&vif_table[0]; - for (ct=0; ctdev == dev) vif_delete(ct, 1); } @@ -1204,7 +1202,7 @@ static inline int ipmr_forward_finish(struct sk_buff *skb) static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) { const struct iphdr *iph = ip_hdr(skb); - struct vif_device *vif = &vif_table[vifi]; + struct vif_device *vif = &init_net.ipv4.vif_table[vifi]; struct net_device *dev; struct rtable *rt; int encap = 0; @@ -1305,8 +1303,8 @@ out_free: static int ipmr_find_vif(struct net_device *dev) { int ct; - for (ct=maxvif-1; ct>=0; ct--) { - if (vif_table[ct].dev == dev) + for (ct = init_net.ipv4.maxvif-1; ct >= 0; ct--) { + if (init_net.ipv4.vif_table[ct].dev == dev) break; } return ct; @@ -1326,7 +1324,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local /* * Wrong interface: drop packet and (maybe) send PIM assert. */ - if (vif_table[vif].dev != skb->dev) { + if (init_net.ipv4.vif_table[vif].dev != skb->dev) { int true_vifi; if (skb->rtable->fl.iif == 0) { @@ -1362,8 +1360,8 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local goto dont_forward; } - vif_table[vif].pkt_in++; - vif_table[vif].bytes_in += skb->len; + init_net.ipv4.vif_table[vif].pkt_in++; + init_net.ipv4.vif_table[vif].bytes_in += skb->len; /* * Forward the frame @@ -1500,7 +1498,7 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen) read_lock(&mrt_lock); if (reg_vif_num >= 0) - reg_dev = vif_table[reg_vif_num].dev; + reg_dev = init_net.ipv4.vif_table[reg_vif_num].dev; if (reg_dev) dev_hold(reg_dev); read_unlock(&mrt_lock); @@ -1581,7 +1579,7 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm) { int ct; struct rtnexthop *nhp; - struct net_device *dev = vif_table[c->mfc_parent].dev; + struct net_device *dev = init_net.ipv4.vif_table[c->mfc_parent].dev; u8 *b = skb_tail_pointer(skb); struct rtattr *mp_head; @@ -1597,7 +1595,7 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm) nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); nhp->rtnh_flags = 0; nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; - nhp->rtnh_ifindex = vif_table[ct].dev->ifindex; + nhp->rtnh_ifindex = init_net.ipv4.vif_table[ct].dev->ifindex; nhp->rtnh_len = sizeof(*nhp); } } @@ -1672,11 +1670,11 @@ struct ipmr_vif_iter { static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter, loff_t pos) { - for (iter->ct = 0; iter->ct < maxvif; ++iter->ct) { - if (!VIF_EXISTS(iter->ct)) + for (iter->ct = 0; iter->ct < init_net.ipv4.maxvif; ++iter->ct) { + if (!VIF_EXISTS(&init_net, iter->ct)) continue; if (pos-- == 0) - return &vif_table[iter->ct]; + return &init_net.ipv4.vif_table[iter->ct]; } return NULL; } @@ -1697,10 +1695,10 @@ static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (v == SEQ_START_TOKEN) return ipmr_vif_seq_idx(iter, 0); - while (++iter->ct < maxvif) { - if (!VIF_EXISTS(iter->ct)) + while (++iter->ct < init_net.ipv4.maxvif) { + if (!VIF_EXISTS(&init_net, iter->ct)) continue; - return &vif_table[iter->ct]; + return &init_net.ipv4.vif_table[iter->ct]; } return NULL; } @@ -1722,7 +1720,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", - vif - vif_table, + vif - init_net.ipv4.vif_table, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out, vif->flags, vif->local, vif->remote); @@ -1864,9 +1862,9 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) mfc->mfc_un.res.wrong_if); for (n = mfc->mfc_un.res.minvif; n < mfc->mfc_un.res.maxvif; n++ ) { - if (VIF_EXISTS(n) - && mfc->mfc_un.res.ttls[n] < 255) - seq_printf(seq, + if (VIF_EXISTS(&init_net, n) && + mfc->mfc_un.res.ttls[n] < 255) + seq_printf(seq, " %2d:%-3d", n, mfc->mfc_un.res.ttls[n]); } @@ -1913,6 +1911,29 @@ static struct net_protocol pim_protocol = { /* * Setup for IP multicast routing */ +static int __net_init ipmr_net_init(struct net *net) +{ + int err = 0; + + net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device), + GFP_KERNEL); + if (!net->ipv4.vif_table) { + err = -ENOMEM; + goto fail; + } +fail: + return err; +} + +static void __net_exit ipmr_net_exit(struct net *net) +{ + kfree(net->ipv4.vif_table); +} + +static struct pernet_operations ipmr_net_ops = { + .init = ipmr_net_init, + .exit = ipmr_net_exit, +}; int __init ip_mr_init(void) { @@ -1925,6 +1946,10 @@ int __init ip_mr_init(void) if (!mrt_cachep) return -ENOMEM; + err = register_pernet_subsys(&ipmr_net_ops); + if (err) + goto reg_pernet_fail; + setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0); err = register_netdevice_notifier(&ip_mr_notifier); if (err) @@ -1945,6 +1970,8 @@ proc_vif_fail: #endif reg_notif_fail: del_timer(&ipmr_expire_timer); + unregister_pernet_subsys(&ipmr_net_ops); +reg_pernet_fail: kmem_cache_destroy(mrt_cachep); return err; } -- cgit v1.2.3 From 5c0a66f5f3c9c59e2c341400048e2cff768e67a9 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Thu, 22 Jan 2009 04:56:17 +0000 Subject: netns: ipmr: store netns in struct mfc_cache This patch stores into struct mfc_cache the network namespace each mfc_cache belongs to. The new member is mfc_net. mfc_net is assigned at cache allocation and doesn't change during the rest of the cache entry life. A new net parameter is added to ipmr_cache_alloc/ipmr_cache_alloc_unres. This will help to retrieve the current netns around the IPv4 multicast routing code. At the moment, all mfc_cache are allocated in init_net. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 75a5f79cc226..8428a0fb5c10 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -327,6 +327,12 @@ static int vif_delete(int vifi, int notify) return 0; } +static inline void ipmr_cache_free(struct mfc_cache *c) +{ + release_net(mfc_net(c)); + kmem_cache_free(mrt_cachep, c); +} + /* Destroy an unresolved cache entry, killing queued skbs and reporting error to netlink readers. */ @@ -353,7 +359,7 @@ static void ipmr_destroy_unres(struct mfc_cache *c) kfree_skb(skb); } - kmem_cache_free(mrt_cachep, c); + ipmr_cache_free(c); } @@ -528,22 +534,24 @@ static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp) /* * Allocate a multicast cache entry */ -static struct mfc_cache *ipmr_cache_alloc(void) +static struct mfc_cache *ipmr_cache_alloc(struct net *net) { struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); if (c == NULL) return NULL; c->mfc_un.res.minvif = MAXVIFS; + mfc_net_set(c, net); return c; } -static struct mfc_cache *ipmr_cache_alloc_unres(void) +static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net) { struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); if (c == NULL) return NULL; skb_queue_head_init(&c->mfc_un.unres.unresolved); c->mfc_un.unres.expires = jiffies + 10*HZ; + mfc_net_set(c, net); return c; } @@ -695,7 +703,7 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb) */ if (atomic_read(&cache_resolve_queue_len) >= 10 || - (c=ipmr_cache_alloc_unres())==NULL) { + (c = ipmr_cache_alloc_unres(&init_net)) == NULL) { spin_unlock_bh(&mfc_unres_lock); kfree_skb(skb); @@ -718,7 +726,7 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb) */ spin_unlock_bh(&mfc_unres_lock); - kmem_cache_free(mrt_cachep, c); + ipmr_cache_free(c); kfree_skb(skb); return err; } @@ -763,7 +771,7 @@ static int ipmr_mfc_delete(struct mfcctl *mfc) *cp = c->next; write_unlock_bh(&mrt_lock); - kmem_cache_free(mrt_cachep, c); + ipmr_cache_free(c); return 0; } } @@ -796,7 +804,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock) if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) return -EINVAL; - c = ipmr_cache_alloc(); + c = ipmr_cache_alloc(&init_net); if (c == NULL) return -ENOMEM; @@ -831,7 +839,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock) if (uc) { ipmr_cache_resolve(uc, c); - kmem_cache_free(mrt_cachep, uc); + ipmr_cache_free(uc); } return 0; } @@ -868,7 +876,7 @@ static void mroute_clean_tables(struct sock *sk) *cp = c->next; write_unlock_bh(&mrt_lock); - kmem_cache_free(mrt_cachep, c); + ipmr_cache_free(c); } } -- cgit v1.2.3 From 2bb8b26c3ea8bde1943dc5cd4dda2dc9f48fb281 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Thu, 22 Jan 2009 04:56:18 +0000 Subject: netns: ipmr: dynamically allocate mfc_cache_array Preliminary work to make IPv4 multicast routing netns-aware. Dynamically allocate IPv4 multicast forwarding cache, mfc_cache_array, and move it to struct netns_ipv4. At the moment, mfc_cache_array is only referenced in init_net. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 41 ++++++++++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8428a0fb5c10..35b868dd3bfd 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -82,8 +82,6 @@ static DEFINE_RWLOCK(mrt_lock); static int mroute_do_assert; /* Set in PIM assert */ static int mroute_do_pim; -static struct mfc_cache *mfc_cache_array[MFC_LINES]; /* Forwarding cache */ - static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */ static atomic_t cache_resolve_queue_len; /* Size of unresolved */ @@ -524,7 +522,7 @@ static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp) int line = MFC_HASH(mcastgrp, origin); struct mfc_cache *c; - for (c=mfc_cache_array[line]; c; c = c->next) { + for (c = init_net.ipv4.mfc_cache_array[line]; c; c = c->next) { if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp) break; } @@ -764,7 +762,8 @@ static int ipmr_mfc_delete(struct mfcctl *mfc) line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); - for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) { + for (cp = &init_net.ipv4.mfc_cache_array[line]; + (c = *cp) != NULL; cp = &c->next) { if (c->mfc_origin == mfc->mfcc_origin.s_addr && c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { write_lock_bh(&mrt_lock); @@ -785,7 +784,8 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock) line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); - for (cp=&mfc_cache_array[line]; (c=*cp) != NULL; cp = &c->next) { + for (cp = &init_net.ipv4.mfc_cache_array[line]; + (c = *cp) != NULL; cp = &c->next) { if (c->mfc_origin == mfc->mfcc_origin.s_addr && c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) break; @@ -816,8 +816,8 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock) c->mfc_flags |= MFC_STATIC; write_lock_bh(&mrt_lock); - c->next = mfc_cache_array[line]; - mfc_cache_array[line] = c; + c->next = init_net.ipv4.mfc_cache_array[line]; + init_net.ipv4.mfc_cache_array[line] = c; write_unlock_bh(&mrt_lock); /* @@ -866,7 +866,7 @@ static void mroute_clean_tables(struct sock *sk) for (i=0; imfc_flags&MFC_STATIC) { cp = &c->next; @@ -1767,10 +1767,11 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos) { struct mfc_cache *mfc; - it->cache = mfc_cache_array; + it->cache = init_net.ipv4.mfc_cache_array; read_lock(&mrt_lock); for (it->ct = 0; it->ct < MFC_LINES; it->ct++) - for (mfc = mfc_cache_array[it->ct]; mfc; mfc = mfc->next) + for (mfc = init_net.ipv4.mfc_cache_array[it->ct]; + mfc; mfc = mfc->next) if (pos-- == 0) return mfc; read_unlock(&mrt_lock); @@ -1812,10 +1813,10 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (it->cache == &mfc_unres_queue) goto end_of_list; - BUG_ON(it->cache != mfc_cache_array); + BUG_ON(it->cache != init_net.ipv4.mfc_cache_array); while (++it->ct < MFC_LINES) { - mfc = mfc_cache_array[it->ct]; + mfc = init_net.ipv4.mfc_cache_array[it->ct]; if (mfc) return mfc; } @@ -1843,7 +1844,7 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) if (it->cache == &mfc_unres_queue) spin_unlock_bh(&mfc_unres_lock); - else if (it->cache == mfc_cache_array) + else if (it->cache == init_net.ipv4.mfc_cache_array) read_unlock(&mrt_lock); } @@ -1929,12 +1930,26 @@ static int __net_init ipmr_net_init(struct net *net) err = -ENOMEM; goto fail; } + + /* Forwarding cache */ + net->ipv4.mfc_cache_array = kcalloc(MFC_LINES, + sizeof(struct mfc_cache *), + GFP_KERNEL); + if (!net->ipv4.mfc_cache_array) { + err = -ENOMEM; + goto fail_mfc_cache; + } + return 0; + +fail_mfc_cache: + kfree(net->ipv4.vif_table); fail: return err; } static void __net_exit ipmr_net_exit(struct net *net) { + kfree(net->ipv4.mfc_cache_array); kfree(net->ipv4.vif_table); } -- cgit v1.2.3 From 1e8fb3b6a4ac6c5e486298d88289038456957545 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Thu, 22 Jan 2009 04:56:19 +0000 Subject: netns: ipmr: declare counter cache_resolve_queue_len per-namespace Preliminary work to make IPv4 multicast routing netns-aware. Declare variable cache_resolve_queue_len per-namespace: move it into struct netns_ipv4. This variable counts the number of unresolved cache entries queued in the list mfc_unres_queue. This list is kept global to all netns as the number of entries per namespace is limited to 10 (hardcoded in routine ipmr_cache_unresolved). Entries belonging to different namespaces in mfc_unres_queue will be identified by matching the mfc_net member introduced previously in struct mfc_cache. Keeping this list global to all netns, also allows us to keep a single timer (ipmr_expire_timer) to handle their expiration. In some places cache_resolve_queue_len value was tested for arming or deleting the timer. These tests were equivalent to testing mfc_unres_queue value instead and are replaced in this patch. At the moment, cache_resolve_queue_len is only referenced in init_net. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 35b868dd3bfd..feafd14eb7b9 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -83,7 +83,6 @@ static int mroute_do_assert; /* Set in PIM assert */ static int mroute_do_pim; static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */ -static atomic_t cache_resolve_queue_len; /* Size of unresolved */ /* Special spinlock for queue of unresolved entries */ static DEFINE_SPINLOCK(mfc_unres_lock); @@ -340,7 +339,7 @@ static void ipmr_destroy_unres(struct mfc_cache *c) struct sk_buff *skb; struct nlmsgerr *e; - atomic_dec(&cache_resolve_queue_len); + atomic_dec(&init_net.ipv4.cache_resolve_queue_len); while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) { if (ip_hdr(skb)->version == 0) { @@ -374,7 +373,7 @@ static void ipmr_expire_process(unsigned long dummy) return; } - if (atomic_read(&cache_resolve_queue_len) == 0) + if (mfc_unres_queue == NULL) goto out; now = jiffies; @@ -395,7 +394,7 @@ static void ipmr_expire_process(unsigned long dummy) ipmr_destroy_unres(c); } - if (atomic_read(&cache_resolve_queue_len)) + if (mfc_unres_queue != NULL) mod_timer(&ipmr_expire_timer, jiffies + expires); out: @@ -690,7 +689,8 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb) spin_lock_bh(&mfc_unres_lock); for (c=mfc_unres_queue; c; c=c->next) { - if (c->mfc_mcastgrp == iph->daddr && + if (net_eq(mfc_net(c), &init_net) && + c->mfc_mcastgrp == iph->daddr && c->mfc_origin == iph->saddr) break; } @@ -700,7 +700,7 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb) * Create a new entry if allowable */ - if (atomic_read(&cache_resolve_queue_len) >= 10 || + if (atomic_read(&init_net.ipv4.cache_resolve_queue_len) >= 10 || (c = ipmr_cache_alloc_unres(&init_net)) == NULL) { spin_unlock_bh(&mfc_unres_lock); @@ -729,7 +729,7 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb) return err; } - atomic_inc(&cache_resolve_queue_len); + atomic_inc(&init_net.ipv4.cache_resolve_queue_len); c->next = mfc_unres_queue; mfc_unres_queue = c; @@ -827,14 +827,16 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock) spin_lock_bh(&mfc_unres_lock); for (cp = &mfc_unres_queue; (uc=*cp) != NULL; cp = &uc->next) { - if (uc->mfc_origin == c->mfc_origin && + if (net_eq(mfc_net(uc), &init_net) && + uc->mfc_origin == c->mfc_origin && uc->mfc_mcastgrp == c->mfc_mcastgrp) { *cp = uc->next; - if (atomic_dec_and_test(&cache_resolve_queue_len)) - del_timer(&ipmr_expire_timer); + atomic_dec(&init_net.ipv4.cache_resolve_queue_len); break; } } + if (mfc_unres_queue == NULL) + del_timer(&ipmr_expire_timer); spin_unlock_bh(&mfc_unres_lock); if (uc) { @@ -880,18 +882,19 @@ static void mroute_clean_tables(struct sock *sk) } } - if (atomic_read(&cache_resolve_queue_len) != 0) { - struct mfc_cache *c; + if (atomic_read(&init_net.ipv4.cache_resolve_queue_len) != 0) { + struct mfc_cache *c, **cp; spin_lock_bh(&mfc_unres_lock); - while (mfc_unres_queue != NULL) { - c = mfc_unres_queue; - mfc_unres_queue = c->next; - spin_unlock_bh(&mfc_unres_lock); + cp = &mfc_unres_queue; + while ((c = *cp) != NULL) { + if (!net_eq(mfc_net(c), &init_net)) { + cp = &c->next; + continue; + } + *cp = c->next; ipmr_destroy_unres(c); - - spin_lock_bh(&mfc_unres_lock); } spin_unlock_bh(&mfc_unres_lock); } -- cgit v1.2.3 From 6f9374a9342e896c68df7cf7c0b039ab5cca994c Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Thu, 22 Jan 2009 04:56:20 +0000 Subject: netns: ipmr: declare mroute_do_assert and mroute_do_pim per-namespace Preliminary work to make IPv4 multicast routing netns-aware. Declare IPv multicast routing variables 'mroute_do_assert' and 'mroute_do_pim' per-namespace in struct netns_ipv4. At the moment, these variables are only referenced in init_net. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index feafd14eb7b9..d6a28acc0683 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -79,9 +79,6 @@ static DEFINE_RWLOCK(mrt_lock); #define VIF_EXISTS(_net, _idx) ((_net)->ipv4.vif_table[_idx].dev != NULL) -static int mroute_do_assert; /* Set in PIM assert */ -static int mroute_do_pim; - static struct mfc_cache *mfc_unres_queue; /* Queue of unresolved entries */ /* Special spinlock for queue of unresolved entries */ @@ -1003,7 +1000,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int int v; if (get_user(v,(int __user *)optval)) return -EFAULT; - mroute_do_assert=(v)?1:0; + init_net.ipv4.mroute_do_assert = (v) ? 1 : 0; return 0; } #ifdef CONFIG_IP_PIMSM @@ -1017,11 +1014,11 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int rtnl_lock(); ret = 0; - if (v != mroute_do_pim) { - mroute_do_pim = v; - mroute_do_assert = v; + if (v != init_net.ipv4.mroute_do_pim) { + init_net.ipv4.mroute_do_pim = v; + init_net.ipv4.mroute_do_assert = v; #ifdef CONFIG_IP_PIMSM_V2 - if (mroute_do_pim) + if (init_net.ipv4.mroute_do_pim) ret = inet_add_protocol(&pim_protocol, IPPROTO_PIM); else @@ -1073,10 +1070,10 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int val = 0x0305; #ifdef CONFIG_IP_PIMSM else if (optname == MRT_PIM) - val = mroute_do_pim; + val = init_net.ipv4.mroute_do_pim; #endif else - val = mroute_do_assert; + val = init_net.ipv4.mroute_do_assert; if (copy_to_user(optval, &val, olr)) return -EFAULT; return 0; @@ -1356,13 +1353,14 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local cache->mfc_un.res.wrong_if++; true_vifi = ipmr_find_vif(skb->dev); - if (true_vifi >= 0 && mroute_do_assert && + if (true_vifi >= 0 && init_net.ipv4.mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, so that we cannot check that packet arrived on an oif. It is bad, but otherwise we would need to move pretty large chunk of pimd to kernel. Ough... --ANK */ - (mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) && + (init_net.ipv4.mroute_do_pim || + cache->mfc_un.res.ttls[true_vifi] < 255) && time_after(jiffies, cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { cache->mfc_un.res.last_assert = jiffies; @@ -1550,7 +1548,7 @@ int pim_rcv_v1(struct sk_buff * skb) pim = igmp_hdr(skb); - if (!mroute_do_pim || + if (!init_net.ipv4.mroute_do_pim || pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) goto drop; -- cgit v1.2.3 From 6c5143dbcfe50ac722965dc7d096abbeeec8bb33 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Thu, 22 Jan 2009 04:56:21 +0000 Subject: netns: ipmr: declare reg_vif_num per-namespace Preliminary work to make IPv4 multicast routing netns-aware. Declare variable 'reg_vif_num' per-namespace, move into struct netns_ipv4. At the moment, this variable is only referenced in init_net. Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index d6a28acc0683..346e67b50d6c 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -197,14 +197,12 @@ failure: #ifdef CONFIG_IP_PIMSM -static int reg_vif_num = -1; - static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { read_lock(&mrt_lock); dev->stats.tx_bytes += skb->len; dev->stats.tx_packets++; - ipmr_cache_report(skb, reg_vif_num, IGMPMSG_WHOLEPKT); + ipmr_cache_report(skb, init_net.ipv4.mroute_reg_vif_num, IGMPMSG_WHOLEPKT); read_unlock(&mrt_lock); kfree_skb(skb); return 0; @@ -292,8 +290,8 @@ static int vif_delete(int vifi, int notify) } #ifdef CONFIG_IP_PIMSM - if (vifi == reg_vif_num) - reg_vif_num = -1; + if (vifi == init_net.ipv4.mroute_reg_vif_num) + init_net.ipv4.mroute_reg_vif_num = -1; #endif if (vifi+1 == init_net.ipv4.maxvif) { @@ -439,7 +437,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock) * Special Purpose VIF in PIM * All the packets will be sent to the daemon */ - if (reg_vif_num >= 0) + if (init_net.ipv4.mroute_reg_vif_num >= 0) return -EADDRINUSE; dev = ipmr_reg_vif(); if (!dev) @@ -505,7 +503,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock) v->dev = dev; #ifdef CONFIG_IP_PIMSM if (v->flags&VIFF_REGISTER) - reg_vif_num = vifi; + init_net.ipv4.mroute_reg_vif_num = vifi; #endif if (vifi+1 > init_net.ipv4.maxvif) init_net.ipv4.maxvif = vifi+1; @@ -623,7 +621,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr)); msg->im_msgtype = IGMPMSG_WHOLEPKT; msg->im_mbz = 0; - msg->im_vif = reg_vif_num; + msg->im_vif = init_net.ipv4.mroute_reg_vif_num; ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + sizeof(struct iphdr)); @@ -1506,8 +1504,8 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen) return 1; read_lock(&mrt_lock); - if (reg_vif_num >= 0) - reg_dev = init_net.ipv4.vif_table[reg_vif_num].dev; + if (init_net.ipv4.mroute_reg_vif_num >= 0) + reg_dev = init_net.ipv4.vif_table[init_net.ipv4.mroute_reg_vif_num].dev; if (reg_dev) dev_hold(reg_dev); read_unlock(&mrt_lock); @@ -1940,6 +1938,10 @@ static int __net_init ipmr_net_init(struct net *net) err = -ENOMEM; goto fail_mfc_cache; } + +#ifdef CONFIG_IP_PIMSM + net->ipv4.mroute_reg_vif_num = -1; +#endif return 0; fail_mfc_cache: -- cgit v1.2.3 From f6bb451476be53d456e73bcfd82356afd680bbb0 Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Thu, 22 Jan 2009 04:56:22 +0000 Subject: netns: ipmr: declare ipmr /proc/net entries per-namespace Declare IPv4 multicast forwarding /proc/net entries per-namespace: /proc/net/ip_mr_vif /proc/net/ip_mr_cache Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 101 ++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 62 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 346e67b50d6c..a4fd97f1920c 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1671,17 +1671,19 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) * The /proc interfaces to multicast routing /proc/ip_mr_cache /proc/ip_mr_vif */ struct ipmr_vif_iter { + struct seq_net_private p; int ct; }; -static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter, +static struct vif_device *ipmr_vif_seq_idx(struct net *net, + struct ipmr_vif_iter *iter, loff_t pos) { - for (iter->ct = 0; iter->ct < init_net.ipv4.maxvif; ++iter->ct) { - if (!VIF_EXISTS(&init_net, iter->ct)) + for (iter->ct = 0; iter->ct < net->ipv4.maxvif; ++iter->ct) { + if (!VIF_EXISTS(net, iter->ct)) continue; if (pos-- == 0) - return &init_net.ipv4.vif_table[iter->ct]; + return &net->ipv4.vif_table[iter->ct]; } return NULL; } @@ -1689,23 +1691,26 @@ static struct vif_device *ipmr_vif_seq_idx(struct ipmr_vif_iter *iter, static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos) __acquires(mrt_lock) { + struct net *net = seq_file_net(seq); + read_lock(&mrt_lock); - return *pos ? ipmr_vif_seq_idx(seq->private, *pos - 1) + return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1) : SEQ_START_TOKEN; } static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct ipmr_vif_iter *iter = seq->private; + struct net *net = seq_file_net(seq); ++*pos; if (v == SEQ_START_TOKEN) - return ipmr_vif_seq_idx(iter, 0); + return ipmr_vif_seq_idx(net, iter, 0); - while (++iter->ct < init_net.ipv4.maxvif) { - if (!VIF_EXISTS(&init_net, iter->ct)) + while (++iter->ct < net->ipv4.maxvif) { + if (!VIF_EXISTS(net, iter->ct)) continue; - return &init_net.ipv4.vif_table[iter->ct]; + return &net->ipv4.vif_table[iter->ct]; } return NULL; } @@ -1718,6 +1723,8 @@ static void ipmr_vif_seq_stop(struct seq_file *seq, void *v) static int ipmr_vif_seq_show(struct seq_file *seq, void *v) { + struct net *net = seq_file_net(seq); + if (v == SEQ_START_TOKEN) { seq_puts(seq, "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n"); @@ -1727,7 +1734,7 @@ static int ipmr_vif_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n", - vif - init_net.ipv4.vif_table, + vif - net->ipv4.vif_table, name, vif->bytes_in, vif->pkt_in, vif->bytes_out, vif->pkt_out, vif->flags, vif->local, vif->remote); @@ -1744,8 +1751,8 @@ static const struct seq_operations ipmr_vif_seq_ops = { static int ipmr_vif_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &ipmr_vif_seq_ops, - sizeof(struct ipmr_vif_iter)); + return seq_open_net(inode, file, &ipmr_vif_seq_ops, + sizeof(struct ipmr_vif_iter)); } static const struct file_operations ipmr_vif_fops = { @@ -1753,23 +1760,25 @@ static const struct file_operations ipmr_vif_fops = { .open = ipmr_vif_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; struct ipmr_mfc_iter { + struct seq_net_private p; struct mfc_cache **cache; int ct; }; -static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos) +static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net, + struct ipmr_mfc_iter *it, loff_t pos) { struct mfc_cache *mfc; - it->cache = init_net.ipv4.mfc_cache_array; + it->cache = net->ipv4.mfc_cache_array; read_lock(&mrt_lock); for (it->ct = 0; it->ct < MFC_LINES; it->ct++) - for (mfc = init_net.ipv4.mfc_cache_array[it->ct]; + for (mfc = net->ipv4.mfc_cache_array[it->ct]; mfc; mfc = mfc->next) if (pos-- == 0) return mfc; @@ -1778,7 +1787,8 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos) it->cache = &mfc_unres_queue; spin_lock_bh(&mfc_unres_lock); for (mfc = mfc_unres_queue; mfc; mfc = mfc->next) - if (pos-- == 0) + if (net_eq(mfc_net(mfc), net) && + pos-- == 0) return mfc; spin_unlock_bh(&mfc_unres_lock); @@ -1790,9 +1800,11 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos) static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) { struct ipmr_mfc_iter *it = seq->private; + struct net *net = seq_file_net(seq); + it->cache = NULL; it->ct = 0; - return *pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1) + return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1) : SEQ_START_TOKEN; } @@ -1800,11 +1812,12 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct mfc_cache *mfc = v; struct ipmr_mfc_iter *it = seq->private; + struct net *net = seq_file_net(seq); ++*pos; if (v == SEQ_START_TOKEN) - return ipmr_mfc_seq_idx(seq->private, 0); + return ipmr_mfc_seq_idx(net, seq->private, 0); if (mfc->next) return mfc->next; @@ -1812,10 +1825,10 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (it->cache == &mfc_unres_queue) goto end_of_list; - BUG_ON(it->cache != init_net.ipv4.mfc_cache_array); + BUG_ON(it->cache != net->ipv4.mfc_cache_array); while (++it->ct < MFC_LINES) { - mfc = init_net.ipv4.mfc_cache_array[it->ct]; + mfc = net->ipv4.mfc_cache_array[it->ct]; if (mfc) return mfc; } @@ -1827,6 +1840,8 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) spin_lock_bh(&mfc_unres_lock); mfc = mfc_unres_queue; + while (mfc && !net_eq(mfc_net(mfc), net)) + mfc = mfc->next; if (mfc) return mfc; @@ -1840,16 +1855,18 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v) { struct ipmr_mfc_iter *it = seq->private; + struct net *net = seq_file_net(seq); if (it->cache == &mfc_unres_queue) spin_unlock_bh(&mfc_unres_lock); - else if (it->cache == init_net.ipv4.mfc_cache_array) + else if (it->cache == net->ipv4.mfc_cache_array) read_unlock(&mrt_lock); } static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) { int n; + struct net *net = seq_file_net(seq); if (v == SEQ_START_TOKEN) { seq_puts(seq, @@ -1870,7 +1887,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v) mfc->mfc_un.res.wrong_if); for (n = mfc->mfc_un.res.minvif; n < mfc->mfc_un.res.maxvif; n++ ) { - if (VIF_EXISTS(&init_net, n) && + if (VIF_EXISTS(net, n) && mfc->mfc_un.res.ttls[n] < 255) seq_printf(seq, " %2d:%-3d", @@ -1896,8 +1913,8 @@ static const struct seq_operations ipmr_mfc_seq_ops = { static int ipmr_mfc_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &ipmr_mfc_seq_ops, - sizeof(struct ipmr_mfc_iter)); + return seq_open_net(inode, file, &ipmr_mfc_seq_ops, + sizeof(struct ipmr_mfc_iter)); } static const struct file_operations ipmr_mfc_fops = { @@ -1905,7 +1922,7 @@ static const struct file_operations ipmr_mfc_fops = { .open = ipmr_mfc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; #endif @@ -1942,8 +1959,22 @@ static int __net_init ipmr_net_init(struct net *net) #ifdef CONFIG_IP_PIMSM net->ipv4.mroute_reg_vif_num = -1; #endif + +#ifdef CONFIG_PROC_FS + err = -ENOMEM; + if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops)) + goto proc_vif_fail; + if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops)) + goto proc_cache_fail; +#endif return 0; +#ifdef CONFIG_PROC_FS +proc_cache_fail: + proc_net_remove(net, "ip_mr_vif"); +proc_vif_fail: + kfree(net->ipv4.mfc_cache_array); +#endif fail_mfc_cache: kfree(net->ipv4.vif_table); fail: @@ -1952,6 +1983,10 @@ fail: static void __net_exit ipmr_net_exit(struct net *net) { +#ifdef CONFIG_PROC_FS + proc_net_remove(net, "ip_mr_cache"); + proc_net_remove(net, "ip_mr_vif"); +#endif kfree(net->ipv4.mfc_cache_array); kfree(net->ipv4.vif_table); } @@ -1980,20 +2015,8 @@ int __init ip_mr_init(void) err = register_netdevice_notifier(&ip_mr_notifier); if (err) goto reg_notif_fail; -#ifdef CONFIG_PROC_FS - err = -ENOMEM; - if (!proc_net_fops_create(&init_net, "ip_mr_vif", 0, &ipmr_vif_fops)) - goto proc_vif_fail; - if (!proc_net_fops_create(&init_net, "ip_mr_cache", 0, &ipmr_mfc_fops)) - goto proc_cache_fail; -#endif return 0; -#ifdef CONFIG_PROC_FS -proc_cache_fail: - proc_net_remove(&init_net, "ip_mr_vif"); -proc_vif_fail: - unregister_netdevice_notifier(&ip_mr_notifier); -#endif + reg_notif_fail: del_timer(&ipmr_expire_timer); unregister_pernet_subsys(&ipmr_net_ops); -- cgit v1.2.3 From 4feb88e5c694bfe414cbc3ce0e383f7f7038f90b Mon Sep 17 00:00:00 2001 From: Benjamin Thery Date: Thu, 22 Jan 2009 04:56:23 +0000 Subject: netns: ipmr: enable namespace support in ipv4 multicast routing code This last patch makes the appropriate changes to use and propagate the network namespace where needed in IPv4 multicast routing code. This consists mainly in replacing all the remaining init_net occurences with current netns pointer retrieved from sockets, net devices or mfc_caches depending on the routines' contexts. Some routines receive a new 'struct net' parameter to propagate the current netns: * vif_add/vif_delete * ipmr_new_tunnel * mroute_clean_tables * ipmr_cache_find * ipmr_cache_report * ipmr_cache_unresolved * ipmr_mfc_add/ipmr_mfc_delete * ipmr_get_route * rt_fill_info (in route.c) Signed-off-by: Benjamin Thery Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 243 ++++++++++++++++++++++++++++++------------------------- net/ipv4/route.c | 11 +-- 2 files changed, 141 insertions(+), 113 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a4fd97f1920c..21a6dc710f20 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -95,7 +95,8 @@ static DEFINE_SPINLOCK(mfc_unres_lock); static struct kmem_cache *mrt_cachep __read_mostly; static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local); -static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert); +static int ipmr_cache_report(struct net *net, + struct sk_buff *pkt, vifi_t vifi, int assert); static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm); #ifdef CONFIG_IP_PIMSM_V2 @@ -108,9 +109,11 @@ static struct timer_list ipmr_expire_timer; static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) { + struct net *net = dev_net(dev); + dev_close(dev); - dev = __dev_get_by_name(&init_net, "tunl0"); + dev = __dev_get_by_name(net, "tunl0"); if (dev) { const struct net_device_ops *ops = dev->netdev_ops; struct ifreq ifr; @@ -136,11 +139,11 @@ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) } static -struct net_device *ipmr_new_tunnel(struct vifctl *v) +struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v) { struct net_device *dev; - dev = __dev_get_by_name(&init_net, "tunl0"); + dev = __dev_get_by_name(net, "tunl0"); if (dev) { const struct net_device_ops *ops = dev->netdev_ops; @@ -169,7 +172,8 @@ struct net_device *ipmr_new_tunnel(struct vifctl *v) dev = NULL; - if (err == 0 && (dev = __dev_get_by_name(&init_net, p.name)) != NULL) { + if (err == 0 && + (dev = __dev_get_by_name(net, p.name)) != NULL) { dev->flags |= IFF_MULTICAST; in_dev = __in_dev_get_rtnl(dev); @@ -199,10 +203,13 @@ failure: static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev) { + struct net *net = dev_net(dev); + read_lock(&mrt_lock); dev->stats.tx_bytes += skb->len; dev->stats.tx_packets++; - ipmr_cache_report(skb, init_net.ipv4.mroute_reg_vif_num, IGMPMSG_WHOLEPKT); + ipmr_cache_report(net, skb, net->ipv4.mroute_reg_vif_num, + IGMPMSG_WHOLEPKT); read_unlock(&mrt_lock); kfree_skb(skb); return 0; @@ -269,16 +276,16 @@ failure: * @notify: Set to 1, if the caller is a notifier_call */ -static int vif_delete(int vifi, int notify) +static int vif_delete(struct net *net, int vifi, int notify) { struct vif_device *v; struct net_device *dev; struct in_device *in_dev; - if (vifi < 0 || vifi >= init_net.ipv4.maxvif) + if (vifi < 0 || vifi >= net->ipv4.maxvif) return -EADDRNOTAVAIL; - v = &init_net.ipv4.vif_table[vifi]; + v = &net->ipv4.vif_table[vifi]; write_lock_bh(&mrt_lock); dev = v->dev; @@ -290,17 +297,17 @@ static int vif_delete(int vifi, int notify) } #ifdef CONFIG_IP_PIMSM - if (vifi == init_net.ipv4.mroute_reg_vif_num) - init_net.ipv4.mroute_reg_vif_num = -1; + if (vifi == net->ipv4.mroute_reg_vif_num) + net->ipv4.mroute_reg_vif_num = -1; #endif - if (vifi+1 == init_net.ipv4.maxvif) { + if (vifi+1 == net->ipv4.maxvif) { int tmp; for (tmp=vifi-1; tmp>=0; tmp--) { - if (VIF_EXISTS(&init_net, tmp)) + if (VIF_EXISTS(net, tmp)) break; } - init_net.ipv4.maxvif = tmp+1; + net->ipv4.maxvif = tmp+1; } write_unlock_bh(&mrt_lock); @@ -333,8 +340,9 @@ static void ipmr_destroy_unres(struct mfc_cache *c) { struct sk_buff *skb; struct nlmsgerr *e; + struct net *net = mfc_net(c); - atomic_dec(&init_net.ipv4.cache_resolve_queue_len); + atomic_dec(&net->ipv4.cache_resolve_queue_len); while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) { if (ip_hdr(skb)->version == 0) { @@ -346,7 +354,7 @@ static void ipmr_destroy_unres(struct mfc_cache *c) e->error = -ETIMEDOUT; memset(&e->msg, 0, sizeof(e->msg)); - rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid); + rtnl_unicast(skb, net, NETLINK_CB(skb).pid); } else kfree_skb(skb); } @@ -401,13 +409,14 @@ out: static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls) { int vifi; + struct net *net = mfc_net(cache); cache->mfc_un.res.minvif = MAXVIFS; cache->mfc_un.res.maxvif = 0; memset(cache->mfc_un.res.ttls, 255, MAXVIFS); - for (vifi = 0; vifi < init_net.ipv4.maxvif; vifi++) { - if (VIF_EXISTS(&init_net, vifi) && + for (vifi = 0; vifi < net->ipv4.maxvif; vifi++) { + if (VIF_EXISTS(net, vifi) && ttls[vifi] && ttls[vifi] < 255) { cache->mfc_un.res.ttls[vifi] = ttls[vifi]; if (cache->mfc_un.res.minvif > vifi) @@ -418,16 +427,16 @@ static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls) } } -static int vif_add(struct vifctl *vifc, int mrtsock) +static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock) { int vifi = vifc->vifc_vifi; - struct vif_device *v = &init_net.ipv4.vif_table[vifi]; + struct vif_device *v = &net->ipv4.vif_table[vifi]; struct net_device *dev; struct in_device *in_dev; int err; /* Is vif busy ? */ - if (VIF_EXISTS(&init_net, vifi)) + if (VIF_EXISTS(net, vifi)) return -EADDRINUSE; switch (vifc->vifc_flags) { @@ -437,7 +446,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock) * Special Purpose VIF in PIM * All the packets will be sent to the daemon */ - if (init_net.ipv4.mroute_reg_vif_num >= 0) + if (net->ipv4.mroute_reg_vif_num >= 0) return -EADDRINUSE; dev = ipmr_reg_vif(); if (!dev) @@ -451,7 +460,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock) break; #endif case VIFF_TUNNEL: - dev = ipmr_new_tunnel(vifc); + dev = ipmr_new_tunnel(net, vifc); if (!dev) return -ENOBUFS; err = dev_set_allmulti(dev, 1); @@ -462,7 +471,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock) } break; case 0: - dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr); + dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr); if (!dev) return -EADDRNOTAVAIL; err = dev_set_allmulti(dev, 1); @@ -503,20 +512,22 @@ static int vif_add(struct vifctl *vifc, int mrtsock) v->dev = dev; #ifdef CONFIG_IP_PIMSM if (v->flags&VIFF_REGISTER) - init_net.ipv4.mroute_reg_vif_num = vifi; + net->ipv4.mroute_reg_vif_num = vifi; #endif - if (vifi+1 > init_net.ipv4.maxvif) - init_net.ipv4.maxvif = vifi+1; + if (vifi+1 > net->ipv4.maxvif) + net->ipv4.maxvif = vifi+1; write_unlock_bh(&mrt_lock); return 0; } -static struct mfc_cache *ipmr_cache_find(__be32 origin, __be32 mcastgrp) +static struct mfc_cache *ipmr_cache_find(struct net *net, + __be32 origin, + __be32 mcastgrp) { int line = MFC_HASH(mcastgrp, origin); struct mfc_cache *c; - for (c = init_net.ipv4.mfc_cache_array[line]; c; c = c->next) { + for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) { if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp) break; } @@ -576,7 +587,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) memset(&e->msg, 0, sizeof(e->msg)); } - rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid); + rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid); } else ip_mr_forward(skb, c, 0); } @@ -589,7 +600,8 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) * Called under mrt_lock. */ -static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) +static int ipmr_cache_report(struct net *net, + struct sk_buff *pkt, vifi_t vifi, int assert) { struct sk_buff *skb; const int ihl = ip_hdrlen(pkt); @@ -621,7 +633,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr)); msg->im_msgtype = IGMPMSG_WHOLEPKT; msg->im_mbz = 0; - msg->im_vif = init_net.ipv4.mroute_reg_vif_num; + msg->im_vif = net->ipv4.mroute_reg_vif_num; ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2; ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) + sizeof(struct iphdr)); @@ -653,7 +665,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) skb->transport_header = skb->network_header; } - if (init_net.ipv4.mroute_sk == NULL) { + if (net->ipv4.mroute_sk == NULL) { kfree_skb(skb); return -EINVAL; } @@ -661,7 +673,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) /* * Deliver to mrouted */ - ret = sock_queue_rcv_skb(init_net.ipv4.mroute_sk, skb); + ret = sock_queue_rcv_skb(net->ipv4.mroute_sk, skb); if (ret < 0) { if (net_ratelimit()) printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n"); @@ -676,7 +688,7 @@ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert) */ static int -ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb) +ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb) { int err; struct mfc_cache *c; @@ -684,7 +696,7 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb) spin_lock_bh(&mfc_unres_lock); for (c=mfc_unres_queue; c; c=c->next) { - if (net_eq(mfc_net(c), &init_net) && + if (net_eq(mfc_net(c), net) && c->mfc_mcastgrp == iph->daddr && c->mfc_origin == iph->saddr) break; @@ -695,8 +707,8 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb) * Create a new entry if allowable */ - if (atomic_read(&init_net.ipv4.cache_resolve_queue_len) >= 10 || - (c = ipmr_cache_alloc_unres(&init_net)) == NULL) { + if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 || + (c = ipmr_cache_alloc_unres(net)) == NULL) { spin_unlock_bh(&mfc_unres_lock); kfree_skb(skb); @@ -713,7 +725,8 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb) /* * Reflect first query at mrouted. */ - if ((err = ipmr_cache_report(skb, vifi, IGMPMSG_NOCACHE))<0) { + err = ipmr_cache_report(net, skb, vifi, IGMPMSG_NOCACHE); + if (err < 0) { /* If the report failed throw the cache entry out - Brad Parker */ @@ -724,7 +737,7 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb) return err; } - atomic_inc(&init_net.ipv4.cache_resolve_queue_len); + atomic_inc(&net->ipv4.cache_resolve_queue_len); c->next = mfc_unres_queue; mfc_unres_queue = c; @@ -750,14 +763,14 @@ ipmr_cache_unresolved(vifi_t vifi, struct sk_buff *skb) * MFC cache manipulation by user space mroute daemon */ -static int ipmr_mfc_delete(struct mfcctl *mfc) +static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc) { int line; struct mfc_cache *c, **cp; line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); - for (cp = &init_net.ipv4.mfc_cache_array[line]; + for (cp = &net->ipv4.mfc_cache_array[line]; (c = *cp) != NULL; cp = &c->next) { if (c->mfc_origin == mfc->mfcc_origin.s_addr && c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) { @@ -772,14 +785,14 @@ static int ipmr_mfc_delete(struct mfcctl *mfc) return -ENOENT; } -static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock) +static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock) { int line; struct mfc_cache *uc, *c, **cp; line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr); - for (cp = &init_net.ipv4.mfc_cache_array[line]; + for (cp = &net->ipv4.mfc_cache_array[line]; (c = *cp) != NULL; cp = &c->next) { if (c->mfc_origin == mfc->mfcc_origin.s_addr && c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) @@ -799,7 +812,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock) if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr)) return -EINVAL; - c = ipmr_cache_alloc(&init_net); + c = ipmr_cache_alloc(net); if (c == NULL) return -ENOMEM; @@ -811,8 +824,8 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock) c->mfc_flags |= MFC_STATIC; write_lock_bh(&mrt_lock); - c->next = init_net.ipv4.mfc_cache_array[line]; - init_net.ipv4.mfc_cache_array[line] = c; + c->next = net->ipv4.mfc_cache_array[line]; + net->ipv4.mfc_cache_array[line] = c; write_unlock_bh(&mrt_lock); /* @@ -822,11 +835,11 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock) spin_lock_bh(&mfc_unres_lock); for (cp = &mfc_unres_queue; (uc=*cp) != NULL; cp = &uc->next) { - if (net_eq(mfc_net(uc), &init_net) && + if (net_eq(mfc_net(uc), net) && uc->mfc_origin == c->mfc_origin && uc->mfc_mcastgrp == c->mfc_mcastgrp) { *cp = uc->next; - atomic_dec(&init_net.ipv4.cache_resolve_queue_len); + atomic_dec(&net->ipv4.cache_resolve_queue_len); break; } } @@ -845,16 +858,16 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock) * Close the multicast socket, and clear the vif tables etc */ -static void mroute_clean_tables(struct sock *sk) +static void mroute_clean_tables(struct net *net) { int i; /* * Shut down all active vif entries */ - for (i = 0; i < init_net.ipv4.maxvif; i++) { - if (!(init_net.ipv4.vif_table[i].flags&VIFF_STATIC)) - vif_delete(i, 0); + for (i = 0; i < net->ipv4.maxvif; i++) { + if (!(net->ipv4.vif_table[i].flags&VIFF_STATIC)) + vif_delete(net, i, 0); } /* @@ -863,7 +876,7 @@ static void mroute_clean_tables(struct sock *sk) for (i=0; iipv4.mfc_cache_array[i]; while ((c = *cp) != NULL) { if (c->mfc_flags&MFC_STATIC) { cp = &c->next; @@ -877,13 +890,13 @@ static void mroute_clean_tables(struct sock *sk) } } - if (atomic_read(&init_net.ipv4.cache_resolve_queue_len) != 0) { + if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) { struct mfc_cache *c, **cp; spin_lock_bh(&mfc_unres_lock); cp = &mfc_unres_queue; while ((c = *cp) != NULL) { - if (!net_eq(mfc_net(c), &init_net)) { + if (!net_eq(mfc_net(c), net)) { cp = &c->next; continue; } @@ -897,15 +910,17 @@ static void mroute_clean_tables(struct sock *sk) static void mrtsock_destruct(struct sock *sk) { + struct net *net = sock_net(sk); + rtnl_lock(); - if (sk == init_net.ipv4.mroute_sk) { - IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)--; + if (sk == net->ipv4.mroute_sk) { + IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; write_lock_bh(&mrt_lock); - init_net.ipv4.mroute_sk = NULL; + net->ipv4.mroute_sk = NULL; write_unlock_bh(&mrt_lock); - mroute_clean_tables(sk); + mroute_clean_tables(net); } rtnl_unlock(); } @@ -922,9 +937,10 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int int ret; struct vifctl vif; struct mfcctl mfc; + struct net *net = sock_net(sk); if (optname != MRT_INIT) { - if (sk != init_net.ipv4.mroute_sk && !capable(CAP_NET_ADMIN)) + if (sk != net->ipv4.mroute_sk && !capable(CAP_NET_ADMIN)) return -EACCES; } @@ -937,7 +953,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int return -ENOPROTOOPT; rtnl_lock(); - if (init_net.ipv4.mroute_sk) { + if (net->ipv4.mroute_sk) { rtnl_unlock(); return -EADDRINUSE; } @@ -945,15 +961,15 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int ret = ip_ra_control(sk, 1, mrtsock_destruct); if (ret == 0) { write_lock_bh(&mrt_lock); - init_net.ipv4.mroute_sk = sk; + net->ipv4.mroute_sk = sk; write_unlock_bh(&mrt_lock); - IPV4_DEVCONF_ALL(sock_net(sk), MC_FORWARDING)++; + IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; } rtnl_unlock(); return ret; case MRT_DONE: - if (sk != init_net.ipv4.mroute_sk) + if (sk != net->ipv4.mroute_sk) return -EACCES; return ip_ra_control(sk, 0, NULL); case MRT_ADD_VIF: @@ -966,9 +982,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int return -ENFILE; rtnl_lock(); if (optname == MRT_ADD_VIF) { - ret = vif_add(&vif, sk == init_net.ipv4.mroute_sk); + ret = vif_add(net, &vif, sk == net->ipv4.mroute_sk); } else { - ret = vif_delete(vif.vifc_vifi, 0); + ret = vif_delete(net, vif.vifc_vifi, 0); } rtnl_unlock(); return ret; @@ -985,9 +1001,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int return -EFAULT; rtnl_lock(); if (optname == MRT_DEL_MFC) - ret = ipmr_mfc_delete(&mfc); + ret = ipmr_mfc_delete(net, &mfc); else - ret = ipmr_mfc_add(&mfc, sk == init_net.ipv4.mroute_sk); + ret = ipmr_mfc_add(net, &mfc, sk == net->ipv4.mroute_sk); rtnl_unlock(); return ret; /* @@ -998,7 +1014,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int int v; if (get_user(v,(int __user *)optval)) return -EFAULT; - init_net.ipv4.mroute_do_assert = (v) ? 1 : 0; + net->ipv4.mroute_do_assert = (v) ? 1 : 0; return 0; } #ifdef CONFIG_IP_PIMSM @@ -1012,11 +1028,11 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int rtnl_lock(); ret = 0; - if (v != init_net.ipv4.mroute_do_pim) { - init_net.ipv4.mroute_do_pim = v; - init_net.ipv4.mroute_do_assert = v; + if (v != net->ipv4.mroute_do_pim) { + net->ipv4.mroute_do_pim = v; + net->ipv4.mroute_do_assert = v; #ifdef CONFIG_IP_PIMSM_V2 - if (init_net.ipv4.mroute_do_pim) + if (net->ipv4.mroute_do_pim) ret = inet_add_protocol(&pim_protocol, IPPROTO_PIM); else @@ -1047,6 +1063,7 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int { int olr; int val; + struct net *net = sock_net(sk); if (optname != MRT_VERSION && #ifdef CONFIG_IP_PIMSM @@ -1068,10 +1085,10 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int val = 0x0305; #ifdef CONFIG_IP_PIMSM else if (optname == MRT_PIM) - val = init_net.ipv4.mroute_do_pim; + val = net->ipv4.mroute_do_pim; #endif else - val = init_net.ipv4.mroute_do_assert; + val = net->ipv4.mroute_do_assert; if (copy_to_user(optval, &val, olr)) return -EFAULT; return 0; @@ -1087,16 +1104,17 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) struct sioc_vif_req vr; struct vif_device *vif; struct mfc_cache *c; + struct net *net = sock_net(sk); switch (cmd) { case SIOCGETVIFCNT: if (copy_from_user(&vr, arg, sizeof(vr))) return -EFAULT; - if (vr.vifi >= init_net.ipv4.maxvif) + if (vr.vifi >= net->ipv4.maxvif) return -EINVAL; read_lock(&mrt_lock); - vif = &init_net.ipv4.vif_table[vr.vifi]; - if (VIF_EXISTS(&init_net, vr.vifi)) { + vif = &net->ipv4.vif_table[vr.vifi]; + if (VIF_EXISTS(net, vr.vifi)) { vr.icount = vif->pkt_in; vr.ocount = vif->pkt_out; vr.ibytes = vif->bytes_in; @@ -1114,7 +1132,7 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) return -EFAULT; read_lock(&mrt_lock); - c = ipmr_cache_find(sr.src.s_addr, sr.grp.s_addr); + c = ipmr_cache_find(net, sr.src.s_addr, sr.grp.s_addr); if (c) { sr.pktcnt = c->mfc_un.res.pkt; sr.bytecnt = c->mfc_un.res.bytes; @@ -1136,18 +1154,19 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; + struct net *net = dev_net(dev); struct vif_device *v; int ct; - if (!net_eq(dev_net(dev), &init_net)) + if (!net_eq(dev_net(dev), net)) return NOTIFY_DONE; if (event != NETDEV_UNREGISTER) return NOTIFY_DONE; - v = &init_net.ipv4.vif_table[0]; - for (ct = 0; ct < init_net.ipv4.maxvif; ct++, v++) { + v = &net->ipv4.vif_table[0]; + for (ct = 0; ct < net->ipv4.maxvif; ct++, v++) { if (v->dev == dev) - vif_delete(ct, 1); + vif_delete(net, ct, 1); } return NOTIFY_DONE; } @@ -1207,8 +1226,9 @@ static inline int ipmr_forward_finish(struct sk_buff *skb) static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) { + struct net *net = mfc_net(c); const struct iphdr *iph = ip_hdr(skb); - struct vif_device *vif = &init_net.ipv4.vif_table[vifi]; + struct vif_device *vif = &net->ipv4.vif_table[vifi]; struct net_device *dev; struct rtable *rt; int encap = 0; @@ -1222,7 +1242,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) vif->bytes_out += skb->len; vif->dev->stats.tx_bytes += skb->len; vif->dev->stats.tx_packets++; - ipmr_cache_report(skb, vifi, IGMPMSG_WHOLEPKT); + ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT); kfree_skb(skb); return; } @@ -1235,7 +1255,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) .saddr = vif->local, .tos = RT_TOS(iph->tos) } }, .proto = IPPROTO_IPIP }; - if (ip_route_output_key(&init_net, &rt, &fl)) + if (ip_route_output_key(net, &rt, &fl)) goto out_free; encap = sizeof(struct iphdr); } else { @@ -1244,7 +1264,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) { .daddr = iph->daddr, .tos = RT_TOS(iph->tos) } }, .proto = IPPROTO_IPIP }; - if (ip_route_output_key(&init_net, &rt, &fl)) + if (ip_route_output_key(net, &rt, &fl)) goto out_free; } @@ -1308,9 +1328,10 @@ out_free: static int ipmr_find_vif(struct net_device *dev) { + struct net *net = dev_net(dev); int ct; - for (ct = init_net.ipv4.maxvif-1; ct >= 0; ct--) { - if (init_net.ipv4.vif_table[ct].dev == dev) + for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) { + if (net->ipv4.vif_table[ct].dev == dev) break; } return ct; @@ -1322,6 +1343,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local { int psend = -1; int vif, ct; + struct net *net = mfc_net(cache); vif = cache->mfc_parent; cache->mfc_un.res.pkt++; @@ -1330,7 +1352,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local /* * Wrong interface: drop packet and (maybe) send PIM assert. */ - if (init_net.ipv4.vif_table[vif].dev != skb->dev) { + if (net->ipv4.vif_table[vif].dev != skb->dev) { int true_vifi; if (skb->rtable->fl.iif == 0) { @@ -1351,24 +1373,24 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local cache->mfc_un.res.wrong_if++; true_vifi = ipmr_find_vif(skb->dev); - if (true_vifi >= 0 && init_net.ipv4.mroute_do_assert && + if (true_vifi >= 0 && net->ipv4.mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, so that we cannot check that packet arrived on an oif. It is bad, but otherwise we would need to move pretty large chunk of pimd to kernel. Ough... --ANK */ - (init_net.ipv4.mroute_do_pim || + (net->ipv4.mroute_do_pim || cache->mfc_un.res.ttls[true_vifi] < 255) && time_after(jiffies, cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { cache->mfc_un.res.last_assert = jiffies; - ipmr_cache_report(skb, true_vifi, IGMPMSG_WRONGVIF); + ipmr_cache_report(net, skb, true_vifi, IGMPMSG_WRONGVIF); } goto dont_forward; } - init_net.ipv4.vif_table[vif].pkt_in++; - init_net.ipv4.vif_table[vif].bytes_in += skb->len; + net->ipv4.vif_table[vif].pkt_in++; + net->ipv4.vif_table[vif].bytes_in += skb->len; /* * Forward the frame @@ -1408,6 +1430,7 @@ dont_forward: int ip_mr_input(struct sk_buff *skb) { struct mfc_cache *cache; + struct net *net = dev_net(skb->dev); int local = skb->rtable->rt_flags&RTCF_LOCAL; /* Packet is looped back after forward, it should not be @@ -1428,9 +1451,9 @@ int ip_mr_input(struct sk_buff *skb) that we can forward NO IGMP messages. */ read_lock(&mrt_lock); - if (init_net.ipv4.mroute_sk) { + if (net->ipv4.mroute_sk) { nf_reset(skb); - raw_rcv(init_net.ipv4.mroute_sk, skb); + raw_rcv(net->ipv4.mroute_sk, skb); read_unlock(&mrt_lock); return 0; } @@ -1439,7 +1462,7 @@ int ip_mr_input(struct sk_buff *skb) } read_lock(&mrt_lock); - cache = ipmr_cache_find(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); + cache = ipmr_cache_find(net, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr); /* * No usable cache entry @@ -1459,7 +1482,7 @@ int ip_mr_input(struct sk_buff *skb) vif = ipmr_find_vif(skb->dev); if (vif >= 0) { - int err = ipmr_cache_unresolved(vif, skb); + int err = ipmr_cache_unresolved(net, vif, skb); read_unlock(&mrt_lock); return err; @@ -1490,6 +1513,7 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen) { struct net_device *reg_dev = NULL; struct iphdr *encap; + struct net *net = dev_net(skb->dev); encap = (struct iphdr *)(skb_transport_header(skb) + pimlen); /* @@ -1504,8 +1528,8 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen) return 1; read_lock(&mrt_lock); - if (init_net.ipv4.mroute_reg_vif_num >= 0) - reg_dev = init_net.ipv4.vif_table[init_net.ipv4.mroute_reg_vif_num].dev; + if (net->ipv4.mroute_reg_vif_num >= 0) + reg_dev = net->ipv4.vif_table[net->ipv4.mroute_reg_vif_num].dev; if (reg_dev) dev_hold(reg_dev); read_unlock(&mrt_lock); @@ -1540,13 +1564,14 @@ static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen) int pim_rcv_v1(struct sk_buff * skb) { struct igmphdr *pim; + struct net *net = dev_net(skb->dev); if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr))) goto drop; pim = igmp_hdr(skb); - if (!init_net.ipv4.mroute_do_pim || + if (!net->ipv4.mroute_do_pim || pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) goto drop; @@ -1586,7 +1611,8 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm) { int ct; struct rtnexthop *nhp; - struct net_device *dev = init_net.ipv4.vif_table[c->mfc_parent].dev; + struct net *net = mfc_net(c); + struct net_device *dev = net->ipv4.vif_table[c->mfc_parent].dev; u8 *b = skb_tail_pointer(skb); struct rtattr *mp_head; @@ -1602,7 +1628,7 @@ ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm) nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); nhp->rtnh_flags = 0; nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; - nhp->rtnh_ifindex = init_net.ipv4.vif_table[ct].dev->ifindex; + nhp->rtnh_ifindex = net->ipv4.vif_table[ct].dev->ifindex; nhp->rtnh_len = sizeof(*nhp); } } @@ -1616,14 +1642,15 @@ rtattr_failure: return -EMSGSIZE; } -int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) +int ipmr_get_route(struct net *net, + struct sk_buff *skb, struct rtmsg *rtm, int nowait) { int err; struct mfc_cache *cache; struct rtable *rt = skb->rtable; read_lock(&mrt_lock); - cache = ipmr_cache_find(rt->rt_src, rt->rt_dst); + cache = ipmr_cache_find(net, rt->rt_src, rt->rt_dst); if (cache == NULL) { struct sk_buff *skb2; @@ -1654,7 +1681,7 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) iph->saddr = rt->rt_src; iph->daddr = rt->rt_dst; iph->version = 0; - err = ipmr_cache_unresolved(vif, skb2); + err = ipmr_cache_unresolved(net, vif, skb2); read_unlock(&mrt_lock); return err; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 97f71153584f..6a9e204c8024 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2779,7 +2779,8 @@ int ip_route_output_key(struct net *net, struct rtable **rp, struct flowi *flp) return ip_route_output_flow(net, rp, flp, NULL, 0); } -static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, +static int rt_fill_info(struct net *net, + struct sk_buff *skb, u32 pid, u32 seq, int event, int nowait, unsigned int flags) { struct rtable *rt = skb->rtable; @@ -2844,8 +2845,8 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, __be32 dst = rt->rt_dst; if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) && - IPV4_DEVCONF_ALL(&init_net, MC_FORWARDING)) { - int err = ipmr_get_route(skb, r, nowait); + IPV4_DEVCONF_ALL(net, MC_FORWARDING)) { + int err = ipmr_get_route(net, skb, r, nowait); if (err <= 0) { if (!nowait) { if (err == 0) @@ -2950,7 +2951,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; - err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, + err = rt_fill_info(net, skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); if (err <= 0) goto errout_free; @@ -2988,7 +2989,7 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) if (rt_is_expired(rt)) continue; skb->dst = dst_clone(&rt->u.dst); - if (rt_fill_info(skb, NETLINK_CB(cb->skb).pid, + if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1, NLM_F_MULTI) <= 0) { dst_release(xchg(&skb->dst, NULL)); -- cgit v1.2.3 From afcf12422ec8236dc8b9238fef7a475876eea8da Mon Sep 17 00:00:00 2001 From: Timo Teras Date: Mon, 26 Jan 2009 20:56:10 -0800 Subject: gre: optimize hash lookup Instead of keeping candidate tunnel device from all categories, keep only one candidate with best score. This optimizes stack usage and speeds up exit code. Signed-off-by: Timo Teras Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 69 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 40 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 4a43739c9035..07a188afb3ac 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -172,11 +172,11 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, int link = dev->ifindex; unsigned h0 = HASH(remote); unsigned h1 = HASH(key); - struct ip_tunnel *t, *sel[4] = { NULL, NULL, NULL, NULL }; + struct ip_tunnel *t, *cand = NULL; struct ipgre_net *ign = net_generic(net, ipgre_net_id); int dev_type = (gre_proto == htons(ETH_P_TEB)) ? ARPHRD_ETHER : ARPHRD_IPGRE; - int idx; + int score, cand_score = 4; for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) { if (local != t->parms.iph.saddr || @@ -189,15 +189,18 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, t->dev->type != dev_type) continue; - idx = 0; + score = 0; if (t->parms.link != link) - idx |= 1; + score |= 1; if (t->dev->type != dev_type) - idx |= 2; - if (idx == 0) + score |= 2; + if (score == 0) return t; - if (sel[idx] == NULL) - sel[idx] = t; + + if (score < cand_score) { + cand = t; + cand_score = score; + } } for (t = ign->tunnels_r[h0^h1]; t; t = t->next) { @@ -210,15 +213,18 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, t->dev->type != dev_type) continue; - idx = 0; + score = 0; if (t->parms.link != link) - idx |= 1; + score |= 1; if (t->dev->type != dev_type) - idx |= 2; - if (idx == 0) + score |= 2; + if (score == 0) return t; - if (sel[idx] == NULL) - sel[idx] = t; + + if (score < cand_score) { + cand = t; + cand_score = score; + } } for (t = ign->tunnels_l[h1]; t; t = t->next) { @@ -233,15 +239,18 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, t->dev->type != dev_type) continue; - idx = 0; + score = 0; if (t->parms.link != link) - idx |= 1; + score |= 1; if (t->dev->type != dev_type) - idx |= 2; - if (idx == 0) + score |= 2; + if (score == 0) return t; - if (sel[idx] == NULL) - sel[idx] = t; + + if (score < cand_score) { + cand = t; + cand_score = score; + } } for (t = ign->tunnels_wc[h1]; t; t = t->next) { @@ -253,20 +262,22 @@ static struct ip_tunnel * ipgre_tunnel_lookup(struct net_device *dev, t->dev->type != dev_type) continue; - idx = 0; + score = 0; if (t->parms.link != link) - idx |= 1; + score |= 1; if (t->dev->type != dev_type) - idx |= 2; - if (idx == 0) + score |= 2; + if (score == 0) return t; - if (sel[idx] == NULL) - sel[idx] = t; + + if (score < cand_score) { + cand = t; + cand_score = score; + } } - for (idx = 1; idx < ARRAY_SIZE(sel); idx++) - if (sel[idx] != NULL) - return sel[idx]; + if (cand != NULL) + return cand; if (ign->fb_tunnel_dev->flags & IFF_UP) return netdev_priv(ign->fb_tunnel_dev); -- cgit v1.2.3 From 5075138d67ac66adab777163907d92d1a955ff50 Mon Sep 17 00:00:00 2001 From: "remi.denis-courmont@nokia" Date: Fri, 23 Jan 2009 03:00:25 +0000 Subject: Phonet: move to Networking options like other protocol stacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index cdb8fdef6c4a..a12bae0e3fe9 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -185,6 +185,7 @@ source "net/x25/Kconfig" source "net/lapb/Kconfig" source "net/econet/Kconfig" source "net/wanrouter/Kconfig" +source "net/phonet/Kconfig" source "net/sched/Kconfig" source "net/dcb/Kconfig" @@ -229,7 +230,6 @@ source "net/can/Kconfig" source "net/irda/Kconfig" source "net/bluetooth/Kconfig" source "net/rxrpc/Kconfig" -source "net/phonet/Kconfig" config FIB_RULES bool -- cgit v1.2.3 From 4b8f704bea70a2c8719e47f53197678a87a0c62f Mon Sep 17 00:00:00 2001 From: "remi.denis-courmont@nokia" Date: Fri, 23 Jan 2009 03:00:26 +0000 Subject: Phonet: check destination before delivering packets locally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/af_phonet.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 13cb323f8c38..c7c39d92ee5e 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -275,8 +275,6 @@ static inline int can_respond(struct sk_buff *skb) return 0; ph = pn_hdr(skb); - if (phonet_address_get(skb->dev, ph->pn_rdev) != ph->pn_rdev) - return 0; /* we are not the destination */ if (ph->pn_res == PN_PREFIX && !pskb_may_pull(skb, 5)) return 0; if (ph->pn_res == PN_COMMGR) /* indications */ @@ -344,8 +342,8 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pkttype, struct net_device *orig_dev) { + struct net *net = dev_net(dev); struct phonethdr *ph; - struct sock *sk; struct sockaddr_pn sa; u16 len; @@ -364,21 +362,21 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev, skb_reset_transport_header(skb); pn_skb_get_dst_sockaddr(skb, &sa); - if (pn_sockaddr_get_addr(&sa) == 0) - goto out; /* currently, we cannot be device 0 */ - sk = pn_find_sock_by_sa(dev_net(dev), &sa); - if (sk == NULL) { + /* check if we are the destination */ + if (phonet_address_lookup(net, pn_sockaddr_get_addr(&sa)) == 0) { + /* Phonet packet input */ + struct sock *sk = pn_find_sock_by_sa(net, &sa); + + if (sk) + return sk_receive_skb(sk, skb, 0); + if (can_respond(skb)) { send_obj_unreachable(skb); send_reset_indications(skb); } - goto out; } - /* Push data to the socket (or other sockets connected to it). */ - return sk_receive_skb(sk, skb, 0); - out: kfree_skb(skb); return NET_RX_DROP; -- cgit v1.2.3 From 76e02cf6945e6faa9f6b546dc0513512197c5966 Mon Sep 17 00:00:00 2001 From: "remi.denis-courmont@nokia" Date: Fri, 23 Jan 2009 03:00:27 +0000 Subject: Phonet: allow phonet_device_init() to fail, put it to __init section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/af_phonet.c | 9 ++++++--- net/phonet/pn_dev.c | 4 +++- 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index c7c39d92ee5e..95bc49ddb8bf 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -426,16 +426,18 @@ static int __init phonet_init(void) { int err; + err = phonet_device_init(); + if (err) + return err; + err = sock_register(&phonet_proto_family); if (err) { printk(KERN_ALERT "phonet protocol family initialization failed\n"); - return err; + goto err_sock; } - phonet_device_init(); dev_add_pack(&phonet_packet_type); - phonet_netlink_register(); phonet_sysctl_init(); err = isi_register(); @@ -447,6 +449,7 @@ err: phonet_sysctl_exit(); sock_unregister(PF_PHONET); dev_remove_pack(&phonet_packet_type); +err_sock: phonet_device_exit(); return err; } diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 5491bf5e354b..af49db01d634 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -188,9 +188,11 @@ static struct notifier_block phonet_device_notifier = { }; /* Initialize Phonet devices list */ -void phonet_device_init(void) +int __init phonet_device_init(void) { register_netdevice_notifier(&phonet_device_notifier); + phonet_netlink_register(); + return 0; } void phonet_device_exit(void) -- cgit v1.2.3 From 660f706d931d4795d341805e083a8091af74fa88 Mon Sep 17 00:00:00 2001 From: "remi.denis-courmont@nokia" Date: Fri, 23 Jan 2009 03:00:28 +0000 Subject: Phonet: handle rtnetlink registration failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/pn_dev.c | 8 ++++++-- net/phonet/pn_netlink.c | 13 +++++++++---- 2 files changed, 15 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index af49db01d634..fd418107652b 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -190,9 +190,13 @@ static struct notifier_block phonet_device_notifier = { /* Initialize Phonet devices list */ int __init phonet_device_init(void) { + int err; + register_netdevice_notifier(&phonet_device_notifier); - phonet_netlink_register(); - return 0; + err = phonet_netlink_register(); + if (err) + phonet_device_exit(); + return err; } void phonet_device_exit(void) diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 242fe8f8c322..918a4f07f24a 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -160,9 +160,14 @@ out: return skb->len; } -void __init phonet_netlink_register(void) +int __init phonet_netlink_register(void) { - rtnl_register(PF_PHONET, RTM_NEWADDR, addr_doit, NULL); - rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL); - rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit); + int err = __rtnl_register(PF_PHONET, RTM_NEWADDR, addr_doit, NULL); + if (err) + return err; + + /* Further __rtnl_register() cannot fail */ + __rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL); + __rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit); + return 0; } -- cgit v1.2.3 From 6530e0fee1834fab51720769ac422186de2b3120 Mon Sep 17 00:00:00 2001 From: "remi.denis-courmont@nokia" Date: Fri, 23 Jan 2009 03:00:29 +0000 Subject: Phonet: remove useless locking in device cleanup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Incoming packets and sockets are already gone. The netdevice notifier is unregistered under the RTNL lock There remains a race with the rtnetlink handlers unregistration, but it is a generic RTNL issue that was already present before this change. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/pn_dev.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'net') diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index fd418107652b..3e24c0522ee3 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -204,13 +204,8 @@ void phonet_device_exit(void) struct phonet_device *pnd, *n; rtnl_unregister_all(PF_PHONET); - rtnl_lock(); - spin_lock_bh(&pndevs.lock); + unregister_netdevice_notifier(&phonet_device_notifier); list_for_each_entry_safe(pnd, n, &pndevs.list, list) __phonet_device_free(pnd); - - spin_unlock_bh(&pndevs.lock); - rtnl_unlock(); - unregister_netdevice_notifier(&phonet_device_notifier); } -- cgit v1.2.3 From 9a3b7a42bb2919a6282a96a5f4abe0f9be36c4b3 Mon Sep 17 00:00:00 2001 From: "remi.denis-courmont@nokia" Date: Fri, 23 Jan 2009 03:00:30 +0000 Subject: Phonet: use per-namespace devices list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- net/phonet/pn_dev.c | 108 +++++++++++++++++++++++++++++++++--------------- net/phonet/pn_netlink.c | 11 +++-- 2 files changed, 80 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 3e24c0522ee3..80a322d77909 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -28,32 +28,41 @@ #include #include #include +#include #include -/* when accessing, remember to lock with spin_lock(&pndevs.lock); */ -struct phonet_device_list pndevs = { - .list = LIST_HEAD_INIT(pndevs.list), - .lock = __SPIN_LOCK_UNLOCKED(pndevs.lock), +struct phonet_net { + struct phonet_device_list pndevs; }; +int phonet_net_id; + +struct phonet_device_list *phonet_device_list(struct net *net) +{ + struct phonet_net *pnn = net_generic(net, phonet_net_id); + return &pnn->pndevs; +} + /* Allocate new Phonet device. */ static struct phonet_device *__phonet_device_alloc(struct net_device *dev) { + struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd = kmalloc(sizeof(*pnd), GFP_ATOMIC); if (pnd == NULL) return NULL; pnd->netdev = dev; bitmap_zero(pnd->addrs, 64); - list_add(&pnd->list, &pndevs.list); + list_add(&pnd->list, &pndevs->list); return pnd; } static struct phonet_device *__phonet_get(struct net_device *dev) { + struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd; - list_for_each_entry(pnd, &pndevs.list, list) { + list_for_each_entry(pnd, &pndevs->list, list) { if (pnd->netdev == dev) return pnd; } @@ -68,32 +77,33 @@ static void __phonet_device_free(struct phonet_device *pnd) struct net_device *phonet_device_get(struct net *net) { + struct phonet_device_list *pndevs = phonet_device_list(net); struct phonet_device *pnd; struct net_device *dev; - spin_lock_bh(&pndevs.lock); - list_for_each_entry(pnd, &pndevs.list, list) { + spin_lock_bh(&pndevs->lock); + list_for_each_entry(pnd, &pndevs->list, list) { dev = pnd->netdev; BUG_ON(!dev); - if (net_eq(dev_net(dev), net) && - (dev->reg_state == NETREG_REGISTERED) && + if ((dev->reg_state == NETREG_REGISTERED) && ((pnd->netdev->flags & IFF_UP)) == IFF_UP) break; dev = NULL; } if (dev) dev_hold(dev); - spin_unlock_bh(&pndevs.lock); + spin_unlock_bh(&pndevs->lock); return dev; } int phonet_address_add(struct net_device *dev, u8 addr) { + struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd; int err = 0; - spin_lock_bh(&pndevs.lock); + spin_lock_bh(&pndevs->lock); /* Find or create Phonet-specific device data */ pnd = __phonet_get(dev); if (pnd == NULL) @@ -102,31 +112,33 @@ int phonet_address_add(struct net_device *dev, u8 addr) err = -ENOMEM; else if (test_and_set_bit(addr >> 2, pnd->addrs)) err = -EEXIST; - spin_unlock_bh(&pndevs.lock); + spin_unlock_bh(&pndevs->lock); return err; } int phonet_address_del(struct net_device *dev, u8 addr) { + struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd; int err = 0; - spin_lock_bh(&pndevs.lock); + spin_lock_bh(&pndevs->lock); pnd = __phonet_get(dev); if (!pnd || !test_and_clear_bit(addr >> 2, pnd->addrs)) err = -EADDRNOTAVAIL; else if (bitmap_empty(pnd->addrs, 64)) __phonet_device_free(pnd); - spin_unlock_bh(&pndevs.lock); + spin_unlock_bh(&pndevs->lock); return err; } /* Gets a source address toward a destination, through a interface. */ u8 phonet_address_get(struct net_device *dev, u8 addr) { + struct phonet_device_list *pndevs = phonet_device_list(dev_net(dev)); struct phonet_device *pnd; - spin_lock_bh(&pndevs.lock); + spin_lock_bh(&pndevs->lock); pnd = __phonet_get(dev); if (pnd) { BUG_ON(bitmap_empty(pnd->addrs, 64)); @@ -136,30 +148,31 @@ u8 phonet_address_get(struct net_device *dev, u8 addr) addr = find_first_bit(pnd->addrs, 64) << 2; } else addr = PN_NO_ADDR; - spin_unlock_bh(&pndevs.lock); + spin_unlock_bh(&pndevs->lock); return addr; } int phonet_address_lookup(struct net *net, u8 addr) { + struct phonet_device_list *pndevs = phonet_device_list(net); struct phonet_device *pnd; + int err = -EADDRNOTAVAIL; - spin_lock_bh(&pndevs.lock); - list_for_each_entry(pnd, &pndevs.list, list) { - if (!net_eq(dev_net(pnd->netdev), net)) - continue; + spin_lock_bh(&pndevs->lock); + list_for_each_entry(pnd, &pndevs->list, list) { /* Don't allow unregistering devices! */ if ((pnd->netdev->reg_state != NETREG_REGISTERED) || ((pnd->netdev->flags & IFF_UP)) != IFF_UP) continue; if (test_bit(addr >> 2, pnd->addrs)) { - spin_unlock_bh(&pndevs.lock); - return 0; + err = 0; + goto found; } } - spin_unlock_bh(&pndevs.lock); - return -EADDRNOTAVAIL; +found: + spin_unlock_bh(&pndevs->lock); + return err; } /* notify Phonet of device events */ @@ -169,14 +182,16 @@ static int phonet_device_notify(struct notifier_block *me, unsigned long what, struct net_device *dev = arg; if (what == NETDEV_UNREGISTER) { + struct phonet_device_list *pndevs; struct phonet_device *pnd; /* Destroy phonet-specific device data */ - spin_lock_bh(&pndevs.lock); + pndevs = phonet_device_list(dev_net(dev)); + spin_lock_bh(&pndevs->lock); pnd = __phonet_get(dev); if (pnd) __phonet_device_free(pnd); - spin_unlock_bh(&pndevs.lock); + spin_unlock_bh(&pndevs->lock); } return 0; @@ -187,10 +202,41 @@ static struct notifier_block phonet_device_notifier = { .priority = 0, }; +/* Per-namespace Phonet devices handling */ +static int phonet_init_net(struct net *net) +{ + struct phonet_net *pnn = kmalloc(sizeof(*pnn), GFP_KERNEL); + if (!pnn) + return -ENOMEM; + + INIT_LIST_HEAD(&pnn->pndevs.list); + spin_lock_init(&pnn->pndevs.lock); + net_assign_generic(net, phonet_net_id, pnn); + return 0; +} + +static void phonet_exit_net(struct net *net) +{ + struct phonet_net *pnn = net_generic(net, phonet_net_id); + struct phonet_device *pnd, *n; + + list_for_each_entry_safe(pnd, n, &pnn->pndevs.list, list) + __phonet_device_free(pnd); + + kfree(pnn); +} + +static struct pernet_operations phonet_net_ops = { + .init = phonet_init_net, + .exit = phonet_exit_net, +}; + /* Initialize Phonet devices list */ int __init phonet_device_init(void) { - int err; + int err = register_pernet_gen_device(&phonet_net_id, &phonet_net_ops); + if (err) + return err; register_netdevice_notifier(&phonet_device_notifier); err = phonet_netlink_register(); @@ -201,11 +247,7 @@ int __init phonet_device_init(void) void phonet_device_exit(void) { - struct phonet_device *pnd, *n; - rtnl_unregister_all(PF_PHONET); unregister_netdevice_notifier(&phonet_device_notifier); - - list_for_each_entry_safe(pnd, n, &pndevs.list, list) - __phonet_device_free(pnd); + unregister_pernet_gen_device(phonet_net_id, &phonet_net_ops); } diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 918a4f07f24a..1ceea1f92413 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -123,17 +123,16 @@ nla_put_failure: static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) { - struct net *net = sock_net(skb->sk); + struct phonet_device_list *pndevs; struct phonet_device *pnd; int dev_idx = 0, dev_start_idx = cb->args[0]; int addr_idx = 0, addr_start_idx = cb->args[1]; - spin_lock_bh(&pndevs.lock); - list_for_each_entry(pnd, &pndevs.list, list) { + pndevs = phonet_device_list(sock_net(skb->sk)); + spin_lock_bh(&pndevs->lock); + list_for_each_entry(pnd, &pndevs->list, list) { u8 addr; - if (!net_eq(dev_net(pnd->netdev), net)) - continue; if (dev_idx > dev_start_idx) addr_start_idx = 0; if (dev_idx++ < dev_start_idx) @@ -153,7 +152,7 @@ static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) } out: - spin_unlock_bh(&pndevs.lock); + spin_unlock_bh(&pndevs->lock); cb->args[0] = dev_idx; cb->args[1] = addr_idx; -- cgit v1.2.3 From d5a9e24afb4ab38110ebb777588ea0bd0eacbd0a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 27 Jan 2009 16:22:11 -0800 Subject: net: Allow RX queue selection to seed TX queue hashing. The idea is that drivers which implement multiqueue RX pre-seed the SKB by recording the RX queue selected by the hardware. If such a seed is found on TX, we'll use that to select the outgoing TX queue. This helps get more consistent load balancing on router and firewall loads. Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 5379b0c1190a..b21ad0b47aae 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1722,6 +1722,13 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) simple_tx_hashrnd_initialized = 1; } + if (skb_rx_queue_recorded(skb)) { + u32 val = skb_get_rx_queue(skb); + + hash = jhash_1word(val, simple_tx_hashrnd); + goto out; + } + switch (skb->protocol) { case htons(ETH_P_IP): if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))) @@ -1759,6 +1766,7 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd); +out: return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); } -- cgit v1.2.3 From f7105d63940899ece79bda024f668e6c761cfebf Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 27 Jan 2009 16:27:48 -0800 Subject: net: If SKB has attached socket, use socket's hash for TX queue selection. Signed-off-by: David S. Miller --- net/core/dev.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index b21ad0b47aae..cb8caa93caca 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1729,6 +1729,13 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) goto out; } + if (skb->sk && skb->sk->sk_hash) { + u32 val = skb->sk->sk_hash; + + hash = jhash_1word(val, simple_tx_hashrnd); + goto out; + } + switch (skb->protocol) { case htons(ETH_P_IP): if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))) -- cgit v1.2.3 From 7019298a2a5058c4e324494d6c8d0598214c28f4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 27 Jan 2009 16:34:47 -0800 Subject: net: Get rid of by-hand TX queue hashing. We now only TX hash on pre-computed SKB properties. The thinking is: 1) High performance routing and firewalling setups will have a multiqueue capable card used for receive, and therefore would have RX queue recordings made into the SKB which can be used for the TX side hash. 2) Locally generated packets will have an attached socket and thus a valid sk->sk_hash to make use of. Signed-off-by: David S. Miller --- net/core/dev.c | 73 +++++++++++----------------------------------------------- 1 file changed, 14 insertions(+), 59 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index cb8caa93caca..e61b95c11fc0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1708,72 +1708,27 @@ out_kfree_skb: return 0; } -static u32 simple_tx_hashrnd; -static int simple_tx_hashrnd_initialized = 0; +static u32 skb_tx_hashrnd; +static int skb_tx_hashrnd_initialized = 0; -static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) +static u16 skb_tx_hash(struct net_device *dev, struct sk_buff *skb) { - u32 addr1, addr2, ports; - u32 hash, ihl; - u8 ip_proto = 0; + u32 hash; - if (unlikely(!simple_tx_hashrnd_initialized)) { - get_random_bytes(&simple_tx_hashrnd, 4); - simple_tx_hashrnd_initialized = 1; + if (unlikely(!skb_tx_hashrnd_initialized)) { + get_random_bytes(&skb_tx_hashrnd, 4); + skb_tx_hashrnd_initialized = 1; } if (skb_rx_queue_recorded(skb)) { - u32 val = skb_get_rx_queue(skb); - - hash = jhash_1word(val, simple_tx_hashrnd); - goto out; - } - - if (skb->sk && skb->sk->sk_hash) { - u32 val = skb->sk->sk_hash; - - hash = jhash_1word(val, simple_tx_hashrnd); - goto out; - } - - switch (skb->protocol) { - case htons(ETH_P_IP): - if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))) - ip_proto = ip_hdr(skb)->protocol; - addr1 = ip_hdr(skb)->saddr; - addr2 = ip_hdr(skb)->daddr; - ihl = ip_hdr(skb)->ihl; - break; - case htons(ETH_P_IPV6): - ip_proto = ipv6_hdr(skb)->nexthdr; - addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3]; - addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3]; - ihl = (40 >> 2); - break; - default: - return 0; - } - - - switch (ip_proto) { - case IPPROTO_TCP: - case IPPROTO_UDP: - case IPPROTO_DCCP: - case IPPROTO_ESP: - case IPPROTO_AH: - case IPPROTO_SCTP: - case IPPROTO_UDPLITE: - ports = *((u32 *) (skb_network_header(skb) + (ihl * 4))); - break; - - default: - ports = 0; - break; - } + hash = skb_get_rx_queue(skb); + } else if (skb->sk && skb->sk->sk_hash) { + hash = skb->sk->sk_hash; + } else + hash = skb->protocol; - hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd); + hash = jhash_1word(hash, skb_tx_hashrnd); -out: return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); } @@ -1786,7 +1741,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev, if (ops->ndo_select_queue) queue_index = ops->ndo_select_queue(dev, skb); else if (dev->real_num_tx_queues > 1) - queue_index = simple_tx_hash(dev, skb); + queue_index = skb_tx_hash(dev, skb); skb_set_queue_mapping(skb, queue_index); return netdev_get_tx_queue(dev, queue_index); -- cgit v1.2.3 From eb46936b9f2b639f4edeeaf9154d49476fc30fe5 Mon Sep 17 00:00:00 2001 From: Vasanthakumar Thiagarajan Date: Tue, 23 Dec 2008 21:30:50 +0530 Subject: mac80211: Scale down to non-HT association with TKIP/WEP as pairwise cipher As TKIP is not updated to new security needs which arise when TKIP is used to encrypt A-MPDU aggregated data frames, IEEE802.11n does not allow any cipher other than CCMP (Which has new extensions defined) as pairwise cipher between HT peers. When such configuration (TKIP/WEP in HT) is forced, we still associate in non-HT mode (11a/b/g). Signed-off-by: Vasanthakumar Thiagarajan Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 1 + net/mac80211/iface.c | 3 ++- net/mac80211/mlme.c | 9 ++++++++- net/mac80211/wext.c | 12 +++++++++++- 4 files changed, 22 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index f3eec989662b..5f8ad885a48a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -258,6 +258,7 @@ struct mesh_preq_queue { #define IEEE80211_STA_AUTO_BSSID_SEL BIT(11) #define IEEE80211_STA_AUTO_CHANNEL_SEL BIT(12) #define IEEE80211_STA_PRIVACY_INVOKED BIT(13) +#define IEEE80211_STA_TKIP_WEP_USED BIT(14) /* flags for MLME request */ #define IEEE80211_STA_REQ_SCAN 0 #define IEEE80211_STA_REQ_DIRECT_PROBE 1 diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index b9074824862a..1eefc5df4954 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -459,7 +459,8 @@ static int ieee80211_stop(struct net_device *dev) synchronize_rcu(); skb_queue_purge(&sdata->u.sta.skb_queue); - sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED; + sdata->u.sta.flags &= ~(IEEE80211_STA_PRIVACY_INVOKED | + IEEE80211_STA_TKIP_WEP_USED); kfree(sdata->u.sta.extra_ie); sdata->u.sta.extra_ie = NULL; sdata->u.sta.extra_ie_len = 0; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 2b890af01ba4..b688425d7555 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -391,10 +391,17 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, } /* wmm support is a must to HT */ + /* + * IEEE802.11n does not allow TKIP/WEP as pairwise + * ciphers in HT mode. We still associate in non-ht + * mode (11a/b/g) if any one of these ciphers is + * configured as pairwise. + */ if (wmm && (ifsta->flags & IEEE80211_STA_WMM_ENABLED) && sband->ht_cap.ht_supported && (ht_ie = ieee80211_bss_get_ie(bss, WLAN_EID_HT_INFORMATION)) && - ht_ie[1] >= sizeof(struct ieee80211_ht_info)) { + ht_ie[1] >= sizeof(struct ieee80211_ht_info) && + (!(ifsta->flags & IEEE80211_STA_TKIP_WEP_USED))) { struct ieee80211_ht_info *ht_info = (struct ieee80211_ht_info *)(ht_ie + 2); u16 cap = sband->ht_cap.cap; diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 7162d5816f39..011592fd4528 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -903,12 +903,22 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev, switch (data->flags & IW_AUTH_INDEX) { case IW_AUTH_WPA_VERSION: - case IW_AUTH_CIPHER_PAIRWISE: case IW_AUTH_CIPHER_GROUP: case IW_AUTH_WPA_ENABLED: case IW_AUTH_RX_UNENCRYPTED_EAPOL: case IW_AUTH_KEY_MGMT: break; + case IW_AUTH_CIPHER_PAIRWISE: + if (sdata->vif.type == NL80211_IFTYPE_STATION) { + if (data->value & (IW_AUTH_CIPHER_WEP40 | + IW_AUTH_CIPHER_WEP104 | IW_AUTH_CIPHER_TKIP)) + sdata->u.sta.flags |= + IEEE80211_STA_TKIP_WEP_USED; + else + sdata->u.sta.flags &= + ~IEEE80211_STA_TKIP_WEP_USED; + } + break; case IW_AUTH_DROP_UNENCRYPTED: sdata->drop_unencrypted = !!data->value; break; -- cgit v1.2.3 From d063ed0f0cd623b45edc6f4781dda6478c56bb4f Mon Sep 17 00:00:00 2001 From: Vivek Natarajan Date: Tue, 23 Dec 2008 18:17:19 -0800 Subject: mac80211: Reset the power save timer from master_start_xmit. When a null data frame is generated from mac80211, it goes through master_start_xmit and not through subif_start_xmit. Hence for the power save timer to be triggered while sending this null data frame also, the timer has to be reset from master_start_xmit. Signed-off-by: Vivek Natarajan Signed-off-by: John W. Linville --- net/mac80211/tx.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 4278e545638f..0bf2272200ad 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1296,6 +1296,19 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev) return 0; } + if (!(local->hw.flags & IEEE80211_HW_NO_STACK_DYNAMIC_PS) && + local->dynamic_ps_timeout > 0) { + if (local->hw.conf.flags & IEEE80211_CONF_PS) { + ieee80211_stop_queues_by_reason(&local->hw, + IEEE80211_QUEUE_STOP_REASON_PS); + queue_work(local->hw.workqueue, + &local->dynamic_ps_disable_work); + } + + mod_timer(&local->dynamic_ps_timer, jiffies + + msecs_to_jiffies(local->dynamic_ps_timeout)); + } + memset(info, 0, sizeof(*info)); info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; @@ -1475,19 +1488,6 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, goto fail; } - if (!(local->hw.flags & IEEE80211_HW_NO_STACK_DYNAMIC_PS) && - local->dynamic_ps_timeout > 0) { - if (local->hw.conf.flags & IEEE80211_CONF_PS) { - ieee80211_stop_queues_by_reason(&local->hw, - IEEE80211_QUEUE_STOP_REASON_PS); - queue_work(local->hw.workqueue, - &local->dynamic_ps_disable_work); - } - - mod_timer(&local->dynamic_ps_timer, jiffies + - msecs_to_jiffies(local->dynamic_ps_timeout)); - } - nh_pos = skb_network_header(skb) - skb->data; h_pos = skb_transport_header(skb) - skb->data; -- cgit v1.2.3 From 869717fbe43eb831cbebd03a9a66a4a4c3b406a9 Mon Sep 17 00:00:00 2001 From: Vivek Natarajan Date: Tue, 23 Dec 2008 18:28:34 -0800 Subject: mac80211: A couple of fixes to dynamic power save. a) hw_config() should not be called from siwpower() for the drivers which do not support dynamic powersave. b) IEEE80211_HW_NO_STACK_DYNAMIC_PS needs to be verified in set_associated() also before enabling the power save timers. Signed-off-by: Vivek Natarajan Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 3 ++- net/mac80211/wext.c | 9 +++++---- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index b688425d7555..b3c99d3c61ba 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -751,7 +751,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, bss_info_changed |= BSS_CHANGED_BASIC_RATES; ieee80211_bss_info_change_notify(sdata, bss_info_changed); - if (local->powersave) { + if (local->powersave && + !(local->hw.flags & IEEE80211_HW_NO_STACK_DYNAMIC_PS)) { if (local->dynamic_ps_timeout > 0) mod_timer(&local->dynamic_ps_timer, jiffies + msecs_to_jiffies(local->dynamic_ps_timeout)); diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 011592fd4528..8568f1e7266f 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -865,9 +865,9 @@ set: local->powersave = ps; local->dynamic_ps_timeout = timeout; - if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) { - if (!(local->hw.flags & IEEE80211_HW_NO_STACK_DYNAMIC_PS) && - local->dynamic_ps_timeout > 0) + if (!(local->hw.flags & IEEE80211_HW_NO_STACK_DYNAMIC_PS) && + (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED)) { + if (local->dynamic_ps_timeout > 0) mod_timer(&local->dynamic_ps_timer, jiffies + msecs_to_jiffies(local->dynamic_ps_timeout)); else { @@ -875,8 +875,9 @@ set: conf->flags |= IEEE80211_CONF_PS; else conf->flags &= ~IEEE80211_CONF_PS; + ret = ieee80211_hw_config(local, + IEEE80211_CONF_CHANGE_PS); } - ret = ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); } return ret; -- cgit v1.2.3 From a97b77b90decf27a86ac40ea53a741ffb5ead21a Mon Sep 17 00:00:00 2001 From: Vivek Natarajan Date: Tue, 23 Dec 2008 18:39:02 -0800 Subject: mac80211: Enhancements to dynamic power save. This patch enables mac80211 to send a null frame and also to check for tim in the beacon if dynamic power save is enabled. Signed-off-by: Vivek Natarajan Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 3 +++ net/mac80211/mlme.c | 41 ++++++++++++++++++++++++++++++++++++++++- net/mac80211/scan.c | 2 +- net/mac80211/wext.c | 13 +++++++++---- 4 files changed, 53 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 5f8ad885a48a..117718bd96ec 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -987,6 +987,9 @@ u64 ieee80211_mandatory_rates(struct ieee80211_local *local, void ieee80211_dynamic_ps_enable_work(struct work_struct *work); void ieee80211_dynamic_ps_disable_work(struct work_struct *work); void ieee80211_dynamic_ps_timer(unsigned long data); +void ieee80211_send_nullfunc(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + int powersave); void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, enum queue_stop_reason reason); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index b3c99d3c61ba..599a42172a16 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -575,6 +575,30 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, } } +static bool check_tim(struct ieee802_11_elems *elems, u16 aid, bool *is_mc) +{ + u8 mask; + u8 index, indexn1, indexn2; + struct ieee80211_tim_ie *tim = (struct ieee80211_tim_ie *) elems->tim; + + aid &= 0x3fff; + index = aid / 8; + mask = 1 << (aid & 7); + + if (tim->bitmap_ctrl & 0x01) + *is_mc = true; + + indexn1 = tim->bitmap_ctrl & 0xfe; + indexn2 = elems->tim_len + indexn1 - 4; + + if (index < indexn1 || index > indexn2) + return false; + + index -= indexn1; + + return !!(tim->virtual_map[index] & mask); +} + static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, u16 capab, bool erp_valid, u8 erp) { @@ -757,6 +781,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, mod_timer(&local->dynamic_ps_timer, jiffies + msecs_to_jiffies(local->dynamic_ps_timeout)); else { + ieee80211_send_nullfunc(local, sdata, 1); conf->flags |= IEEE80211_CONF_PS; ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); @@ -1720,7 +1745,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems elems; struct ieee80211_local *local = sdata->local; u32 changed = 0; - bool erp_valid; + bool erp_valid, directed_tim, is_mc = false; u8 erp_value = 0; /* Process beacon from the current BSS */ @@ -1743,6 +1768,18 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_sta_wmm_params(local, ifsta, elems.wmm_param, elems.wmm_param_len); + if (!(local->hw.flags & IEEE80211_HW_NO_STACK_DYNAMIC_PS)) { + directed_tim = check_tim(&elems, ifsta->aid, &is_mc); + + if (directed_tim || is_mc) { + if (local->hw.conf.flags && IEEE80211_CONF_PS) { + local->hw.conf.flags &= ~IEEE80211_CONF_PS; + ieee80211_hw_config(local, + IEEE80211_CONF_CHANGE_PS); + ieee80211_send_nullfunc(local, sdata, 0); + } + } + } if (elems.erp_info && elems.erp_info_len >= 1) { erp_valid = true; @@ -2631,10 +2668,12 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) struct ieee80211_local *local = container_of(work, struct ieee80211_local, dynamic_ps_enable_work); + struct ieee80211_sub_if_data *sdata = local->scan_sdata; if (local->hw.conf.flags & IEEE80211_CONF_PS) return; + ieee80211_send_nullfunc(local, sdata, 1); local->hw.conf.flags |= IEEE80211_CONF_PS; ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index f5c7c3371929..a2caeed57f4e 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -395,7 +395,7 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, return RX_QUEUED; } -static void ieee80211_send_nullfunc(struct ieee80211_local *local, +void ieee80211_send_nullfunc(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, int powersave) { diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 8568f1e7266f..48fc6b9a62a4 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -871,12 +871,17 @@ set: mod_timer(&local->dynamic_ps_timer, jiffies + msecs_to_jiffies(local->dynamic_ps_timeout)); else { - if (local->powersave) + if (local->powersave) { + ieee80211_send_nullfunc(local, sdata, 1); conf->flags |= IEEE80211_CONF_PS; - else + ret = ieee80211_hw_config(local, + IEEE80211_CONF_CHANGE_PS); + } else { conf->flags &= ~IEEE80211_CONF_PS; - ret = ieee80211_hw_config(local, - IEEE80211_CONF_CHANGE_PS); + ret = ieee80211_hw_config(local, + IEEE80211_CONF_CHANGE_PS); + ieee80211_send_nullfunc(local, sdata, 0); + } } } -- cgit v1.2.3 From 7cbf0ba5193d1f3bb3caaa06668e22bc86776e41 Mon Sep 17 00:00:00 2001 From: Vivek Natarajan Date: Wed, 24 Dec 2008 00:34:37 -0800 Subject: mac80211: Cancel the power save timer in ieee80211_stop. Since the station info is flushed before calling set_disassoc in ieee80211_stop, the power save timer is never cancelled when the driver is unloaded. Hence the timer cancellation has to be done in ieee80211_stop itself. Signed-off-by: Vivek Natarajan Signed-off-by: John W. Linville --- net/mac80211/iface.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 1eefc5df4954..8e0e3303ca8c 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -383,6 +383,8 @@ static int ieee80211_stop(struct net_device *dev) atomic_dec(&local->iff_promiscs); dev_mc_unsync(local->mdev, dev); + del_timer_sync(&local->dynamic_ps_timer); + cancel_work_sync(&local->dynamic_ps_enable_work); /* APs need special treatment */ if (sdata->vif.type == NL80211_IFTYPE_AP) { -- cgit v1.2.3 From 285256a59d790c6a9afe8ec82804a369d956ac06 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 23 Dec 2008 15:58:45 -0800 Subject: mac80211: no need for ht.enabled We can simply use conf_is_ht() check where needed. Signed-off-by: Luis R. Rodriguez Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ht.c | 3 +-- net/mac80211/main.c | 10 ---------- net/mac80211/mlme.c | 1 - 3 files changed, 1 insertion(+), 13 deletions(-) (limited to 'net') diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index c5c0c5271096..f6547de5ac6b 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -130,11 +130,10 @@ u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, } } - ht_changed = local->hw.conf.ht.enabled != enable_ht || + ht_changed = conf_is_ht(&local->hw.conf) != enable_ht || channel_type != local->hw.conf.ht.channel_type; local->oper_channel_type = channel_type; - local->hw.conf.ht.enabled = enable_ht; if (ht_changed) ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_HT); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 24b14363d6e7..a6cb480dda0d 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -211,16 +211,6 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) channel_type != local->hw.conf.ht.channel_type) { local->hw.conf.channel = chan; local->hw.conf.ht.channel_type = channel_type; - switch (channel_type) { - case NL80211_CHAN_NO_HT: - local->hw.conf.ht.enabled = false; - break; - case NL80211_CHAN_HT20: - case NL80211_CHAN_HT40MINUS: - case NL80211_CHAN_HT40PLUS: - local->hw.conf.ht.enabled = true; - break; - } changed |= IEEE80211_CONF_CHANGE_CHANNEL; } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 599a42172a16..12976026cc45 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -901,7 +901,6 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); - local->hw.conf.ht.enabled = false; local->oper_channel_type = NL80211_CHAN_NO_HT; config_changed |= IEEE80211_CONF_CHANGE_HT; -- cgit v1.2.3 From e3c92df08cbf6a0cb60a9c7ce377378383967e07 Mon Sep 17 00:00:00 2001 From: Vasanthakumar Thiagarajan Date: Wed, 24 Dec 2008 13:53:11 +0530 Subject: mac80211: Fix tx power setting power_level in ieee80211_conf is being used for more than one purpose. It being used as user configured power limit and the final power limit given to the driver. By doing so, except very first time, the tx power limit is taken from min(chan->max_power, local->hw.conf.power_level) which is not what we want. This patch defines a new memeber in ieee80211_conf which is meant only for user configured power limit. Signed-off-by: Vasanthakumar Thiagarajan Signed-off-by: John W. Linville --- net/mac80211/main.c | 4 ++-- net/mac80211/wext.c | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index a6cb480dda0d..dca4b7da6cad 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -214,10 +214,10 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) changed |= IEEE80211_CONF_CHANGE_CHANNEL; } - if (!local->hw.conf.power_level) + if (!local->hw.conf.user_power_level) power = chan->max_power; else - power = min(chan->max_power, local->hw.conf.power_level); + power = min(chan->max_power, local->hw.conf.user_power_level); if (local->hw.conf.power_level != power) { changed |= IEEE80211_CONF_CHANGE_POWER; local->hw.conf.power_level = power; diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 48fc6b9a62a4..654041b93736 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -549,10 +549,9 @@ static int ieee80211_ioctl_siwtxpower(struct net_device *dev, else /* Automatic power level setting */ new_power_level = chan->max_power; - if (local->hw.conf.power_level != new_power_level) { - local->hw.conf.power_level = new_power_level; + local->hw.conf.user_power_level = new_power_level; + if (local->hw.conf.power_level != new_power_level) reconf_flags |= IEEE80211_CONF_CHANGE_POWER; - } if (local->hw.conf.radio_enabled != !(data->txpower.disabled)) { local->hw.conf.radio_enabled = !(data->txpower.disabled); -- cgit v1.2.3 From b3093664c931aa06fc50da42e25b3b6dc307a915 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Mon, 29 Dec 2008 10:02:48 +0200 Subject: mac80211: make wake/stop_queue_by_reason() functions static Fixes sparse warnings: net/mac80211/util.c:355:6: warning: symbol 'ieee80211_wake_queue_by_reason' was not declared. Should it be static? net/mac80211/util.c:385:6: warning: symbol 'ieee80211_stop_queue_by_reason' was not declared. Should it be static? Thanks to Johannes Berg for reporting this. Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- net/mac80211/util.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/util.c b/net/mac80211/util.c index fb89e1d0aa03..5cd430333f08 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -352,8 +352,8 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue, } } -void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue, - enum queue_stop_reason reason) +static void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue, + enum queue_stop_reason reason) { struct ieee80211_local *local = hw_to_local(hw); unsigned long flags; @@ -382,8 +382,8 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue, netif_stop_subqueue(local->mdev, queue); } -void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue, - enum queue_stop_reason reason) +static void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue, + enum queue_stop_reason reason) { struct ieee80211_local *local = hw_to_local(hw); unsigned long flags; -- cgit v1.2.3 From dc822b5db479dc0178d5c04cbb656dad0b6564fb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Dec 2008 12:55:09 +0100 Subject: mac80211: clean up set_key callback The set_key callback now seems rather odd, passing a MAC address instead of a station struct, and a local address instead of a vif struct. Change that. Signed-off-by: Johannes Berg Acked-by: Bob Copeland [ath5k] Acked-by: Ivo van Doorn [rt2x00] Acked-by: Christian Lamparter [p54] Tested-by: Kalle Valo [iwl3945] Tested-by: Samuel Ortiz [iwl3945] Signed-off-by: John W. Linville --- net/mac80211/key.c | 51 +++++++++++++++++++++++++-------------------------- 1 file changed, 25 insertions(+), 26 deletions(-) (limited to 'net') diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 999f7aa42326..b0a025c9b615 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -47,7 +47,6 @@ */ static const u8 bcast_addr[ETH_ALEN] = { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; -static const u8 zero_addr[ETH_ALEN]; /* key mutex: used to synchronise todo runners */ static DEFINE_MUTEX(key_mutex); @@ -108,29 +107,18 @@ static void assert_key_lock(void) WARN_ON(!mutex_is_locked(&key_mutex)); } -static const u8 *get_mac_for_key(struct ieee80211_key *key) +static struct ieee80211_sta *get_sta_for_key(struct ieee80211_key *key) { - const u8 *addr = bcast_addr; - - /* - * If we're an AP we won't ever receive frames with a non-WEP - * group key so we tell the driver that by using the zero MAC - * address to indicate a transmit-only key. - */ - if (key->conf.alg != ALG_WEP && - (key->sdata->vif.type == NL80211_IFTYPE_AP || - key->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) - addr = zero_addr; - if (key->sta) - addr = key->sta->sta.addr; + return &key->sta->sta; - return addr; + return NULL; } static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) { - const u8 *addr; + struct ieee80211_sub_if_data *sdata; + struct ieee80211_sta *sta; int ret; assert_key_lock(); @@ -139,11 +127,16 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) if (!key->local->ops->set_key) return; - addr = get_mac_for_key(key); + sta = get_sta_for_key(key); + + sdata = key->sdata; + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata = container_of(sdata->bss, + struct ieee80211_sub_if_data, + u.ap); ret = key->local->ops->set_key(local_to_hw(key->local), SET_KEY, - key->sdata->dev->dev_addr, addr, - &key->conf); + &sdata->vif, sta, &key->conf); if (!ret) { spin_lock(&todo_lock); @@ -155,12 +148,13 @@ static void ieee80211_key_enable_hw_accel(struct ieee80211_key *key) printk(KERN_ERR "mac80211-%s: failed to set key " "(%d, %pM) to hardware (%d)\n", wiphy_name(key->local->hw.wiphy), - key->conf.keyidx, addr, ret); + key->conf.keyidx, sta ? sta->addr : bcast_addr, ret); } static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) { - const u8 *addr; + struct ieee80211_sub_if_data *sdata; + struct ieee80211_sta *sta; int ret; assert_key_lock(); @@ -176,17 +170,22 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) } spin_unlock(&todo_lock); - addr = get_mac_for_key(key); + sta = get_sta_for_key(key); + sdata = key->sdata; + + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata = container_of(sdata->bss, + struct ieee80211_sub_if_data, + u.ap); ret = key->local->ops->set_key(local_to_hw(key->local), DISABLE_KEY, - key->sdata->dev->dev_addr, addr, - &key->conf); + &sdata->vif, sta, &key->conf); if (ret) printk(KERN_ERR "mac80211-%s: failed to remove key " "(%d, %pM) from hardware (%d)\n", wiphy_name(key->local->hw.wiphy), - key->conf.keyidx, addr, ret); + key->conf.keyidx, sta ? sta->addr : bcast_addr, ret); spin_lock(&todo_lock); key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; -- cgit v1.2.3 From 0efcdfd6ed4e7ac74c45e7c3218fd1a7416fdb3f Mon Sep 17 00:00:00 2001 From: Alina Friedrichsen Date: Tue, 6 Jan 2009 02:41:35 +0100 Subject: mac80211: Disallow to set multicast BSSID Okay, here is the first of the five patches. After applying all of them you should be able to build/join huge city mesh networks (e.g. with the OLSR protocol) with the most of the mac80211 wireless drivers by setting a fixed BSSID in the ad hoc mode. (If you found no other bug/problem.) This was not specified in the original standard, but is a widely used de facto standard. The first patch now completely disallow to set multicast MAC addresses as BSSID. The behavior before was really strange. Signed-off-by: Alina Friedrichsen Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 12976026cc45..f80dc2535709 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2548,11 +2548,16 @@ int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid) { struct ieee80211_if_sta *ifsta; int res; + bool valid; ifsta = &sdata->u.sta; + valid = is_valid_ether_addr(bssid); if (memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0) { - memcpy(ifsta->bssid, bssid, ETH_ALEN); + if(valid) + memcpy(ifsta->bssid, bssid, ETH_ALEN); + else + memset(ifsta->bssid, 0, ETH_ALEN); res = 0; /* * Hack! See also ieee80211_sta_set_ssid. @@ -2566,7 +2571,7 @@ int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid) } } - if (is_valid_ether_addr(bssid)) + if (valid) ifsta->flags |= IEEE80211_STA_BSSID_SET; else ifsta->flags &= ~IEEE80211_STA_BSSID_SET; -- cgit v1.2.3 From 137f9f46a4edf8a937ffe9e3dba498b5cfaa1e5b Mon Sep 17 00:00:00 2001 From: Alina Friedrichsen Date: Tue, 6 Jan 2009 02:49:07 +0100 Subject: mac80211: Don't scan if BSSID and channel are set manually If you set a fixed BSSID and channel it's not necessary to scan for neighbors to merge, because you really don't want to merge with it. So don't do it. Signed-off-by: Alina Friedrichsen Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f80dc2535709..563ceb4d2252 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2009,6 +2009,10 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata, if (ieee80211_sta_active_ibss(sdata)) return; + if ((sdata->u.sta.flags & IEEE80211_STA_BSSID_SET) && + (!(sdata->u.sta.flags & IEEE80211_STA_AUTO_CHANNEL_SEL))) + return; + printk(KERN_DEBUG "%s: No active IBSS STAs - trying to scan for other " "IBSS networks with same SSID (merge)\n", sdata->dev->name); ieee80211_request_scan(sdata, ifsta->ssid, ifsta->ssid_len); -- cgit v1.2.3 From 65f0e6a36e25fbfa6adf706d9c53bf64b13096eb Mon Sep 17 00:00:00 2001 From: Alina Friedrichsen Date: Tue, 6 Jan 2009 03:08:10 +0100 Subject: mac80211: Don't merge if BSSID is set manually If you set a fixed BSSID manually, you never want that the driver change it back, or your ad-hoc mesh network will break into peaces. So don't do it. Signed-off-by: Alina Friedrichsen Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 563ceb4d2252..2db56605a2b6 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1644,6 +1644,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, /* check if we need to merge IBSS */ if (sdata->vif.type == NL80211_IFTYPE_ADHOC && beacon && + (!(sdata->u.sta.flags & IEEE80211_STA_BSSID_SET)) && bss->capability & WLAN_CAPABILITY_IBSS && bss->freq == local->oper_channel->center_freq && elems->ssid_len == sdata->u.sta.ssid_len && -- cgit v1.2.3 From b522ed56ef90f5078a2a1253e390299723510a89 Mon Sep 17 00:00:00 2001 From: Alina Friedrichsen Date: Tue, 6 Jan 2009 03:15:23 +0100 Subject: mac80211: Allow to set channel in adhoc properly The last patch fixes a bug that it was not possible to set the channel manually in the ad hoc mode properly. Please commit this patches so that we don't need the proprietary Broadcom driver in the near future anymore. Signed-off-by: Alina Friedrichsen Signed-off-by: John W. Linville --- net/mac80211/wext.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 654041b93736..bb2c7135a1c8 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -230,13 +230,15 @@ static int ieee80211_ioctl_siwfreq(struct net_device *dev, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type == NL80211_IFTYPE_STATION) + if (sdata->vif.type == NL80211_IFTYPE_ADHOC || + sdata->vif.type == NL80211_IFTYPE_STATION) sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_CHANNEL_SEL; /* freq->e == 0: freq->m = channel; otherwise freq = m * 10^e */ if (freq->e == 0) { if (freq->m < 0) { - if (sdata->vif.type == NL80211_IFTYPE_STATION) + if (sdata->vif.type == NL80211_IFTYPE_ADHOC || + sdata->vif.type == NL80211_IFTYPE_STATION) sdata->u.sta.flags |= IEEE80211_STA_AUTO_CHANNEL_SEL; return 0; -- cgit v1.2.3 From c481ec9705d4a5d566393bc17374cfd82c870715 Mon Sep 17 00:00:00 2001 From: Sujith Date: Tue, 6 Jan 2009 09:28:37 +0530 Subject: mac80211: Add 802.11h CSA support Move to the advertised channel on reception of a CSA element. This is needed for 802.11h compliance. Signed-off-by: Sujith Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 11 ++++++- net/mac80211/iface.c | 2 ++ net/mac80211/mlme.c | 13 ++++++++ net/mac80211/rx.c | 20 ++++++++++++ net/mac80211/spectmgmt.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 122 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 117718bd96ec..d2a007aa8e73 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -259,6 +259,7 @@ struct mesh_preq_queue { #define IEEE80211_STA_AUTO_CHANNEL_SEL BIT(12) #define IEEE80211_STA_PRIVACY_INVOKED BIT(13) #define IEEE80211_STA_TKIP_WEP_USED BIT(14) +#define IEEE80211_STA_CSA_RECEIVED BIT(15) /* flags for MLME request */ #define IEEE80211_STA_REQ_SCAN 0 #define IEEE80211_STA_REQ_DIRECT_PROBE 1 @@ -283,7 +284,9 @@ enum ieee80211_sta_mlme_state { struct ieee80211_if_sta { struct timer_list timer; + struct timer_list chswitch_timer; struct work_struct work; + struct work_struct chswitch_work; u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; u8 ssid[IEEE80211_MAX_SSID_LEN]; enum ieee80211_sta_mlme_state state; @@ -542,6 +545,7 @@ enum { enum queue_stop_reason { IEEE80211_QUEUE_STOP_REASON_DRIVER, IEEE80211_QUEUE_STOP_REASON_PS, + IEEE80211_QUEUE_STOP_REASON_CSA }; /* maximum number of hardware queues we support. */ @@ -631,7 +635,7 @@ struct ieee80211_local { unsigned long last_scan_completed; struct delayed_work scan_work; struct ieee80211_sub_if_data *scan_sdata; - struct ieee80211_channel *oper_channel, *scan_channel; + struct ieee80211_channel *oper_channel, *scan_channel, *csa_channel; enum nl80211_channel_type oper_channel_type; u8 scan_ssid[IEEE80211_MAX_SSID_LEN]; size_t scan_ssid_len; @@ -964,6 +968,11 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len); +void ieee80211_chswitch_timer(unsigned long data); +void ieee80211_chswitch_work(struct work_struct *work); +void ieee80211_process_chanswitch(struct ieee80211_sub_if_data *sdata, + struct ieee80211_channel_sw_ie *sw_elem, + struct ieee80211_bss *bss); /* utility functions/constants */ extern void *mac80211_wiphy_privid; /* for wiphy privid */ diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 8e0e3303ca8c..5d5a029228be 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -443,6 +443,7 @@ static int ieee80211_stop(struct net_device *dev) WLAN_REASON_DEAUTH_LEAVING); memset(sdata->u.sta.bssid, 0, ETH_ALEN); + del_timer_sync(&sdata->u.sta.chswitch_timer); del_timer_sync(&sdata->u.sta.timer); /* * If the timer fired while we waited for it, it will have @@ -452,6 +453,7 @@ static int ieee80211_stop(struct net_device *dev) * it no longer is. */ cancel_work_sync(&sdata->u.sta.work); + cancel_work_sync(&sdata->u.sta.chswitch_work); /* * When we get here, the interface is marked down. * Call synchronize_rcu() to wait for the RX path diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 2db56605a2b6..cac4f65d9e61 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1629,6 +1629,13 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, if (!bss) return; + if (elems->ch_switch_elem && (elems->ch_switch_elem_len == 3) && + (memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0)) { + struct ieee80211_channel_sw_ie *sw_elem = + (struct ieee80211_channel_sw_ie *)elems->ch_switch_elem; + ieee80211_process_chanswitch(sdata, sw_elem, bss); + } + /* was just updated in ieee80211_bss_info_update */ beacon_timestamp = bss->timestamp; @@ -1765,6 +1772,9 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) return; + if (rx_status->freq != local->hw.conf.channel->center_freq) + return; + ieee80211_sta_wmm_params(local, ifsta, elems.wmm_param, elems.wmm_param_len); @@ -2425,8 +2435,11 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) ifsta = &sdata->u.sta; INIT_WORK(&ifsta->work, ieee80211_sta_work); + INIT_WORK(&ifsta->chswitch_work, ieee80211_chswitch_work); setup_timer(&ifsta->timer, ieee80211_sta_timer, (unsigned long) sdata); + setup_timer(&ifsta->chswitch_timer, ieee80211_chswitch_timer, + (unsigned long) sdata); skb_queue_head_init(&ifsta->skb_queue); ifsta->capab = WLAN_CAPABILITY_ESS; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 7175ae80c36a..ddb966f58882 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1552,7 +1552,9 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) { struct ieee80211_local *local = rx->local; struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); + struct ieee80211_if_sta *ifsta = &sdata->u.sta; struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; + struct ieee80211_bss *bss; int len = rx->skb->len; if (!ieee80211_is_action(mgmt->frame_control)) @@ -1601,6 +1603,24 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) return RX_DROP_MONITOR; ieee80211_process_measurement_req(sdata, mgmt, len); break; + case WLAN_ACTION_SPCT_CHL_SWITCH: + if (len < (IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.chan_switch))) + return RX_DROP_MONITOR; + + if (memcmp(mgmt->bssid, ifsta->bssid, ETH_ALEN) != 0) + return RX_DROP_MONITOR; + + bss = ieee80211_rx_bss_get(local, ifsta->bssid, + local->hw.conf.channel->center_freq, + ifsta->ssid, ifsta->ssid_len); + if (!bss) + return RX_DROP_MONITOR; + + ieee80211_process_chanswitch(sdata, + &mgmt->u.action.u.chan_switch.sw_elem, bss); + ieee80211_rx_bss_put(local, bss); + break; } break; default: diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index f72bad636d8e..22ad4808e01a 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -84,3 +84,80 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, mgmt->sa, mgmt->bssid, mgmt->u.action.u.measurement.dialog_token); } + +void ieee80211_chswitch_work(struct work_struct *work) +{ + struct ieee80211_sub_if_data *sdata = + container_of(work, struct ieee80211_sub_if_data, u.sta.chswitch_work); + struct ieee80211_bss *bss; + struct ieee80211_if_sta *ifsta = &sdata->u.sta; + + if (!netif_running(sdata->dev)) + return; + + bss = ieee80211_rx_bss_get(sdata->local, ifsta->bssid, + sdata->local->hw.conf.channel->center_freq, + ifsta->ssid, ifsta->ssid_len); + if (!bss) + goto exit; + + sdata->local->oper_channel = sdata->local->csa_channel; + if (!ieee80211_hw_config(sdata->local, IEEE80211_CONF_CHANGE_CHANNEL)) + bss->freq = sdata->local->oper_channel->center_freq; + + ieee80211_rx_bss_put(sdata->local, bss); +exit: + ifsta->flags &= ~IEEE80211_STA_CSA_RECEIVED; + ieee80211_wake_queues_by_reason(&sdata->local->hw, + IEEE80211_QUEUE_STOP_REASON_CSA); +} + +void ieee80211_chswitch_timer(unsigned long data) +{ + struct ieee80211_sub_if_data *sdata = + (struct ieee80211_sub_if_data *) data; + struct ieee80211_if_sta *ifsta = &sdata->u.sta; + + queue_work(sdata->local->hw.workqueue, &ifsta->chswitch_work); +} + +void ieee80211_process_chanswitch(struct ieee80211_sub_if_data *sdata, + struct ieee80211_channel_sw_ie *sw_elem, + struct ieee80211_bss *bss) +{ + struct ieee80211_channel *new_ch; + struct ieee80211_if_sta *ifsta = &sdata->u.sta; + int new_freq = ieee80211_channel_to_frequency(sw_elem->new_ch_num); + + /* FIXME: Handle ADHOC later */ + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return; + + if (ifsta->state != IEEE80211_STA_MLME_ASSOCIATED) + return; + + if (sdata->local->sw_scanning || sdata->local->hw_scanning) + return; + + /* Disregard subsequent beacons if we are already running a timer + processing a CSA */ + + if (ifsta->flags & IEEE80211_STA_CSA_RECEIVED) + return; + + new_ch = ieee80211_get_channel(sdata->local->hw.wiphy, new_freq); + if (!new_ch || new_ch->flags & IEEE80211_CHAN_DISABLED) + return; + + sdata->local->csa_channel = new_ch; + + if (sw_elem->count <= 1) { + queue_work(sdata->local->hw.workqueue, &ifsta->chswitch_work); + } else { + ieee80211_stop_queues_by_reason(&sdata->local->hw, + IEEE80211_QUEUE_STOP_REASON_CSA); + ifsta->flags |= IEEE80211_STA_CSA_RECEIVED; + mod_timer(&ifsta->chswitch_timer, + jiffies + msecs_to_jiffies(sw_elem->count * bss->beacon_int)); + } +} -- cgit v1.2.3 From def1343971b2abd158ece1a71dd1c7a20e4c2fcb Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Tue, 6 Jan 2009 10:50:33 +0200 Subject: mac80211: remove an unnecessary assignment to info in __ieee80211_tx(). This patch removes an unnecessary assignment to info in __ieee80211_tx() , tx.c. Signed-off-by: Rami Rosen Signed-off-by: John W. Linville --- net/mac80211/tx.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 0bf2272200ad..96eca341160b 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1053,7 +1053,6 @@ static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb, if (skb) { if (netif_subqueue_stopped(local->mdev, skb)) return IEEE80211_TX_AGAIN; - info = IEEE80211_SKB_CB(skb); ret = local->ops->tx(local_to_hw(local), skb); if (ret) -- cgit v1.2.3 From 504a71e4c2718d8ef5dc5bff89dea47a91cf87e5 Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Tue, 6 Jan 2009 10:50:51 +0200 Subject: mac80211: remove an unused parameter in ieee80211_rx_mgmt_probe_req(). This patch removes an unused parameter (rx_status) in ieee80211_rx_mgmt_probe_req(), in mlme.c. Signed-off-by: Rami Rosen Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index cac4f65d9e61..aafa112ae09c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1842,8 +1842,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta, struct ieee80211_mgmt *mgmt, - size_t len, - struct ieee80211_rx_status *rx_status) + size_t len) { struct ieee80211_local *local = sdata->local; int tx_last_beacon; @@ -1958,8 +1957,7 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, switch (fc & IEEE80211_FCTL_STYPE) { case IEEE80211_STYPE_PROBE_REQ: - ieee80211_rx_mgmt_probe_req(sdata, ifsta, mgmt, skb->len, - rx_status); + ieee80211_rx_mgmt_probe_req(sdata, ifsta, mgmt, skb->len); break; case IEEE80211_STYPE_PROBE_RESP: ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len, rx_status); -- cgit v1.2.3 From 81d963a1f6aeefca5527cc605f863eb82a634eab Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Tue, 6 Jan 2009 10:51:01 +0200 Subject: mac80211: remove an unused definition (MAX_STA_COUNT) in sta_info.h. This patch removes an unused definition of MAX_STA_COUNT in sta_info.h. Signed-off-by: Rami Rosen Signed-off-by: John W. Linville --- net/mac80211/sta_info.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index e49a5b99cf10..b683d3f5ef8a 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -382,8 +382,6 @@ static inline u32 get_sta_flags(struct sta_info *sta) } -/* Maximum number of concurrently registered stations */ -#define MAX_STA_COUNT 2007 #define STA_HASH_SIZE 256 #define STA_HASH(sta) (sta[5]) -- cgit v1.2.3 From 8fe12920dc5fa0a0db7cad3661223d5f78a39c60 Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Tue, 6 Jan 2009 15:24:57 +0200 Subject: mac80211: remove unused variable in ieee80211_local (dot11WEPUndecryptableCount). This patch removes an unused declaration of dot11WEPUndecryptableCount (an snmp counter) in ieee80211_local structure and its usage in debugfs.c since this counter is not incremented/decremented anywhere. Signed-off-by: Rami Rosen Signed-off-by: John W. Linville --- net/mac80211/debugfs.c | 4 ---- net/mac80211/ieee80211_i.h | 1 - 2 files changed, 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 2697a2fe608f..18541bb75096 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -136,8 +136,6 @@ DEBUGFS_STATS_FILE(multicast_received_frame_count, 20, "%u", local->dot11MulticastReceivedFrameCount); DEBUGFS_STATS_FILE(transmitted_frame_count, 20, "%u", local->dot11TransmittedFrameCount); -DEBUGFS_STATS_FILE(wep_undecryptable_count, 20, "%u", - local->dot11WEPUndecryptableCount); #ifdef CONFIG_MAC80211_DEBUG_COUNTERS DEBUGFS_STATS_FILE(tx_handlers_drop, 20, "%u", local->tx_handlers_drop); @@ -221,7 +219,6 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_STATS_ADD(received_fragment_count); DEBUGFS_STATS_ADD(multicast_received_frame_count); DEBUGFS_STATS_ADD(transmitted_frame_count); - DEBUGFS_STATS_ADD(wep_undecryptable_count); #ifdef CONFIG_MAC80211_DEBUG_COUNTERS DEBUGFS_STATS_ADD(tx_handlers_drop); DEBUGFS_STATS_ADD(tx_handlers_queued); @@ -268,7 +265,6 @@ void debugfs_hw_del(struct ieee80211_local *local) DEBUGFS_STATS_DEL(received_fragment_count); DEBUGFS_STATS_DEL(multicast_received_frame_count); DEBUGFS_STATS_DEL(transmitted_frame_count); - DEBUGFS_STATS_DEL(wep_undecryptable_count); DEBUGFS_STATS_DEL(num_scans); #ifdef CONFIG_MAC80211_DEBUG_COUNTERS DEBUGFS_STATS_DEL(tx_handlers_drop); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index d2a007aa8e73..85c4d3144f9f 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -654,7 +654,6 @@ struct ieee80211_local { u32 dot11ReceivedFragmentCount; u32 dot11MulticastReceivedFrameCount; u32 dot11TransmittedFrameCount; - u32 dot11WEPUndecryptableCount; #ifdef CONFIG_MAC80211_LEDS int tx_led_counter, rx_led_counter; -- cgit v1.2.3 From 2bf30fabadbdcb535b057afc92aba015884847dc Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 Jan 2009 23:23:56 +0100 Subject: mac80211: remove user_power_level from driver API I missed this during review of "mac80211: Fix tx power setting", the user_power_level shouldn't be available to the driver but rather be an internal value used to calculate the value for the driver. Signed-off-by: Johannes Berg Cc: Vasanthakumar Thiagarajan Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/main.c | 4 ++-- net/mac80211/wext.c | 2 +- 3 files changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 85c4d3144f9f..fa5ca14517f5 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -705,6 +705,8 @@ struct ieee80211_local { struct work_struct dynamic_ps_disable_work; struct timer_list dynamic_ps_timer; + int user_power_level; /* in dBm */ + #ifdef CONFIG_MAC80211_DEBUGFS struct local_debugfsdentries { struct dentry *rcdir; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index dca4b7da6cad..b55b9970dc97 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -214,10 +214,10 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) changed |= IEEE80211_CONF_CHANGE_CHANNEL; } - if (!local->hw.conf.user_power_level) + if (!local->user_power_level) power = chan->max_power; else - power = min(chan->max_power, local->hw.conf.user_power_level); + power = min(chan->max_power, local->user_power_level); if (local->hw.conf.power_level != power) { changed |= IEEE80211_CONF_CHANGE_POWER; local->hw.conf.power_level = power; diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index bb2c7135a1c8..5690c3d41e7d 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -551,7 +551,7 @@ static int ieee80211_ioctl_siwtxpower(struct net_device *dev, else /* Automatic power level setting */ new_power_level = chan->max_power; - local->hw.conf.user_power_level = new_power_level; + local->user_power_level = new_power_level; if (local->hw.conf.power_level != new_power_level) reconf_flags |= IEEE80211_CONF_CHANGE_POWER; -- cgit v1.2.3 From d1c3a37ceeb1a5ea02991a0476355f1a1d3b3e83 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 7 Jan 2009 00:26:10 +0100 Subject: mac80211: clarify alignment docs, fix up alignment Not all drivers are capable of passing properly aligned frames, in particular with mesh networking no hardware will support completely aligning it correctly. This patch adds code to align the data payload to a 4-byte boundary in memory for those platforms that require this, or when CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT is set. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/rx.c | 110 ++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 77 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index ddb966f58882..b68e082e99ce 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -102,7 +102,7 @@ ieee80211_rx_radiotap_len(struct ieee80211_local *local, return len; } -/** +/* * ieee80211_add_rx_radiotap_header - add radiotap header * * add a radiotap header containing all the fields which the hardware provided. @@ -371,39 +371,50 @@ static void ieee80211_parse_qos(struct ieee80211_rx_data *rx) rx->skb->priority = (tid > 7) ? 0 : tid; } -static void ieee80211_verify_ip_alignment(struct ieee80211_rx_data *rx) +/** + * DOC: Packet alignment + * + * Drivers always need to pass packets that are aligned to two-byte boundaries + * to the stack. + * + * Additionally, should, if possible, align the payload data in a way that + * guarantees that the contained IP header is aligned to a four-byte + * boundary. In the case of regular frames, this simply means aligning the + * payload to a four-byte boundary (because either the IP header is directly + * contained, or IV/RFC1042 headers that have a length divisible by four are + * in front of it). + * + * With A-MSDU frames, however, the payload data address must yield two modulo + * four because there are 14-byte 802.3 headers within the A-MSDU frames that + * push the IP header further back to a multiple of four again. Thankfully, the + * specs were sane enough this time around to require padding each A-MSDU + * subframe to a length that is a multiple of four. + * + * Padding like Atheros hardware adds which is inbetween the 802.11 header and + * the payload is not supported, the driver is required to move the 802.11 + * header to be directly in front of the payload in that case. + */ +static void ieee80211_verify_alignment(struct ieee80211_rx_data *rx) { -#ifdef CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; int hdrlen; +#ifndef CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT + return; +#endif + + if (WARN_ONCE((unsigned long)rx->skb->data & 1, + "unaligned packet at 0x%p\n", rx->skb->data)) + return; + if (!ieee80211_is_data_present(hdr->frame_control)) return; - /* - * Drivers are required to align the payload data in a way that - * guarantees that the contained IP header is aligned to a four- - * byte boundary. In the case of regular frames, this simply means - * aligning the payload to a four-byte boundary (because either - * the IP header is directly contained, or IV/RFC1042 headers that - * have a length divisible by four are in front of it. - * - * With A-MSDU frames, however, the payload data address must - * yield two modulo four because there are 14-byte 802.3 headers - * within the A-MSDU frames that push the IP header further back - * to a multiple of four again. Thankfully, the specs were sane - * enough this time around to require padding each A-MSDU subframe - * to a length that is a multiple of four. - * - * Padding like atheros hardware adds which is inbetween the 802.11 - * header and the payload is not supported, the driver is required - * to move the 802.11 header further back in that case. - */ hdrlen = ieee80211_hdrlen(hdr->frame_control); if (rx->flags & IEEE80211_RX_AMSDU) hdrlen += ETH_HLEN; - WARN_ON_ONCE(((unsigned long)(rx->skb->data + hdrlen)) & 3); -#endif + WARN_ONCE(((unsigned long)(rx->skb->data + hdrlen)) & 3, + "unaligned IP payload at 0x%p\n", rx->skb->data + hdrlen); } @@ -1267,10 +1278,37 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) } if (skb) { - /* deliver to local stack */ - skb->protocol = eth_type_trans(skb, dev); - memset(skb->cb, 0, sizeof(skb->cb)); - netif_rx(skb); + int align __maybe_unused; + +#if defined(CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT) || !defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) + /* + * 'align' will only take the values 0 or 2 here + * since all frames are required to be aligned + * to 2-byte boundaries when being passed to + * mac80211. That also explains the __skb_push() + * below. + */ + align = (unsigned long)skb->data & 4; + if (align) { + if (WARN_ON(skb_headroom(skb) < 3)) { + dev_kfree_skb(skb); + skb = NULL; + } else { + u8 *data = skb->data; + size_t len = skb->len; + u8 *new = __skb_push(skb, align); + memmove(new, data, len); + __skb_trim(skb, len); + } + } +#endif + + if (skb) { + /* deliver to local stack */ + skb->protocol = eth_type_trans(skb, dev); + memset(skb->cb, 0, sizeof(skb->cb)); + netif_rx(skb); + } } if (xmit_skb) { @@ -1339,14 +1377,20 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) if (remaining <= subframe_len + padding) frame = skb; else { - frame = dev_alloc_skb(local->hw.extra_tx_headroom + - subframe_len); + /* + * Allocate and reserve two bytes more for payload + * alignment since sizeof(struct ethhdr) is 14. + */ + frame = dev_alloc_skb( + ALIGN(local->hw.extra_tx_headroom, 4) + + subframe_len + 2); if (frame == NULL) return RX_DROP_UNUSABLE; - skb_reserve(frame, local->hw.extra_tx_headroom + - sizeof(struct ethhdr)); + skb_reserve(frame, + ALIGN(local->hw.extra_tx_headroom, 4) + + sizeof(struct ethhdr) + 2); memcpy(skb_put(frame, ntohs(len)), skb->data, ntohs(len)); @@ -1976,7 +2020,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, rx.flags |= IEEE80211_RX_IN_SCAN; ieee80211_parse_qos(&rx); - ieee80211_verify_ip_alignment(&rx); + ieee80211_verify_alignment(&rx); skb = rx.skb; -- cgit v1.2.3 From 4797938c5dfa22af30fd16679192972f878419a1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 7 Jan 2009 10:13:27 +0100 Subject: mac80211: clean up channel type config The channel_type really doesn't need to be the only member in a new structure, so remove the struct. Additionally, remove the _CONF_CHANGE_HT flag and use _CONF_CHANGE_CHANNEL when the channel type changes, since that's enough of a change to require reprogramming the hardware anyway. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ht.c | 8 +++++--- net/mac80211/main.c | 4 ++-- net/mac80211/mlme.c | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index f6547de5ac6b..832adf888ac3 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -131,12 +131,14 @@ u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, } ht_changed = conf_is_ht(&local->hw.conf) != enable_ht || - channel_type != local->hw.conf.ht.channel_type; + channel_type != local->hw.conf.channel_type; local->oper_channel_type = channel_type; - if (ht_changed) - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_HT); + if (ht_changed) { + /* channel_type change automatically detected */ + ieee80211_hw_config(local, 0); + } /* disable HT */ if (!enable_ht) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index b55b9970dc97..e9f3e85d1a9e 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -208,9 +208,9 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) } if (chan != local->hw.conf.channel || - channel_type != local->hw.conf.ht.channel_type) { + channel_type != local->hw.conf.channel_type) { local->hw.conf.channel = chan; - local->hw.conf.ht.channel_type = channel_type; + local->hw.conf.channel_type = channel_type; changed |= IEEE80211_CONF_CHANGE_CHANNEL; } diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index aafa112ae09c..6a90171c859f 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -901,8 +901,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); + /* channel(_type) changes are handled by ieee80211_hw_config */ local->oper_channel_type = NL80211_CHAN_NO_HT; - config_changed |= IEEE80211_CONF_CHANGE_HT; del_timer_sync(&local->dynamic_ps_timer); cancel_work_sync(&local->dynamic_ps_enable_work); -- cgit v1.2.3 From e9aeabaeb9a0bece50100dc74bbd720a68cb8f5c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 Jan 2009 18:12:35 +0100 Subject: mac80211: validate SIOCSIWPOWER arguments better Don't accept any arguments we don't handle, and return error codes instead of using an uninitialised stack value. Signed-off-by: Johannes Berg Reviewed-by: Kalle Valo Signed-off-by: John W. Linville --- net/mac80211/wext.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 5690c3d41e7d..3fc1b903bfbc 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -853,9 +853,12 @@ static int ieee80211_ioctl_siwpower(struct net_device *dev, ps = true; break; default: /* Otherwise we ignore */ - break; + return -EINVAL; } + if (wrq->flags & ~(IW_POWER_MODE | IW_POWER_TIMEOUT)) + return -EINVAL; + if (wrq->flags & IW_POWER_TIMEOUT) timeout = wrq->value / 1000; -- cgit v1.2.3 From 46f2c4bd7e2ba2cfedbcd4fe15d316eebc608cba Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 6 Jan 2009 18:13:18 +0100 Subject: mac80211: move dynamic PS timeout to hardware config This will be needed for drivers that set the IEEE80211_HW_NO_STACK_DYNAMIC_PS flag and still want to handle dynamic PS. Signed-off-by: Johannes Berg Reviewed-by: Kalle Valo Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 1 - net/mac80211/mlme.c | 5 +++-- net/mac80211/tx.c | 4 ++-- net/mac80211/wext.c | 14 ++++++++------ 4 files changed, 13 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index fa5ca14517f5..3db6bc3cdaf2 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -700,7 +700,6 @@ struct ieee80211_local { unsigned int wmm_acm; /* bit field of ACM bits (BIT(802.1D tag)) */ bool powersave; - int dynamic_ps_timeout; struct work_struct dynamic_ps_enable_work; struct work_struct dynamic_ps_disable_work; struct timer_list dynamic_ps_timer; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 6a90171c859f..7709e7645671 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -777,9 +777,10 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, if (local->powersave && !(local->hw.flags & IEEE80211_HW_NO_STACK_DYNAMIC_PS)) { - if (local->dynamic_ps_timeout > 0) + if (local->hw.conf.dynamic_ps_timeout > 0) mod_timer(&local->dynamic_ps_timer, jiffies + - msecs_to_jiffies(local->dynamic_ps_timeout)); + msecs_to_jiffies( + local->hw.conf.dynamic_ps_timeout)); else { ieee80211_send_nullfunc(local, sdata, 1); conf->flags |= IEEE80211_CONF_PS; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 96eca341160b..b18a72690119 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1296,7 +1296,7 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev) } if (!(local->hw.flags & IEEE80211_HW_NO_STACK_DYNAMIC_PS) && - local->dynamic_ps_timeout > 0) { + local->hw.conf.dynamic_ps_timeout > 0) { if (local->hw.conf.flags & IEEE80211_CONF_PS) { ieee80211_stop_queues_by_reason(&local->hw, IEEE80211_QUEUE_STOP_REASON_PS); @@ -1305,7 +1305,7 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev) } mod_timer(&local->dynamic_ps_timer, jiffies + - msecs_to_jiffies(local->dynamic_ps_timeout)); + msecs_to_jiffies(local->hw.conf.dynamic_ps_timeout)); } memset(info, 0, sizeof(*info)); diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 3fc1b903bfbc..3f2db0bda46c 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -863,17 +863,19 @@ static int ieee80211_ioctl_siwpower(struct net_device *dev, timeout = wrq->value / 1000; set: - if (ps == local->powersave && timeout == local->dynamic_ps_timeout) + if (ps == local->powersave && timeout == conf->dynamic_ps_timeout) return ret; local->powersave = ps; - local->dynamic_ps_timeout = timeout; + conf->dynamic_ps_timeout = timeout; - if (!(local->hw.flags & IEEE80211_HW_NO_STACK_DYNAMIC_PS) && - (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED)) { - if (local->dynamic_ps_timeout > 0) + if (local->hw.flags & IEEE80211_HW_NO_STACK_DYNAMIC_PS) { + ret = ieee80211_hw_config(local, + IEEE80211_CONF_CHANGE_DYNPS_TIMEOUT); + } else if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) { + if (conf->dynamic_ps_timeout > 0) mod_timer(&local->dynamic_ps_timer, jiffies + - msecs_to_jiffies(local->dynamic_ps_timeout)); + msecs_to_jiffies(conf->dynamic_ps_timeout)); else { if (local->powersave) { ieee80211_send_nullfunc(local, sdata, 1); -- cgit v1.2.3 From 4be8c3873e0b88397866d3ede578503e188f9ad2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 7 Jan 2009 18:28:20 +0100 Subject: mac80211: extend/document powersave API This modifies hardware flags for powersave to support three different flags: * IEEE80211_HW_SUPPORTS_PS - indicates general PS support * IEEE80211_HW_PS_NULLFUNC_STACK - indicates nullfunc sending in software * IEEE80211_HW_SUPPORTS_DYNAMIC_PS - indicates dynamic PS on the device It also adds documentation for all this which explains how to set the various flags. Additionally, it fixes a few things: * a spot where && was used to test flags * enable CONF_PS only when associated again Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 31 +++++++++++++++---------------- net/mac80211/tx.c | 2 +- net/mac80211/wext.c | 40 ++++++++++++++++++++++++---------------- 3 files changed, 40 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 7709e7645671..a1e683e305f0 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -775,17 +775,17 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, bss_info_changed |= BSS_CHANGED_BASIC_RATES; ieee80211_bss_info_change_notify(sdata, bss_info_changed); - if (local->powersave && - !(local->hw.flags & IEEE80211_HW_NO_STACK_DYNAMIC_PS)) { - if (local->hw.conf.dynamic_ps_timeout > 0) + if (local->powersave) { + if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) && + local->hw.conf.dynamic_ps_timeout > 0) { mod_timer(&local->dynamic_ps_timer, jiffies + msecs_to_jiffies( local->hw.conf.dynamic_ps_timeout)); - else { - ieee80211_send_nullfunc(local, sdata, 1); + } else { + if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) + ieee80211_send_nullfunc(local, sdata, 1); conf->flags |= IEEE80211_CONF_PS; - ieee80211_hw_config(local, - IEEE80211_CONF_CHANGE_PS); + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); } } @@ -1779,16 +1779,14 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_sta_wmm_params(local, ifsta, elems.wmm_param, elems.wmm_param_len); - if (!(local->hw.flags & IEEE80211_HW_NO_STACK_DYNAMIC_PS)) { + if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK && + local->hw.conf.flags & IEEE80211_CONF_PS) { directed_tim = check_tim(&elems, ifsta->aid, &is_mc); if (directed_tim || is_mc) { - if (local->hw.conf.flags && IEEE80211_CONF_PS) { - local->hw.conf.flags &= ~IEEE80211_CONF_PS; - ieee80211_hw_config(local, - IEEE80211_CONF_CHANGE_PS); - ieee80211_send_nullfunc(local, sdata, 0); - } + local->hw.conf.flags &= ~IEEE80211_CONF_PS; + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); + ieee80211_send_nullfunc(local, sdata, 0); } } @@ -2694,9 +2692,10 @@ void ieee80211_dynamic_ps_enable_work(struct work_struct *work) if (local->hw.conf.flags & IEEE80211_CONF_PS) return; - ieee80211_send_nullfunc(local, sdata, 1); - local->hw.conf.flags |= IEEE80211_CONF_PS; + if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) + ieee80211_send_nullfunc(local, sdata, 1); + local->hw.conf.flags |= IEEE80211_CONF_PS; ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index b18a72690119..cd6bc87eec73 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1295,7 +1295,7 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev) return 0; } - if (!(local->hw.flags & IEEE80211_HW_NO_STACK_DYNAMIC_PS) && + if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) && local->hw.conf.dynamic_ps_timeout > 0) { if (local->hw.conf.flags & IEEE80211_CONF_PS) { ieee80211_stop_queues_by_reason(&local->hw, diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 3f2db0bda46c..1e5b29bdb3a7 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -837,6 +837,9 @@ static int ieee80211_ioctl_siwpower(struct net_device *dev, int ret = 0, timeout = 0; bool ps; + if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS)) + return -EOPNOTSUPP; + if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EINVAL; @@ -862,32 +865,37 @@ static int ieee80211_ioctl_siwpower(struct net_device *dev, if (wrq->flags & IW_POWER_TIMEOUT) timeout = wrq->value / 1000; -set: + set: if (ps == local->powersave && timeout == conf->dynamic_ps_timeout) return ret; local->powersave = ps; conf->dynamic_ps_timeout = timeout; - if (local->hw.flags & IEEE80211_HW_NO_STACK_DYNAMIC_PS) { + if (local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS) ret = ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_DYNPS_TIMEOUT); - } else if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) { - if (conf->dynamic_ps_timeout > 0) - mod_timer(&local->dynamic_ps_timer, jiffies + - msecs_to_jiffies(conf->dynamic_ps_timeout)); - else { - if (local->powersave) { + + if (!(sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED)) + return ret; + + if (conf->dynamic_ps_timeout > 0 && + !(local->hw.flags & IEEE80211_HW_SUPPORTS_DYNAMIC_PS)) { + mod_timer(&local->dynamic_ps_timer, jiffies + + msecs_to_jiffies(conf->dynamic_ps_timeout)); + } else { + if (local->powersave) { + if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) ieee80211_send_nullfunc(local, sdata, 1); - conf->flags |= IEEE80211_CONF_PS; - ret = ieee80211_hw_config(local, - IEEE80211_CONF_CHANGE_PS); - } else { - conf->flags &= ~IEEE80211_CONF_PS; - ret = ieee80211_hw_config(local, - IEEE80211_CONF_CHANGE_PS); + conf->flags |= IEEE80211_CONF_PS; + ret = ieee80211_hw_config(local, + IEEE80211_CONF_CHANGE_PS); + } else { + conf->flags &= ~IEEE80211_CONF_PS; + ret = ieee80211_hw_config(local, + IEEE80211_CONF_CHANGE_PS); + if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) ieee80211_send_nullfunc(local, sdata, 0); - } } } -- cgit v1.2.3 From 560e28e14f69ad3440a6e8c283dcfd37e1e41c2d Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 7 Jan 2009 17:43:32 -0800 Subject: cfg80211: call reg_notifier() once We are calling the reg_notifier() callback per band, this is not necessary, just call it once. Signed-off-by: Luis R. Rodriguez Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/reg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index bc494cef2102..0f93d4526f37 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -947,9 +947,9 @@ void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby) for (band = 0; band < IEEE80211_NUM_BANDS; band++) { if (wiphy->bands[band]) handle_band(wiphy, band); - if (wiphy->reg_notifier) - wiphy->reg_notifier(wiphy, setby); } + if (wiphy->reg_notifier) + wiphy->reg_notifier(wiphy, setby); } /* Return value which can be used by ignore_request() to indicate -- cgit v1.2.3 From 3e0c3ff36c4c7b9e39af7d600e399664ca04e817 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 7 Jan 2009 17:43:34 -0800 Subject: cfg80211: allow multiple driver regulatory_hints() We add support for multiple drivers to provide a regulatory_hint() on a system by adding a wiphy specific regulatory domain cache. This allows drivers to keep around cache their own regulatory domain structure queried from CRDA. We handle conflicts by intersecting multiple regulatory domains, each driver will stick to its own regulatory domain though unless a country IE has been received and processed. If the user already requested a regulatory domain and a driver requests the same regulatory domain then simply copy to the driver's regd the same regulatory domain and do not call CRDA, do not collect $200. Signed-off-by: Luis R. Rodriguez Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/reg.c | 104 ++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 92 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 0f93d4526f37..5a746cd114a6 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -784,6 +784,7 @@ static u32 map_regdom_flags(u32 rd_flags) /** * freq_reg_info - get regulatory information for the given frequency + * @wiphy: the wiphy for which we want to process this rule for * @center_freq: Frequency in KHz for which we want regulatory information for * @bandwidth: the bandwidth requirement you have in KHz, if you do not have one * you can set this to 0. If this frequency is allowed we then set @@ -802,22 +803,31 @@ static u32 map_regdom_flags(u32 rd_flags) * freq_in_rule_band() for our current definition of a band -- this is purely * subjective and right now its 802.11 specific. */ -static int freq_reg_info(u32 center_freq, u32 *bandwidth, +static int freq_reg_info(struct wiphy *wiphy, u32 center_freq, u32 *bandwidth, const struct ieee80211_reg_rule **reg_rule) { int i; bool band_rule_found = false; + const struct ieee80211_regdomain *regd; u32 max_bandwidth = 0; - if (!cfg80211_regdomain) + regd = cfg80211_regdomain; + + /* Follow the driver's regulatory domain, if present, unless a country + * IE has been processed */ + if (last_request->initiator != REGDOM_SET_BY_COUNTRY_IE && + wiphy->regd) + regd = wiphy->regd; + + if (!regd) return -EINVAL; - for (i = 0; i < cfg80211_regdomain->n_reg_rules; i++) { + for (i = 0; i < regd->n_reg_rules; i++) { const struct ieee80211_reg_rule *rr; const struct ieee80211_freq_range *fr = NULL; const struct ieee80211_power_rule *pr = NULL; - rr = &cfg80211_regdomain->reg_rules[i]; + rr = ®d->reg_rules[i]; fr = &rr->freq_range; pr = &rr->power_rule; @@ -859,7 +869,7 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, flags = chan->orig_flags; - r = freq_reg_info(MHZ_TO_KHZ(chan->center_freq), + r = freq_reg_info(wiphy, MHZ_TO_KHZ(chan->center_freq), &max_bandwidth, ®_rule); if (r) { @@ -952,6 +962,30 @@ void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby) wiphy->reg_notifier(wiphy, setby); } +static int reg_copy_regd(const struct ieee80211_regdomain **dst_regd, + const struct ieee80211_regdomain *src_regd) +{ + struct ieee80211_regdomain *regd; + int size_of_regd = 0; + unsigned int i; + + size_of_regd = sizeof(struct ieee80211_regdomain) + + ((src_regd->n_reg_rules + 1) * sizeof(struct ieee80211_reg_rule)); + + regd = kzalloc(size_of_regd, GFP_KERNEL); + if (!regd) + return -ENOMEM; + + memcpy(regd, src_regd, sizeof(struct ieee80211_regdomain)); + + for (i = 0; i < src_regd->n_reg_rules; i++) + memcpy(®d->reg_rules[i], &src_regd->reg_rules[i], + sizeof(struct ieee80211_reg_rule)); + + *dst_regd = regd; + return 0; +} + /* Return value which can be used by ignore_request() to indicate * it has been determined we should intersect two regulatory domains */ #define REG_INTERSECT 1 @@ -999,9 +1033,9 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, } return REG_INTERSECT; case REGDOM_SET_BY_DRIVER: - if (last_request->initiator == REGDOM_SET_BY_DRIVER) - return -EALREADY; - return 0; + if (last_request->initiator == REGDOM_SET_BY_CORE) + return 0; + return REG_INTERSECT; case REGDOM_SET_BY_USER: if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) return REG_INTERSECT; @@ -1028,11 +1062,28 @@ int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, r = ignore_request(wiphy, set_by, alpha2); - if (r == REG_INTERSECT) + if (r == REG_INTERSECT) { + if (set_by == REGDOM_SET_BY_DRIVER) { + r = reg_copy_regd(&wiphy->regd, cfg80211_regdomain); + if (r) + return r; + } intersect = true; - else if (r) + } else if (r) { + /* If the regulatory domain being requested by the + * driver has already been set just copy it to the + * wiphy */ + if (r == -EALREADY && set_by == REGDOM_SET_BY_DRIVER) { + r = reg_copy_regd(&wiphy->regd, cfg80211_regdomain); + if (r) + return r; + r = -EALREADY; + goto new_request; + } return r; + } +new_request: request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); if (!request) @@ -1048,6 +1099,11 @@ int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, kfree(last_request); last_request = request; + + /* When r == REG_INTERSECT we do need to call CRDA */ + if (r < 0) + return r; + /* * Note: When CONFIG_WIRELESS_OLD_REGULATORY is enabled * AND if CRDA is NOT present nothing will happen, if someone @@ -1341,6 +1397,23 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) } if (!last_request->intersect) { + int r; + + if (last_request->initiator != REGDOM_SET_BY_DRIVER) { + reset_regdomains(); + cfg80211_regdomain = rd; + return 0; + } + + /* For a driver hint, lets copy the regulatory domain the + * driver wanted to the wiphy to deal with conflicts */ + + BUG_ON(last_request->wiphy->regd); + + r = reg_copy_regd(&last_request->wiphy->regd, rd); + if (r) + return r; + reset_regdomains(); cfg80211_regdomain = rd; return 0; @@ -1354,8 +1427,14 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) if (!intersected_rd) return -EINVAL; - /* We can trash what CRDA provided now */ - kfree(rd); + /* We can trash what CRDA provided now. + * However if a driver requested this specific regulatory + * domain we keep it for its private use */ + if (last_request->initiator == REGDOM_SET_BY_DRIVER) + last_request->wiphy->regd = rd; + else + kfree(rd); + rd = NULL; reset_regdomains(); @@ -1439,6 +1518,7 @@ int set_regdom(const struct ieee80211_regdomain *rd) /* Caller must hold cfg80211_drv_mutex */ void reg_device_remove(struct wiphy *wiphy) { + kfree(wiphy->regd); if (!last_request || !last_request->wiphy) return; if (last_request->wiphy != wiphy) -- cgit v1.2.3 From 039498c6ec67bd718ac1c8e7f6b4e2cfe2146773 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Wed, 7 Jan 2009 17:43:35 -0800 Subject: cfg80211: fix typo on message after intersection Signed-off-by: Luis R. Rodriguez Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/reg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 5a746cd114a6..b34fd84b3e2f 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1295,7 +1295,7 @@ static void print_regdomain(const struct ieee80211_regdomain *rd) "domain intersected: \n"); } else printk(KERN_INFO "cfg80211: Current regulatory " - "intersected: \n"); + "domain intersected: \n"); } else if (is_world_regdom(rd->alpha2)) printk(KERN_INFO "cfg80211: World regulatory " "domain updated:\n"); -- cgit v1.2.3 From 5394af4d86ae51b369ff243c3f75b6f9a74e164b Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 8 Jan 2009 13:31:59 +0200 Subject: mac80211: 802.11w - STA flag for MFP Add flags for setting STA entries and struct ieee80211_if_sta to indicate whether management frame protection (MFP) is used. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 4 ++++ net/mac80211/debugfs_sta.c | 5 +++-- net/mac80211/ieee80211_i.h | 1 + net/mac80211/mlme.c | 7 +++++-- net/mac80211/sta_info.h | 2 ++ 5 files changed, 15 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 9d4e4d846ec1..309d9189aa49 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -630,6 +630,10 @@ static void sta_apply_parameters(struct ieee80211_local *local, sta->flags &= ~WLAN_STA_WME; if (params->station_flags & STATION_FLAG_WME) sta->flags |= WLAN_STA_WME; + + sta->flags &= ~WLAN_STA_MFP; + if (params->station_flags & STATION_FLAG_MFP) + sta->flags |= WLAN_STA_MFP; spin_unlock_bh(&sta->lock); } diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index a2fbe0131312..90230c718b5b 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -67,14 +67,15 @@ static ssize_t sta_flags_read(struct file *file, char __user *userbuf, char buf[100]; struct sta_info *sta = file->private_data; u32 staflags = get_sta_flags(sta); - int res = scnprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s", + int res = scnprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s", staflags & WLAN_STA_AUTH ? "AUTH\n" : "", staflags & WLAN_STA_ASSOC ? "ASSOC\n" : "", staflags & WLAN_STA_PS ? "PS\n" : "", staflags & WLAN_STA_AUTHORIZED ? "AUTHORIZED\n" : "", staflags & WLAN_STA_SHORT_PREAMBLE ? "SHORT PREAMBLE\n" : "", staflags & WLAN_STA_WME ? "WME\n" : "", - staflags & WLAN_STA_WDS ? "WDS\n" : ""); + staflags & WLAN_STA_WDS ? "WDS\n" : "", + staflags & WLAN_STA_MFP ? "MFP\n" : ""); return simple_read_from_buffer(userbuf, count, ppos, buf, res); } STA_OPS(flags); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 3db6bc3cdaf2..b5f86cb17630 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -260,6 +260,7 @@ struct mesh_preq_queue { #define IEEE80211_STA_PRIVACY_INVOKED BIT(13) #define IEEE80211_STA_TKIP_WEP_USED BIT(14) #define IEEE80211_STA_CSA_RECEIVED BIT(15) +#define IEEE80211_STA_MFP_ENABLED BIT(16) /* flags for MLME request */ #define IEEE80211_STA_REQ_SCAN 0 #define IEEE80211_STA_REQ_DIRECT_PROBE 1 diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a1e683e305f0..bc8a7f1a6a15 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1,6 +1,6 @@ /* * BSS client mode implementation - * Copyright 2003, Jouni Malinen + * Copyright 2003-2008, Jouni Malinen * Copyright 2004, Instant802 Networks, Inc. * Copyright 2005, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc @@ -472,7 +472,7 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, /* u.deauth.reason_code == u.disassoc.reason_code */ mgmt->u.deauth.reason_code = cpu_to_le16(reason); - ieee80211_tx_skb(sdata, skb, 0); + ieee80211_tx_skb(sdata, skb, ifsta->flags & IEEE80211_STA_MFP_ENABLED); } /* MLME */ @@ -1408,6 +1408,9 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, rate_control_rate_init(sta); + if (ifsta->flags & IEEE80211_STA_MFP_ENABLED) + set_sta_flags(sta, WLAN_STA_MFP); + if (elems.wmm_param) set_sta_flags(sta, WLAN_STA_WME); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index b683d3f5ef8a..d13a44b935e2 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -34,6 +34,7 @@ * @WLAN_STA_CLEAR_PS_FILT: Clear PS filter in hardware (using the * IEEE80211_TX_CTL_CLEAR_PS_FILT control flag) when the next * frame to this station is transmitted. + * @WLAN_STA_MFP: Management frame protection is used with this STA. */ enum ieee80211_sta_info_flags { WLAN_STA_AUTH = 1<<0, @@ -46,6 +47,7 @@ enum ieee80211_sta_info_flags { WLAN_STA_WDS = 1<<7, WLAN_STA_PSPOLL = 1<<8, WLAN_STA_CLEAR_PS_FILT = 1<<9, + WLAN_STA_MFP = 1<<10, }; #define STA_TID_NUM 16 -- cgit v1.2.3 From fb7333367632c67d8b6b06fb8d906cdabb11b02a Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 8 Jan 2009 13:32:00 +0200 Subject: mac80211: 802.11w - CCMP for management frames Extend CCMP to support encryption and decryption of unicast management frames. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/tx.c | 23 ++++++++++++++++++++++- net/mac80211/wpa.c | 18 ++++++++++++------ 2 files changed, 34 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index cd6bc87eec73..50c6c4fabea5 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -330,6 +330,22 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) return TX_CONTINUE; } +static int ieee80211_use_mfp(__le16 fc, struct sta_info *sta, + struct sk_buff *skb) +{ + if (!ieee80211_is_mgmt(fc)) + return 0; + + if (sta == NULL || !test_sta_flags(sta, WLAN_STA_MFP)) + return 0; + + if (!ieee80211_is_robust_mgmt_frame((struct ieee80211_hdr *) + skb->data)) + return 0; + + return 1; +} + static ieee80211_tx_result ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) { @@ -428,10 +444,15 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) if (ieee80211_is_auth(hdr->frame_control)) break; case ALG_TKIP: - case ALG_CCMP: if (!ieee80211_is_data_present(hdr->frame_control)) tx->key = NULL; break; + case ALG_CCMP: + if (!ieee80211_is_data_present(hdr->frame_control) && + !ieee80211_use_mfp(hdr->frame_control, tx->sta, + tx->skb)) + tx->key = NULL; + break; } } diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 7aa63caf8d50..aff46adde3f0 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -266,7 +266,7 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *scratch, int encrypted) { __le16 mask_fc; - int a4_included; + int a4_included, mgmt; u8 qos_tid; u8 *b_0, *aad; u16 data_len, len_a; @@ -277,12 +277,15 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *scratch, aad = scratch + 4 * AES_BLOCK_LEN; /* - * Mask FC: zero subtype b4 b5 b6 + * Mask FC: zero subtype b4 b5 b6 (if not mgmt) * Retry, PwrMgt, MoreData; set Protected */ + mgmt = ieee80211_is_mgmt(hdr->frame_control); mask_fc = hdr->frame_control; - mask_fc &= ~cpu_to_le16(0x0070 | IEEE80211_FCTL_RETRY | + mask_fc &= ~cpu_to_le16(IEEE80211_FCTL_RETRY | IEEE80211_FCTL_PM | IEEE80211_FCTL_MOREDATA); + if (!mgmt) + mask_fc &= ~cpu_to_le16(0x0070); mask_fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); hdrlen = ieee80211_hdrlen(hdr->frame_control); @@ -300,8 +303,10 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *scratch, /* First block, b_0 */ b_0[0] = 0x59; /* flags: Adata: 1, M: 011, L: 001 */ - /* Nonce: QoS Priority | A2 | PN */ - b_0[1] = qos_tid; + /* Nonce: Nonce Flags | A2 | PN + * Nonce Flags: Priority (b0..b3) | Management (b4) | Reserved (b5..b7) + */ + b_0[1] = qos_tid | (mgmt << 4); memcpy(&b_0[2], hdr->addr2, ETH_ALEN); memcpy(&b_0[8], pn, CCMP_PN_LEN); /* l(m) */ @@ -446,7 +451,8 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) hdrlen = ieee80211_hdrlen(hdr->frame_control); - if (!ieee80211_is_data(hdr->frame_control)) + if (!ieee80211_is_data(hdr->frame_control) && + !ieee80211_is_robust_mgmt_frame(hdr)) return RX_CONTINUE; data_len = skb->len - hdrlen - CCMP_HDR_LEN - CCMP_MIC_LEN; -- cgit v1.2.3 From 765cb46a3fc856245ea68a7c961ac87c77e4ae2d Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 8 Jan 2009 13:32:01 +0200 Subject: mac80211: 802.11w - Add BIP (AES-128-CMAC) Implement Broadcast/Multicast Integrity Protocol for management frame protection. This patch adds the needed definitions for the new information element (MMIE) and implementation for the new "encryption" type (though, BIP is actually not encrypting data, it provides only integrity protection). These routines will be used by a follow-on patch that enables BIP for multicast/broadcast robust management frames. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/Makefile | 1 + net/mac80211/aes_cmac.c | 135 +++++++++++++++++++++++++++++++++++++++++++++ net/mac80211/aes_cmac.h | 19 +++++++ net/mac80211/ieee80211_i.h | 2 +- net/mac80211/key.h | 10 ++++ net/mac80211/wpa.c | 125 +++++++++++++++++++++++++++++++++++++++++ net/mac80211/wpa.h | 5 ++ 7 files changed, 296 insertions(+), 1 deletion(-) create mode 100644 net/mac80211/aes_cmac.c create mode 100644 net/mac80211/aes_cmac.h (limited to 'net') diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 7d4971aa443f..5c6fadfb6a00 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -15,6 +15,7 @@ mac80211-y := \ michael.o \ tkip.o \ aes_ccm.o \ + aes_cmac.o \ cfg.o \ rx.o \ spectmgmt.o \ diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c new file mode 100644 index 000000000000..3d097b3d7b62 --- /dev/null +++ b/net/mac80211/aes_cmac.c @@ -0,0 +1,135 @@ +/* + * AES-128-CMAC with TLen 16 for IEEE 802.11w BIP + * Copyright 2008, Jouni Malinen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +#include +#include "key.h" +#include "aes_cmac.h" + +#define AES_BLOCK_SIZE 16 +#define AES_CMAC_KEY_LEN 16 +#define CMAC_TLEN 8 /* CMAC TLen = 64 bits (8 octets) */ +#define AAD_LEN 20 + + +static void gf_mulx(u8 *pad) +{ + int i, carry; + + carry = pad[0] & 0x80; + for (i = 0; i < AES_BLOCK_SIZE - 1; i++) + pad[i] = (pad[i] << 1) | (pad[i + 1] >> 7); + pad[AES_BLOCK_SIZE - 1] <<= 1; + if (carry) + pad[AES_BLOCK_SIZE - 1] ^= 0x87; +} + + +static void aes_128_cmac_vector(struct crypto_cipher *tfm, u8 *scratch, + size_t num_elem, + const u8 *addr[], const size_t *len, u8 *mac) +{ + u8 *cbc, *pad; + const u8 *pos, *end; + size_t i, e, left, total_len; + + cbc = scratch; + pad = scratch + AES_BLOCK_SIZE; + + memset(cbc, 0, AES_BLOCK_SIZE); + + total_len = 0; + for (e = 0; e < num_elem; e++) + total_len += len[e]; + left = total_len; + + e = 0; + pos = addr[0]; + end = pos + len[0]; + + while (left >= AES_BLOCK_SIZE) { + for (i = 0; i < AES_BLOCK_SIZE; i++) { + cbc[i] ^= *pos++; + if (pos >= end) { + e++; + pos = addr[e]; + end = pos + len[e]; + } + } + if (left > AES_BLOCK_SIZE) + crypto_cipher_encrypt_one(tfm, cbc, cbc); + left -= AES_BLOCK_SIZE; + } + + memset(pad, 0, AES_BLOCK_SIZE); + crypto_cipher_encrypt_one(tfm, pad, pad); + gf_mulx(pad); + + if (left || total_len == 0) { + for (i = 0; i < left; i++) { + cbc[i] ^= *pos++; + if (pos >= end) { + e++; + pos = addr[e]; + end = pos + len[e]; + } + } + cbc[left] ^= 0x80; + gf_mulx(pad); + } + + for (i = 0; i < AES_BLOCK_SIZE; i++) + pad[i] ^= cbc[i]; + crypto_cipher_encrypt_one(tfm, pad, pad); + memcpy(mac, pad, CMAC_TLEN); +} + + +void ieee80211_aes_cmac(struct crypto_cipher *tfm, u8 *scratch, const u8 *aad, + const u8 *data, size_t data_len, u8 *mic) +{ + const u8 *addr[3]; + size_t len[3]; + u8 zero[CMAC_TLEN]; + + memset(zero, 0, CMAC_TLEN); + addr[0] = aad; + len[0] = AAD_LEN; + addr[1] = data; + len[1] = data_len - CMAC_TLEN; + addr[2] = zero; + len[2] = CMAC_TLEN; + + aes_128_cmac_vector(tfm, scratch, 3, addr, len, mic); +} + + +struct crypto_cipher * ieee80211_aes_cmac_key_setup(const u8 key[]) +{ + struct crypto_cipher *tfm; + + tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + return NULL; + + crypto_cipher_setkey(tfm, key, AES_CMAC_KEY_LEN); + + return tfm; +} + + +void ieee80211_aes_cmac_key_free(struct crypto_cipher *tfm) +{ + if (tfm) + crypto_free_cipher(tfm); +} diff --git a/net/mac80211/aes_cmac.h b/net/mac80211/aes_cmac.h new file mode 100644 index 000000000000..0eb9a4831508 --- /dev/null +++ b/net/mac80211/aes_cmac.h @@ -0,0 +1,19 @@ +/* + * Copyright 2008, Jouni Malinen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef AES_CMAC_H +#define AES_CMAC_H + +#include + +struct crypto_cipher * ieee80211_aes_cmac_key_setup(const u8 key[]); +void ieee80211_aes_cmac(struct crypto_cipher *tfm, u8 *scratch, const u8 *aad, + const u8 *data, size_t data_len, u8 *mic); +void ieee80211_aes_cmac_key_free(struct crypto_cipher *tfm); + +#endif /* AES_CMAC_H */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b5f86cb17630..20af92abd61d 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -43,7 +43,7 @@ struct ieee80211_local; /* Required encryption head and tailroom */ #define IEEE80211_ENCRYPT_HEADROOM 8 -#define IEEE80211_ENCRYPT_TAILROOM 12 +#define IEEE80211_ENCRYPT_TAILROOM 18 /* IEEE 802.11 (Ch. 9.5 Defragmentation) requires support for concurrent * reception of at least three fragmented frames. This limit can be increased diff --git a/net/mac80211/key.h b/net/mac80211/key.h index 425816e0996c..73ac28ca2ede 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -96,6 +96,16 @@ struct ieee80211_key { u8 tx_crypto_buf[6 * AES_BLOCK_LEN]; u8 rx_crypto_buf[6 * AES_BLOCK_LEN]; } ccmp; + struct { + u8 tx_pn[6]; + u8 rx_pn[6]; + struct crypto_cipher *tfm; + u32 replays; /* dot11RSNAStatsCMACReplays */ + u32 icverrors; /* dot11RSNAStatsCMACICVErrors */ + /* scratch buffers for virt_to_page() (crypto API) */ + u8 tx_crypto_buf[2 * AES_BLOCK_LEN]; + u8 rx_crypto_buf[2 * AES_BLOCK_LEN]; + } aes_cmac; } u; /* number of times this key has been used */ diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index aff46adde3f0..53e11e6ff66e 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -1,5 +1,6 @@ /* * Copyright 2002-2004, Instant802 Networks, Inc. + * Copyright 2008, Jouni Malinen * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -19,6 +20,7 @@ #include "michael.h" #include "tkip.h" #include "aes_ccm.h" +#include "aes_cmac.h" #include "wpa.h" ieee80211_tx_result @@ -491,3 +493,126 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) return RX_CONTINUE; } + + +static void bip_aad(struct sk_buff *skb, u8 *aad) +{ + /* BIP AAD: FC(masked) || A1 || A2 || A3 */ + + /* FC type/subtype */ + aad[0] = skb->data[0]; + /* Mask FC Retry, PwrMgt, MoreData flags to zero */ + aad[1] = skb->data[1] & ~(BIT(4) | BIT(5) | BIT(6)); + /* A1 || A2 || A3 */ + memcpy(aad + 2, skb->data + 4, 3 * ETH_ALEN); +} + + +static inline void bip_ipn_swap(u8 *d, const u8 *s) +{ + *d++ = s[5]; + *d++ = s[4]; + *d++ = s[3]; + *d++ = s[2]; + *d++ = s[1]; + *d = s[0]; +} + + +ieee80211_tx_result +ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx) +{ + struct sk_buff *skb = tx->skb; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_key *key = tx->key; + struct ieee80211_mmie *mmie; + u8 *pn, aad[20]; + int i; + + if (tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) { + /* hwaccel */ + info->control.hw_key = &tx->key->conf; + return 0; + } + + if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie))) + return TX_DROP; + + mmie = (struct ieee80211_mmie *) skb_put(skb, sizeof(*mmie)); + mmie->element_id = WLAN_EID_MMIE; + mmie->length = sizeof(*mmie) - 2; + mmie->key_id = cpu_to_le16(key->conf.keyidx); + + /* PN = PN + 1 */ + pn = key->u.aes_cmac.tx_pn; + + for (i = sizeof(key->u.aes_cmac.tx_pn) - 1; i >= 0; i--) { + pn[i]++; + if (pn[i]) + break; + } + bip_ipn_swap(mmie->sequence_number, pn); + + bip_aad(skb, aad); + + /* + * MIC = AES-128-CMAC(IGTK, AAD || Management Frame Body || MMIE, 64) + */ + ieee80211_aes_cmac(key->u.aes_cmac.tfm, key->u.aes_cmac.tx_crypto_buf, + aad, skb->data + 24, skb->len - 24, mmie->mic); + + return TX_CONTINUE; +} + + +ieee80211_rx_result +ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx) +{ + struct sk_buff *skb = rx->skb; + struct ieee80211_key *key = rx->key; + struct ieee80211_mmie *mmie; + u8 aad[20], mic[8], ipn[6]; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + + if (!ieee80211_is_mgmt(hdr->frame_control)) + return RX_CONTINUE; + + if ((rx->status->flag & RX_FLAG_DECRYPTED) && + (rx->status->flag & RX_FLAG_IV_STRIPPED)) + return RX_CONTINUE; + + if (skb->len < 24 + sizeof(*mmie)) + return RX_DROP_UNUSABLE; + + mmie = (struct ieee80211_mmie *) + (skb->data + skb->len - sizeof(*mmie)); + if (mmie->element_id != WLAN_EID_MMIE || + mmie->length != sizeof(*mmie) - 2) + return RX_DROP_UNUSABLE; /* Invalid MMIE */ + + bip_ipn_swap(ipn, mmie->sequence_number); + + if (memcmp(ipn, key->u.aes_cmac.rx_pn, 6) <= 0) { + key->u.aes_cmac.replays++; + return RX_DROP_UNUSABLE; + } + + if (!(rx->status->flag & RX_FLAG_DECRYPTED)) { + /* hardware didn't decrypt/verify MIC */ + bip_aad(skb, aad); + ieee80211_aes_cmac(key->u.aes_cmac.tfm, + key->u.aes_cmac.rx_crypto_buf, aad, + skb->data + 24, skb->len - 24, mic); + if (memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) { + key->u.aes_cmac.icverrors++; + return RX_DROP_UNUSABLE; + } + } + + memcpy(key->u.aes_cmac.rx_pn, ipn, 6); + + /* Remove MMIE */ + skb_trim(skb, skb->len - sizeof(*mmie)); + + return RX_CONTINUE; +} diff --git a/net/mac80211/wpa.h b/net/mac80211/wpa.h index d42d221d8a1d..baba0608313e 100644 --- a/net/mac80211/wpa.h +++ b/net/mac80211/wpa.h @@ -28,4 +28,9 @@ ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx); ieee80211_rx_result ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx); +ieee80211_tx_result +ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx); +ieee80211_rx_result +ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx); + #endif /* WPA_H */ -- cgit v1.2.3 From 3cfcf6ac6d69dc290e96416731eea5c88ac7d426 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 8 Jan 2009 13:32:02 +0200 Subject: mac80211: 802.11w - Use BIP (AES-128-CMAC) Add mechanism for managing BIP keys (IGTK) and integrate BIP into the TX/RX paths. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 31 ++++++++++++++++ net/mac80211/debugfs_key.c | 79 ++++++++++++++++++++++++++++++++++++++-- net/mac80211/debugfs_key.h | 10 ++++++ net/mac80211/ieee80211_i.h | 5 ++- net/mac80211/key.c | 62 ++++++++++++++++++++++++++++++-- net/mac80211/key.h | 6 ++++ net/mac80211/rx.c | 90 ++++++++++++++++++++++++++++++++++++++++++---- net/mac80211/tx.c | 9 +++++ net/wireless/nl80211.c | 29 +++++++++++---- 9 files changed, 302 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 309d9189aa49..72c106915433 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -133,6 +133,9 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, case WLAN_CIPHER_SUITE_CCMP: alg = ALG_CCMP; break; + case WLAN_CIPHER_SUITE_AES_CMAC: + alg = ALG_AES_CMAC; + break; default: return -EINVAL; } @@ -275,6 +278,17 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, else params.cipher = WLAN_CIPHER_SUITE_WEP104; break; + case ALG_AES_CMAC: + params.cipher = WLAN_CIPHER_SUITE_AES_CMAC; + seq[0] = key->u.aes_cmac.tx_pn[5]; + seq[1] = key->u.aes_cmac.tx_pn[4]; + seq[2] = key->u.aes_cmac.tx_pn[3]; + seq[3] = key->u.aes_cmac.tx_pn[2]; + seq[4] = key->u.aes_cmac.tx_pn[1]; + seq[5] = key->u.aes_cmac.tx_pn[0]; + params.seq = seq; + params.seq_len = 6; + break; } params.key = key->conf.key; @@ -304,6 +318,22 @@ static int ieee80211_config_default_key(struct wiphy *wiphy, return 0; } +static int ieee80211_config_default_mgmt_key(struct wiphy *wiphy, + struct net_device *dev, + u8 key_idx) +{ + struct ieee80211_sub_if_data *sdata; + + rcu_read_lock(); + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + ieee80211_set_default_mgmt_key(sdata, key_idx); + + rcu_read_unlock(); + + return 0; +} + static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) { struct ieee80211_sub_if_data *sdata = sta->sdata; @@ -1153,6 +1183,7 @@ struct cfg80211_ops mac80211_config_ops = { .del_key = ieee80211_del_key, .get_key = ieee80211_get_key, .set_default_key = ieee80211_config_default_key, + .set_default_mgmt_key = ieee80211_config_default_mgmt_key, .add_beacon = ieee80211_add_beacon, .set_beacon = ieee80211_set_beacon, .del_beacon = ieee80211_del_beacon, diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index 6424ac565ae0..99c752588b30 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -76,6 +76,9 @@ static ssize_t key_algorithm_read(struct file *file, case ALG_CCMP: alg = "CCMP\n"; break; + case ALG_AES_CMAC: + alg = "AES-128-CMAC\n"; + break; default: return 0; } @@ -105,6 +108,12 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf, len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], tpn[5]); break; + case ALG_AES_CMAC: + tpn = key->u.aes_cmac.tx_pn; + len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", + tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], + tpn[5]); + break; default: return 0; } @@ -142,6 +151,14 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, } len = p - buf; break; + case ALG_AES_CMAC: + rpn = key->u.aes_cmac.rx_pn; + p += scnprintf(p, sizeof(buf)+buf-p, + "%02x%02x%02x%02x%02x%02x\n", + rpn[0], rpn[1], rpn[2], + rpn[3], rpn[4], rpn[5]); + len = p - buf; + break; default: return 0; } @@ -156,13 +173,40 @@ static ssize_t key_replays_read(struct file *file, char __user *userbuf, char buf[20]; int len; - if (key->conf.alg != ALG_CCMP) + switch (key->conf.alg) { + case ALG_CCMP: + len = scnprintf(buf, sizeof(buf), "%u\n", key->u.ccmp.replays); + break; + case ALG_AES_CMAC: + len = scnprintf(buf, sizeof(buf), "%u\n", + key->u.aes_cmac.replays); + break; + default: return 0; - len = scnprintf(buf, sizeof(buf), "%u\n", key->u.ccmp.replays); + } return simple_read_from_buffer(userbuf, count, ppos, buf, len); } KEY_OPS(replays); +static ssize_t key_icverrors_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct ieee80211_key *key = file->private_data; + char buf[20]; + int len; + + switch (key->conf.alg) { + case ALG_AES_CMAC: + len = scnprintf(buf, sizeof(buf), "%u\n", + key->u.aes_cmac.icverrors); + break; + default: + return 0; + } + return simple_read_from_buffer(userbuf, count, ppos, buf, len); +} +KEY_OPS(icverrors); + static ssize_t key_key_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { @@ -222,6 +266,7 @@ void ieee80211_debugfs_key_add(struct ieee80211_key *key) DEBUGFS_ADD(tx_spec); DEBUGFS_ADD(rx_spec); DEBUGFS_ADD(replays); + DEBUGFS_ADD(icverrors); DEBUGFS_ADD(key); DEBUGFS_ADD(ifindex); }; @@ -243,6 +288,7 @@ void ieee80211_debugfs_key_remove(struct ieee80211_key *key) DEBUGFS_DEL(tx_spec); DEBUGFS_DEL(rx_spec); DEBUGFS_DEL(replays); + DEBUGFS_DEL(icverrors); DEBUGFS_DEL(key); DEBUGFS_DEL(ifindex); @@ -280,6 +326,35 @@ void ieee80211_debugfs_key_remove_default(struct ieee80211_sub_if_data *sdata) sdata->common_debugfs.default_key = NULL; } +void ieee80211_debugfs_key_add_mgmt_default(struct ieee80211_sub_if_data *sdata) +{ + char buf[50]; + struct ieee80211_key *key; + + if (!sdata->debugfsdir) + return; + + /* this is running under the key lock */ + + key = sdata->default_mgmt_key; + if (key) { + sprintf(buf, "../keys/%d", key->debugfs.cnt); + sdata->common_debugfs.default_mgmt_key = + debugfs_create_symlink("default_mgmt_key", + sdata->debugfsdir, buf); + } else + ieee80211_debugfs_key_remove_mgmt_default(sdata); +} + +void ieee80211_debugfs_key_remove_mgmt_default(struct ieee80211_sub_if_data *sdata) +{ + if (!sdata) + return; + + debugfs_remove(sdata->common_debugfs.default_mgmt_key); + sdata->common_debugfs.default_mgmt_key = NULL; +} + void ieee80211_debugfs_key_sta_del(struct ieee80211_key *key, struct sta_info *sta) { diff --git a/net/mac80211/debugfs_key.h b/net/mac80211/debugfs_key.h index b1a3754ee240..54717b4e1371 100644 --- a/net/mac80211/debugfs_key.h +++ b/net/mac80211/debugfs_key.h @@ -6,6 +6,10 @@ void ieee80211_debugfs_key_add(struct ieee80211_key *key); void ieee80211_debugfs_key_remove(struct ieee80211_key *key); void ieee80211_debugfs_key_add_default(struct ieee80211_sub_if_data *sdata); void ieee80211_debugfs_key_remove_default(struct ieee80211_sub_if_data *sdata); +void ieee80211_debugfs_key_add_mgmt_default( + struct ieee80211_sub_if_data *sdata); +void ieee80211_debugfs_key_remove_mgmt_default( + struct ieee80211_sub_if_data *sdata); void ieee80211_debugfs_key_sta_del(struct ieee80211_key *key, struct sta_info *sta); #else @@ -19,6 +23,12 @@ static inline void ieee80211_debugfs_key_add_default( static inline void ieee80211_debugfs_key_remove_default( struct ieee80211_sub_if_data *sdata) {} +static inline void ieee80211_debugfs_key_add_mgmt_default( + struct ieee80211_sub_if_data *sdata) +{} +static inline void ieee80211_debugfs_key_remove_mgmt_default( + struct ieee80211_sub_if_data *sdata) +{} static inline void ieee80211_debugfs_key_sta_del(struct ieee80211_key *key, struct sta_info *sta) {} diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 20af92abd61d..8c3245717c55 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -409,8 +409,10 @@ struct ieee80211_sub_if_data { unsigned int fragment_next; #define NUM_DEFAULT_KEYS 4 - struct ieee80211_key *keys[NUM_DEFAULT_KEYS]; +#define NUM_DEFAULT_MGMT_KEYS 2 + struct ieee80211_key *keys[NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS]; struct ieee80211_key *default_key; + struct ieee80211_key *default_mgmt_key; u16 sequence_number; @@ -482,6 +484,7 @@ struct ieee80211_sub_if_data { } debugfs; struct { struct dentry *default_key; + struct dentry *default_mgmt_key; } common_debugfs; #ifdef CONFIG_MAC80211_MESH diff --git a/net/mac80211/key.c b/net/mac80211/key.c index b0a025c9b615..19b480de4bbc 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -18,6 +18,7 @@ #include "ieee80211_i.h" #include "debugfs_key.h" #include "aes_ccm.h" +#include "aes_cmac.h" /** @@ -215,13 +216,38 @@ void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx) spin_unlock_irqrestore(&sdata->local->key_lock, flags); } +static void +__ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, int idx) +{ + struct ieee80211_key *key = NULL; + + if (idx >= NUM_DEFAULT_KEYS && + idx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) + key = sdata->keys[idx]; + + rcu_assign_pointer(sdata->default_mgmt_key, key); + + if (key) + add_todo(key, KEY_FLAG_TODO_DEFMGMTKEY); +} + +void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, + int idx) +{ + unsigned long flags; + + spin_lock_irqsave(&sdata->local->key_lock, flags); + __ieee80211_set_default_mgmt_key(sdata, idx); + spin_unlock_irqrestore(&sdata->local->key_lock, flags); +} + static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct ieee80211_key *old, struct ieee80211_key *new) { - int idx, defkey; + int idx, defkey, defmgmtkey; if (new) list_add(&new->list, &sdata->key_list); @@ -237,13 +263,19 @@ static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, idx = new->conf.keyidx; defkey = old && sdata->default_key == old; + defmgmtkey = old && sdata->default_mgmt_key == old; if (defkey && !new) __ieee80211_set_default_key(sdata, -1); + if (defmgmtkey && !new) + __ieee80211_set_default_mgmt_key(sdata, -1); rcu_assign_pointer(sdata->keys[idx], new); if (defkey && new) __ieee80211_set_default_key(sdata, new->conf.keyidx); + if (defmgmtkey && new) + __ieee80211_set_default_mgmt_key(sdata, + new->conf.keyidx); } if (old) { @@ -262,7 +294,7 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, { struct ieee80211_key *key; - BUG_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS); + BUG_ON(idx < 0 || idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS); key = kzalloc(sizeof(struct ieee80211_key) + key_len, GFP_KERNEL); if (!key) @@ -291,6 +323,10 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, key->conf.iv_len = CCMP_HDR_LEN; key->conf.icv_len = CCMP_MIC_LEN; break; + case ALG_AES_CMAC: + key->conf.iv_len = 0; + key->conf.icv_len = sizeof(struct ieee80211_mmie); + break; } memcpy(key->conf.key, key_data, key_len); INIT_LIST_HEAD(&key->list); @@ -308,6 +344,19 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, } } + if (alg == ALG_AES_CMAC) { + /* + * Initialize AES key state here as an optimization so that + * it does not need to be initialized for every packet. + */ + key->u.aes_cmac.tfm = + ieee80211_aes_cmac_key_setup(key_data); + if (!key->u.aes_cmac.tfm) { + kfree(key); + return NULL; + } + } + return key; } @@ -461,6 +510,8 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key) if (key->conf.alg == ALG_CCMP) ieee80211_aes_key_free(key->u.ccmp.tfm); + if (key->conf.alg == ALG_AES_CMAC) + ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); ieee80211_debugfs_key_remove(key); kfree(key); @@ -483,6 +534,7 @@ static void __ieee80211_key_todo(void) list_del_init(&key->todo); todoflags = key->flags & (KEY_FLAG_TODO_ADD_DEBUGFS | KEY_FLAG_TODO_DEFKEY | + KEY_FLAG_TODO_DEFMGMTKEY | KEY_FLAG_TODO_HWACCEL_ADD | KEY_FLAG_TODO_HWACCEL_REMOVE | KEY_FLAG_TODO_DELETE); @@ -500,6 +552,11 @@ static void __ieee80211_key_todo(void) ieee80211_debugfs_key_add_default(key->sdata); work_done = true; } + if (todoflags & KEY_FLAG_TODO_DEFMGMTKEY) { + ieee80211_debugfs_key_remove_mgmt_default(key->sdata); + ieee80211_debugfs_key_add_mgmt_default(key->sdata); + work_done = true; + } if (todoflags & KEY_FLAG_TODO_HWACCEL_ADD) { ieee80211_key_enable_hw_accel(key); work_done = true; @@ -535,6 +592,7 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata) ieee80211_key_lock(); ieee80211_debugfs_key_remove_default(sdata); + ieee80211_debugfs_key_remove_mgmt_default(sdata); spin_lock_irqsave(&sdata->local->key_lock, flags); list_for_each_entry_safe(key, tmp, &sdata->key_list, list) diff --git a/net/mac80211/key.h b/net/mac80211/key.h index 73ac28ca2ede..215d3ef42a4f 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -46,6 +46,8 @@ struct sta_info; * acceleration. * @KEY_FLAG_TODO_DEFKEY: Key is default key and debugfs needs to be updated. * @KEY_FLAG_TODO_ADD_DEBUGFS: Key needs to be added to debugfs. + * @KEY_FLAG_TODO_DEFMGMTKEY: Key is default management key and debugfs needs + * to be updated. */ enum ieee80211_internal_key_flags { KEY_FLAG_UPLOADED_TO_HARDWARE = BIT(0), @@ -54,6 +56,7 @@ enum ieee80211_internal_key_flags { KEY_FLAG_TODO_HWACCEL_REMOVE = BIT(3), KEY_FLAG_TODO_DEFKEY = BIT(4), KEY_FLAG_TODO_ADD_DEBUGFS = BIT(5), + KEY_FLAG_TODO_DEFMGMTKEY = BIT(6), }; struct tkip_ctx { @@ -124,6 +127,7 @@ struct ieee80211_key { struct dentry *tx_spec; struct dentry *rx_spec; struct dentry *replays; + struct dentry *icverrors; struct dentry *key; struct dentry *ifindex; int cnt; @@ -150,6 +154,8 @@ void ieee80211_key_link(struct ieee80211_key *key, struct sta_info *sta); void ieee80211_key_free(struct ieee80211_key *key); void ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx); +void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, + int idx); void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata); void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata); void ieee80211_disable_keys(struct ieee80211_sub_if_data *sdata); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index b68e082e99ce..abc3aa583ca6 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -446,6 +446,52 @@ ieee80211_rx_h_passive_scan(struct ieee80211_rx_data *rx) return RX_CONTINUE; } + +static int ieee80211_is_unicast_robust_mgmt_frame(struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + + if (skb->len < 24 || is_multicast_ether_addr(hdr->addr1)) + return 0; + + return ieee80211_is_robust_mgmt_frame(hdr); +} + + +static int ieee80211_is_multicast_robust_mgmt_frame(struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + + if (skb->len < 24 || !is_multicast_ether_addr(hdr->addr1)) + return 0; + + return ieee80211_is_robust_mgmt_frame(hdr); +} + + +/* Get the BIP key index from MMIE; return -1 if this is not a BIP frame */ +static int ieee80211_get_mmie_keyidx(struct sk_buff *skb) +{ + struct ieee80211_mgmt *hdr = (struct ieee80211_mgmt *) skb->data; + struct ieee80211_mmie *mmie; + + if (skb->len < 24 + sizeof(*mmie) || + !is_multicast_ether_addr(hdr->da)) + return -1; + + if (!ieee80211_is_robust_mgmt_frame((struct ieee80211_hdr *) hdr)) + return -1; /* not a robust management frame */ + + mmie = (struct ieee80211_mmie *) + (skb->data + skb->len - sizeof(*mmie)); + if (mmie->element_id != WLAN_EID_MMIE || + mmie->length != sizeof(*mmie) - 2) + return -1; + + return le16_to_cpu(mmie->key_id); +} + + static ieee80211_rx_result ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) { @@ -561,21 +607,23 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) int hdrlen; ieee80211_rx_result result = RX_DROP_UNUSABLE; struct ieee80211_key *stakey = NULL; + int mmie_keyidx = -1; /* * Key selection 101 * - * There are three types of keys: + * There are four types of keys: * - GTK (group keys) + * - IGTK (group keys for management frames) * - PTK (pairwise keys) * - STK (station-to-station pairwise keys) * * When selecting a key, we have to distinguish between multicast * (including broadcast) and unicast frames, the latter can only - * use PTKs and STKs while the former always use GTKs. Unless, of - * course, actual WEP keys ("pre-RSNA") are used, then unicast - * frames can also use key indizes like GTKs. Hence, if we don't - * have a PTK/STK we check the key index for a WEP key. + * use PTKs and STKs while the former always use GTKs and IGTKs. + * Unless, of course, actual WEP keys ("pre-RSNA") are used, then + * unicast frames can also use key indices like GTKs. Hence, if we + * don't have a PTK/STK we check the key index for a WEP key. * * Note that in a regular BSS, multicast frames are sent by the * AP only, associated stations unicast the frame to the AP first @@ -588,8 +636,14 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) * possible. */ - if (!ieee80211_has_protected(hdr->frame_control)) - return RX_CONTINUE; + if (!ieee80211_has_protected(hdr->frame_control)) { + if (!ieee80211_is_mgmt(hdr->frame_control) || + rx->sta == NULL || !test_sta_flags(rx->sta, WLAN_STA_MFP)) + return RX_CONTINUE; + mmie_keyidx = ieee80211_get_mmie_keyidx(rx->skb); + if (mmie_keyidx < 0) + return RX_CONTINUE; + } /* * No point in finding a key and decrypting if the frame is neither @@ -603,6 +657,16 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) if (!is_multicast_ether_addr(hdr->addr1) && stakey) { rx->key = stakey; + } else if (mmie_keyidx >= 0) { + /* Broadcast/multicast robust management frame / BIP */ + if ((rx->status->flag & RX_FLAG_DECRYPTED) && + (rx->status->flag & RX_FLAG_IV_STRIPPED)) + return RX_CONTINUE; + + if (mmie_keyidx < NUM_DEFAULT_KEYS || + mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) + return RX_DROP_MONITOR; /* unexpected BIP keyidx */ + rx->key = rcu_dereference(rx->sdata->keys[mmie_keyidx]); } else { /* * The device doesn't give us the IV so we won't be @@ -665,6 +729,9 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) case ALG_CCMP: result = ieee80211_crypto_ccmp_decrypt(rx); break; + case ALG_AES_CMAC: + result = ieee80211_crypto_aes_cmac_decrypt(rx); + break; } /* either the frame has been decrypted or will be dropped */ @@ -1112,6 +1179,15 @@ ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc) /* Drop unencrypted frames if key is set. */ if (unlikely(!ieee80211_has_protected(fc) && !ieee80211_is_nullfunc(fc) && + (!ieee80211_is_mgmt(fc) || + (ieee80211_is_unicast_robust_mgmt_frame(rx->skb) && + rx->sta && test_sta_flags(rx->sta, WLAN_STA_MFP))) && + (rx->key || rx->sdata->drop_unencrypted))) + return -EACCES; + /* BIP does not use Protected field, so need to check MMIE */ + if (unlikely(rx->sta && test_sta_flags(rx->sta, WLAN_STA_MFP) && + ieee80211_is_multicast_robust_mgmt_frame(rx->skb) && + ieee80211_get_mmie_keyidx(rx->skb) < 0 && (rx->key || rx->sdata->drop_unencrypted))) return -EACCES; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 50c6c4fabea5..ad53ea9e9c77 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -425,6 +425,9 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) tx->key = NULL; else if (tx->sta && (key = rcu_dereference(tx->sta->key))) tx->key = key; + else if (ieee80211_is_mgmt(hdr->frame_control) && + (key = rcu_dereference(tx->sdata->default_mgmt_key))) + tx->key = key; else if ((key = rcu_dereference(tx->sdata->default_key))) tx->key = key; else if (tx->sdata->drop_unencrypted && @@ -453,6 +456,10 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) tx->skb)) tx->key = NULL; break; + case ALG_AES_CMAC: + if (!ieee80211_is_mgmt(hdr->frame_control)) + tx->key = NULL; + break; } } @@ -808,6 +815,8 @@ ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx) return ieee80211_crypto_tkip_encrypt(tx); case ALG_CCMP: return ieee80211_crypto_ccmp_encrypt(tx); + case ALG_AES_CMAC: + return ieee80211_crypto_aes_cmac_encrypt(tx); } /* not reached */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 1e728fff474e..123d3b160fad 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -738,7 +738,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_KEY_IDX]) key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]); - if (key_idx > 3) + if (key_idx > 5) return -EINVAL; if (info->attrs[NL80211_ATTR_MAC]) @@ -804,30 +804,41 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) int err; struct net_device *dev; u8 key_idx; + int (*func)(struct wiphy *wiphy, struct net_device *netdev, + u8 key_index); if (!info->attrs[NL80211_ATTR_KEY_IDX]) return -EINVAL; key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]); - if (key_idx > 3) + if (info->attrs[NL80211_ATTR_KEY_DEFAULT_MGMT]) { + if (key_idx < 4 || key_idx > 5) + return -EINVAL; + } else if (key_idx > 3) return -EINVAL; /* currently only support setting default key */ - if (!info->attrs[NL80211_ATTR_KEY_DEFAULT]) + if (!info->attrs[NL80211_ATTR_KEY_DEFAULT] && + !info->attrs[NL80211_ATTR_KEY_DEFAULT_MGMT]) return -EINVAL; err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; - if (!drv->ops->set_default_key) { + if (info->attrs[NL80211_ATTR_KEY_DEFAULT]) + func = drv->ops->set_default_key; + else + func = drv->ops->set_default_mgmt_key; + + if (!func) { err = -EOPNOTSUPP; goto out; } rtnl_lock(); - err = drv->ops->set_default_key(&drv->wiphy, dev, key_idx); + err = func(&drv->wiphy, dev, key_idx); rtnl_unlock(); out: @@ -863,7 +874,7 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); - if (key_idx > 3) + if (key_idx > 5) return -EINVAL; /* @@ -894,6 +905,10 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) if (params.key_len != 13) return -EINVAL; break; + case WLAN_CIPHER_SUITE_AES_CMAC: + if (params.key_len != 16) + return -EINVAL; + break; default: return -EINVAL; } @@ -928,7 +943,7 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_KEY_IDX]) key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]); - if (key_idx > 3) + if (key_idx > 5) return -EINVAL; if (info->attrs[NL80211_ATTR_MAC]) -- cgit v1.2.3 From 54604d3a827b37525ef017adba313c7112e0f484 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 8 Jan 2009 13:32:03 +0200 Subject: mac80211: 802.11w - WEXT parameter for setting mgmt cipher Add a new IW_AUTH parameter for setting cipher suite for multicast/broadcast management frames. This is for full-mac drivers that take care of RSN IE generation for (re)association request frames. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/wext.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 1e5b29bdb3a7..c3b2dd5706fb 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -927,6 +927,7 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev, case IW_AUTH_WPA_ENABLED: case IW_AUTH_RX_UNENCRYPTED_EAPOL: case IW_AUTH_KEY_MGMT: + case IW_AUTH_CIPHER_GROUP_MGMT: break; case IW_AUTH_CIPHER_PAIRWISE: if (sdata->vif.type == NL80211_IFTYPE_STATION) { -- cgit v1.2.3 From 22787dbaa3b952602542506e0426ea6d5f104042 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 8 Jan 2009 13:32:04 +0200 Subject: mac80211: 802.11w - WEXT configuration for IGTK Added new SIOCSIWENCODEEXT algorithm for configuring BIP (AES-CMAC) keys (IGTK). Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/wext.c | 62 +++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 48 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index c3b2dd5706fb..7ba1d5ba3afa 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -37,7 +37,14 @@ static int ieee80211_set_encryption(struct ieee80211_sub_if_data *sdata, u8 *sta struct ieee80211_key *key; int err; - if (idx < 0 || idx >= NUM_DEFAULT_KEYS) { + if (alg == ALG_AES_CMAC) { + if (idx < NUM_DEFAULT_KEYS || + idx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) { + printk(KERN_DEBUG "%s: set_encrypt - invalid idx=%d " + "(BIP)\n", sdata->dev->name, idx); + return -EINVAL; + } + } else if (idx < 0 || idx >= NUM_DEFAULT_KEYS) { printk(KERN_DEBUG "%s: set_encrypt - invalid idx=%d\n", sdata->dev->name, idx); return -EINVAL; @@ -103,6 +110,9 @@ static int ieee80211_set_encryption(struct ieee80211_sub_if_data *sdata, u8 *sta if (set_tx_key || (!sta && !sdata->default_key && key)) ieee80211_set_default_key(sdata, idx); + if (alg == ALG_AES_CMAC && + (set_tx_key || (!sta && !sdata->default_mgmt_key && key))) + ieee80211_set_default_mgmt_key(sdata, idx); } out_unlock: @@ -1048,6 +1058,9 @@ static int ieee80211_ioctl_siwencodeext(struct net_device *dev, case IW_ENCODE_ALG_CCMP: alg = ALG_CCMP; break; + case IW_ENCODE_ALG_AES_CMAC: + alg = ALG_AES_CMAC; + break; default: return -EOPNOTSUPP; } @@ -1056,20 +1069,41 @@ static int ieee80211_ioctl_siwencodeext(struct net_device *dev, remove = 1; idx = erq->flags & IW_ENCODE_INDEX; - if (idx < 1 || idx > 4) { - idx = -1; - if (!sdata->default_key) - idx = 0; - else for (i = 0; i < NUM_DEFAULT_KEYS; i++) { - if (sdata->default_key == sdata->keys[i]) { - idx = i; - break; + if (alg == ALG_AES_CMAC) { + if (idx < NUM_DEFAULT_KEYS + 1 || + idx > NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) { + idx = -1; + if (!sdata->default_mgmt_key) + idx = 0; + else for (i = NUM_DEFAULT_KEYS; + i < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS; + i++) { + if (sdata->default_mgmt_key == sdata->keys[i]) + { + idx = i; + break; + } } - } - if (idx < 0) - return -EINVAL; - } else - idx--; + if (idx < 0) + return -EINVAL; + } else + idx--; + } else { + if (idx < 1 || idx > 4) { + idx = -1; + if (!sdata->default_key) + idx = 0; + else for (i = 0; i < NUM_DEFAULT_KEYS; i++) { + if (sdata->default_key == sdata->keys[i]) { + idx = i; + break; + } + } + if (idx < 0) + return -EINVAL; + } else + idx--; + } return ieee80211_set_encryption(sdata, ext->addr.sa_data, idx, alg, remove, -- cgit v1.2.3 From fdfacf0ae2e8339098b1164d2317b792d7662c0a Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 8 Jan 2009 13:32:05 +0200 Subject: mac80211: 802.11w - Configuration of MFP disabled/optional/required Add new WEXT IW_AUTH_* parameter for setting MFP disabled/optional/required. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 6 ++++++ net/mac80211/mlme.c | 4 ++++ net/mac80211/wext.c | 7 +++++++ 3 files changed, 17 insertions(+) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 8c3245717c55..212c732fbba7 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -320,6 +320,12 @@ struct ieee80211_if_sta { int auth_alg; /* currently used IEEE 802.11 authentication algorithm */ int auth_transaction; + enum { + IEEE80211_MFP_DISABLED, + IEEE80211_MFP_OPTIONAL, + IEEE80211_MFP_REQUIRED + } mfp; /* management frame protection */ + unsigned long ibss_join_req; struct sk_buff *probe_resp; /* ProbeResp template for IBSS */ u32 supp_rates_bits[IEEE80211_NUM_BANDS]; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index bc8a7f1a6a15..42c5f981c715 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2317,6 +2317,10 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata, selected->ssid_len); ieee80211_sta_set_bssid(sdata, selected->bssid); ieee80211_sta_def_wmm_params(sdata, selected); + if (sdata->u.sta.mfp == IEEE80211_MFP_REQUIRED) + sdata->u.sta.flags |= IEEE80211_STA_MFP_ENABLED; + else + sdata->u.sta.flags &= ~IEEE80211_STA_MFP_ENABLED; /* Send out direct probe if no probe resp was received or * the one we have is outdated diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 7ba1d5ba3afa..2dd387495dfe 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -975,6 +975,13 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev, else ret = -EOPNOTSUPP; break; + case IW_AUTH_MFP: + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) + sdata->u.sta.mfp = data->value; + else + ret = -EOPNOTSUPP; + break; default: ret = -EOPNOTSUPP; break; -- cgit v1.2.3 From fea147328908b7e2bfcaf9dc4377909d5507ca35 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 8 Jan 2009 13:32:06 +0200 Subject: mac80211: 802.11w - SA Query processing Process SA Query Requests for client mode in mac80211. AP side processing of SA Query Response frames is in user space (hostapd). Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/rx.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index abc3aa583ca6..63db89aef3e4 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1667,6 +1667,57 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx) return RX_CONTINUE; } +void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, + size_t len) +{ + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb; + struct ieee80211_mgmt *resp; + + if (compare_ether_addr(mgmt->da, sdata->dev->dev_addr) != 0) { + /* Not to own unicast address */ + return; + } + + if (compare_ether_addr(mgmt->sa, sdata->u.sta.bssid) != 0 || + compare_ether_addr(mgmt->bssid, sdata->u.sta.bssid) != 0) { + /* Not from the current AP. */ + return; + } + + if (sdata->u.sta.state == IEEE80211_STA_MLME_ASSOCIATE) { + /* Association in progress; ignore SA Query */ + return; + } + + if (len < 24 + 1 + sizeof(resp->u.action.u.sa_query)) { + /* Too short SA Query request frame */ + return; + } + + skb = dev_alloc_skb(sizeof(*resp) + local->hw.extra_tx_headroom); + if (skb == NULL) + return; + + skb_reserve(skb, local->hw.extra_tx_headroom); + resp = (struct ieee80211_mgmt *) skb_put(skb, 24); + memset(resp, 0, 24); + memcpy(resp->da, mgmt->sa, ETH_ALEN); + memcpy(resp->sa, sdata->dev->dev_addr, ETH_ALEN); + memcpy(resp->bssid, sdata->u.sta.bssid, ETH_ALEN); + resp->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); + skb_put(skb, 1 + sizeof(resp->u.action.u.sa_query)); + resp->u.action.category = WLAN_CATEGORY_SA_QUERY; + resp->u.action.u.sa_query.action = WLAN_ACTION_SA_QUERY_RESPONSE; + memcpy(resp->u.action.u.sa_query.trans_id, + mgmt->u.action.u.sa_query.trans_id, + WLAN_SA_QUERY_TR_ID_LEN); + + ieee80211_tx_skb(sdata, skb, 1); +} + static ieee80211_rx_result debug_noinline ieee80211_rx_h_action(struct ieee80211_rx_data *rx) { @@ -1743,6 +1794,24 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) break; } break; + case WLAN_CATEGORY_SA_QUERY: + if (len < (IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.sa_query))) + return RX_DROP_MONITOR; + switch (mgmt->u.action.u.sa_query.action) { + case WLAN_ACTION_SA_QUERY_REQUEST: + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return RX_DROP_MONITOR; + ieee80211_process_sa_query_req(sdata, mgmt, len); + break; + case WLAN_ACTION_SA_QUERY_RESPONSE: + /* + * SA Query response is currently only used in AP mode + * and it is processed in user space. + */ + return RX_CONTINUE; + } + break; default: return RX_CONTINUE; } -- cgit v1.2.3 From 1acc97b63a3f32481ebbb4e831323e9aa8834f66 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 8 Jan 2009 13:32:07 +0200 Subject: mac80211: 802.11w - Do not force Action frames to disable encryption When sending out Action frames, allow ieee80211_tx_skb() to send them without enforcing do_not_encrypt. These frames will be encrypted if MFP has been negotiated. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ht.c | 6 +++--- net/mac80211/mesh_hwmp.c | 4 ++-- net/mac80211/mesh_plink.c | 2 +- net/mac80211/spectmgmt.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 832adf888ac3..6be485264236 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -202,7 +202,7 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, mgmt->u.action.u.addba_req.start_seq_num = cpu_to_le16(start_seq_num << 4); - ieee80211_tx_skb(sdata, skb, 0); + ieee80211_tx_skb(sdata, skb, 1); } static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid, @@ -248,7 +248,7 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout); mgmt->u.action.u.addba_resp.status = cpu_to_le16(status); - ieee80211_tx_skb(sdata, skb, 0); + ieee80211_tx_skb(sdata, skb, 1); } static void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, @@ -291,7 +291,7 @@ static void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, mgmt->u.action.u.delba.params = cpu_to_le16(params); mgmt->u.action.u.delba.reason_code = cpu_to_le16(reason_code); - ieee80211_tx_skb(sdata, skb, 0); + ieee80211_tx_skb(sdata, skb, 1); } void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn) diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 71fe60961230..3f1785c1bacb 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -149,7 +149,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, pos += ETH_ALEN; memcpy(pos, &dst_dsn, 4); - ieee80211_tx_skb(sdata, skb, 0); + ieee80211_tx_skb(sdata, skb, 1); return 0; } @@ -198,7 +198,7 @@ int mesh_path_error_tx(u8 *dst, __le32 dst_dsn, u8 *ra, pos += ETH_ALEN; memcpy(pos, &dst_dsn, 4); - ieee80211_tx_skb(sdata, skb, 0); + ieee80211_tx_skb(sdata, skb, 1); return 0; } diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 1159bdb4119c..8a6c02ba1620 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -218,7 +218,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, memcpy(pos, &reason, 2); } - ieee80211_tx_skb(sdata, skb, 0); + ieee80211_tx_skb(sdata, skb, 1); return 0; } diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index 22ad4808e01a..8396b5a77e8d 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -65,7 +65,7 @@ static void ieee80211_send_refuse_measurement_request(struct ieee80211_sub_if_da IEEE80211_SPCT_MSR_RPRT_MODE_REFUSED; msr_report->u.action.u.measurement.msr_elem.type = request_ie->type; - ieee80211_tx_skb(sdata, skb, 0); + ieee80211_tx_skb(sdata, skb, 1); } void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, -- cgit v1.2.3 From 97ebe12a035e11f8af7a06a34f4d848f9b2f0b49 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 8 Jan 2009 13:32:08 +0200 Subject: mac80211: 802.11w - Drop unprotected robust management frames if MFP is used Use ieee80211_drop_unencrypted() to decide whether a received frame should be dropped with management frames, too. If MFP is negotiated, unprotected robust management frames will be dropped. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/rx.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 63db89aef3e4..57ce697e3251 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1737,6 +1737,9 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) if (!(rx->flags & IEEE80211_RX_RA_MATCH)) return RX_DROP_MONITOR; + if (ieee80211_drop_unencrypted(rx, mgmt->frame_control)) + return RX_DROP_MONITOR; + /* all categories we currently handle have action_code */ if (len < IEEE80211_MIN_ACTION_SIZE + 1) return RX_DROP_MONITOR; @@ -1825,10 +1828,14 @@ static ieee80211_rx_result debug_noinline ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; if (!(rx->flags & IEEE80211_RX_RA_MATCH)) return RX_DROP_MONITOR; + if (ieee80211_drop_unencrypted(rx, mgmt->frame_control)) + return RX_DROP_MONITOR; + if (ieee80211_vif_is_mesh(&sdata->vif)) return ieee80211_mesh_rx_mgmt(sdata, rx->skb, rx->status); -- cgit v1.2.3 From 63a5ab82255a4ff5d0783f16427210f1d45d7ec8 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 8 Jan 2009 13:32:09 +0200 Subject: mac80211: 802.11w - Implement Association Comeback processing When MFP is enabled, the AP does not allow a STA to associate if an existing security association exists without first going through SA Query process. When this happens, the association request is denied with a new status code ("temporarily rejected") ans Association Comeback IE is used to notify when the association may be tried again (i.e., when the SA Query procedure has timed out). Use the comeback time to update the mac80211 client MLME timer for next association attempt to minimize waiting time if association is temporarily rejected. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/mlme.c | 20 +++++++++++++++++--- net/mac80211/util.c | 4 ++++ 3 files changed, 23 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 212c732fbba7..9112c5247c35 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -820,6 +820,7 @@ struct ieee802_11_elems { u8 *country_elem; u8 *pwr_constr_elem; u8 *quiet_elem; /* first quite element */ + u8 *assoc_comeback; /* length of them, respectively */ u8 ssid_len; @@ -847,6 +848,7 @@ struct ieee802_11_elems { u8 pwr_constr_elem_len; u8 quiet_elem_len; u8 num_of_quiet_elem; /* can be more the one */ + u8 assoc_comeback_len; }; static inline struct ieee80211_local *hw_to_local( diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 42c5f981c715..82c598a83687 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1275,6 +1275,23 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, sdata->dev->name, reassoc ? "Rea" : "A", mgmt->sa, capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14)))); + pos = mgmt->u.assoc_resp.variable; + ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); + + if (status_code == WLAN_STATUS_ASSOC_REJECTED_TEMPORARILY && + elems.assoc_comeback && elems.assoc_comeback_len == 4) { + u32 tu, ms; + tu = get_unaligned_le32(elems.assoc_comeback); + ms = tu * 1024 / 1000; + printk(KERN_DEBUG "%s: AP rejected association temporarily; " + "comeback duration %u TU (%u ms)\n", + sdata->dev->name, tu, ms); + if (ms > IEEE80211_ASSOC_TIMEOUT) + mod_timer(&ifsta->timer, + jiffies + msecs_to_jiffies(ms)); + return; + } + if (status_code != WLAN_STATUS_SUCCESS) { printk(KERN_DEBUG "%s: AP denied association (code=%d)\n", sdata->dev->name, status_code); @@ -1290,9 +1307,6 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, "set\n", sdata->dev->name, aid); aid &= ~(BIT(15) | BIT(14)); - pos = mgmt->u.assoc_resp.variable; - ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); - if (!elems.supp_rates) { printk(KERN_DEBUG "%s: no SuppRates element in AssocResp\n", sdata->dev->name); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 5cd430333f08..963e0473205c 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -653,6 +653,10 @@ void ieee802_11_parse_elems(u8 *start, size_t len, elems->pwr_constr_elem = pos; elems->pwr_constr_elem_len = elen; break; + case WLAN_EID_ASSOC_COMEBACK_TIME: + elems->assoc_comeback = pos; + elems->assoc_comeback_len = elen; + break; default: break; } -- cgit v1.2.3 From 1f7d77ab69789980dad44e1af7afd3a68cd48276 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 8 Jan 2009 13:32:10 +0200 Subject: mac80211: 802.11w - Optional software CCMP for management frames If driver/firmware/hardware does not support CCMP for management frames, it can now request mac80211 to take care of encrypting and decrypting management frames (when MFP is enabled) in software. The will need to add this new IEEE80211_KEY_FLAG_SW_MGMT flag when a CCMP key is being configured for TX side and return the undecrypted frames on RX side without RX_FLAG_DECRYPTED flag to use software CCMP for management frames (but hardware for data frames). Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/wpa.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 53e11e6ff66e..9101b48ec2ae 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -367,9 +367,14 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) int hdrlen, len, tail; u8 *pos, *pn; int i; + bool skip_hw; + + skip_hw = (tx->key->conf.flags & IEEE80211_KEY_FLAG_SW_MGMT) && + ieee80211_is_mgmt(hdr->frame_control); if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && - !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { + !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) && + !skip_hw) { /* hwaccel - with no need for preallocated room for CCMP * header or MIC fields */ info->control.hw_key = &tx->key->conf; @@ -404,7 +409,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) ccmp_pn2hdr(pos, pn, key->conf.keyidx); - if (key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) { + if ((key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && !skip_hw) { /* hwaccel - with preallocated room for CCMP header */ info->control.hw_key = &tx->key->conf; return 0; -- cgit v1.2.3 From 4375d08350e3661d5e8860d33eea084e47ba01cf Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 8 Jan 2009 13:32:11 +0200 Subject: mac80211: 802.11w - Add driver capability flag for MFP This allows user space to determine whether a driver supports MFP and behave properly without having to ask user to configure this in MFP-optional mode. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/wext.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 2dd387495dfe..70a29b657b61 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -976,6 +976,10 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev, ret = -EOPNOTSUPP; break; case IW_AUTH_MFP: + if (!(sdata->local->hw.flags & IEEE80211_HW_MFP_CAPABLE)) { + ret = -EOPNOTSUPP; + break; + } if (sdata->vif.type == NL80211_IFTYPE_STATION || sdata->vif.type == NL80211_IFTYPE_ADHOC) sdata->u.sta.mfp = data->value; -- cgit v1.2.3 From a8302de934b5d1897ff146cd0c7ab87d1417c092 Mon Sep 17 00:00:00 2001 From: Vasanthakumar Thiagarajan Date: Fri, 9 Jan 2009 18:14:15 +0530 Subject: mac80211: Handle power constraint level advertised in 11d+h beacon This patch uses power constraint level while determining the maximum transmit power, there by it makes sure that any power mitigation requirement for the channel in the current regulatory domain is met. Signed-off-by: Vasanthakumar Thiagarajan Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 4 ++++ net/mac80211/main.c | 10 ++++++++-- net/mac80211/mlme.c | 9 +++++++++ net/mac80211/spectmgmt.c | 21 +++++++++++++++++++++ 4 files changed, 42 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 9112c5247c35..c9ffadb55d36 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -715,6 +715,7 @@ struct ieee80211_local { struct timer_list dynamic_ps_timer; int user_power_level; /* in dBm */ + int power_constr_level; /* in dBm */ #ifdef CONFIG_MAC80211_DEBUGFS struct local_debugfsdentries { @@ -985,6 +986,9 @@ void ieee80211_chswitch_work(struct work_struct *work); void ieee80211_process_chanswitch(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel_sw_ie *sw_elem, struct ieee80211_bss *bss); +void ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, + u16 capab_info, u8 *pwr_constr_elem, + u8 pwr_constr_elem_len); /* utility functions/constants */ extern void *mac80211_wiphy_privid; /* for wiphy privid */ diff --git a/net/mac80211/main.c b/net/mac80211/main.c index e9f3e85d1a9e..c78304db475e 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -214,10 +214,16 @@ int ieee80211_hw_config(struct ieee80211_local *local, u32 changed) changed |= IEEE80211_CONF_CHANGE_CHANNEL; } - if (!local->user_power_level) + if (local->sw_scanning) power = chan->max_power; else - power = min(chan->max_power, local->user_power_level); + power = local->power_constr_level ? + (chan->max_power - local->power_constr_level) : + chan->max_power; + + if (local->user_power_level) + power = min(power, local->user_power_level); + if (local->hw.conf.power_level != power) { changed |= IEEE80211_CONF_CHANGE_POWER; local->hw.conf.power_level = power; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 82c598a83687..f0d42498c257 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -905,6 +905,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, /* channel(_type) changes are handled by ieee80211_hw_config */ local->oper_channel_type = NL80211_CHAN_NO_HT; + local->power_constr_level = 0; + del_timer_sync(&local->dynamic_ps_timer); cancel_work_sync(&local->dynamic_ps_enable_work); @@ -1849,6 +1851,13 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, * for the BSSID we are associated to */ regulatory_hint_11d(local->hw.wiphy, elems.country_elem, elems.country_elem_len); + + /* TODO: IBSS also needs this */ + if (elems.pwr_constr_elem) + ieee80211_handle_pwr_constr(sdata, + le16_to_cpu(mgmt->u.probe_resp.capab_info), + elems.pwr_constr_elem, + elems.pwr_constr_elem_len); } ieee80211_bss_info_change_notify(sdata, changed); diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index 8396b5a77e8d..8d4ec2968f8f 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -161,3 +161,24 @@ void ieee80211_process_chanswitch(struct ieee80211_sub_if_data *sdata, jiffies + msecs_to_jiffies(sw_elem->count * bss->beacon_int)); } } + +void ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, + u16 capab_info, u8 *pwr_constr_elem, + u8 pwr_constr_elem_len) +{ + struct ieee80211_conf *conf = &sdata->local->hw.conf; + + if (!(capab_info & WLAN_CAPABILITY_SPECTRUM_MGMT)) + return; + + /* Power constraint IE length should be 1 octet */ + if (pwr_constr_elem_len != 1) + return; + + if ((*pwr_constr_elem <= conf->channel->max_power) && + (*pwr_constr_elem != sdata->local->power_constr_level)) { + sdata->local->power_constr_level = *pwr_constr_elem; + ieee80211_hw_config(sdata->local, 0); + } +} + -- cgit v1.2.3 From f4f727a6c84a6ba8f099b84b2a9f0b2ceddc1c8a Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sat, 10 Jan 2009 11:46:53 +0200 Subject: mac80211: Mark ieee80211_process_sa_query_req() static This function is only used within rx.c, so mark it static. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- net/mac80211/rx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 57ce697e3251..b648c4550d98 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1667,9 +1667,9 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx) return RX_CONTINUE; } -void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, - size_t len) +static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, + size_t len) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; -- cgit v1.2.3 From ebe6c7ba9b63539d3b1daba1a8ef4cc9ed0f6941 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sat, 10 Jan 2009 11:47:33 +0200 Subject: mac80211: Fix radiotap header it_present on big endian CPUs When the IEEE80211_RADIOTAP_RATE flag was moved to be conditional, it was mistakenly left without cpu_to_le32(). Fix that. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- net/mac80211/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index b648c4550d98..19ffc8ef1d1d 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -158,7 +158,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, */ *pos = 0; } else { - rthdr->it_present |= (1 << IEEE80211_RADIOTAP_RATE); + rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE); *pos = rate->bitrate / 5; } pos++; -- cgit v1.2.3 From 9aed3cc124343d92be6697e9af3928bdfe8eb03e Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Tue, 13 Jan 2009 16:03:29 +0200 Subject: nl80211: New command for adding extra IE(s) into management frames A new nl80211 command, NL80211_CMD_SET_MGMT_EXTRA_IE, can be used to add arbitrary IE data into the end of management frames. The interface allows extra IEs to be configured for each management frame subtype, but only some of them (ProbeReq, ProbeResp, Auth, (Re)AssocReq, Deauth, Disassoc) are currently accepted in mac80211 implementation. This makes it easier to implement IEEE 802.11 extensions like WPS and FT that add IE(s) into some management frames. In addition, this can be useful for testing and experimentation purposes. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++ net/mac80211/ieee80211_i.h | 16 +++++++++ net/mac80211/iface.c | 7 ++++ net/mac80211/mlme.c | 52 +++++++++++++++++++++++++---- net/wireless/nl80211.c | 47 ++++++++++++++++++++++++++ 5 files changed, 198 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 72c106915433..d1ac3ab2c515 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1175,6 +1175,87 @@ static int ieee80211_set_channel(struct wiphy *wiphy, return ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); } +static int set_mgmt_extra_ie_sta(struct ieee80211_if_sta *ifsta, u8 subtype, + u8 *ies, size_t ies_len) +{ + switch (subtype) { + case IEEE80211_STYPE_PROBE_REQ >> 4: + kfree(ifsta->ie_probereq); + ifsta->ie_probereq = ies; + ifsta->ie_probereq_len = ies_len; + return 0; + case IEEE80211_STYPE_PROBE_RESP >> 4: + kfree(ifsta->ie_proberesp); + ifsta->ie_proberesp = ies; + ifsta->ie_proberesp_len = ies_len; + return 0; + case IEEE80211_STYPE_AUTH >> 4: + kfree(ifsta->ie_auth); + ifsta->ie_auth = ies; + ifsta->ie_auth_len = ies_len; + return 0; + case IEEE80211_STYPE_ASSOC_REQ >> 4: + kfree(ifsta->ie_assocreq); + ifsta->ie_assocreq = ies; + ifsta->ie_assocreq_len = ies_len; + return 0; + case IEEE80211_STYPE_REASSOC_REQ >> 4: + kfree(ifsta->ie_reassocreq); + ifsta->ie_reassocreq = ies; + ifsta->ie_reassocreq_len = ies_len; + return 0; + case IEEE80211_STYPE_DEAUTH >> 4: + kfree(ifsta->ie_deauth); + ifsta->ie_deauth = ies; + ifsta->ie_deauth_len = ies_len; + return 0; + case IEEE80211_STYPE_DISASSOC >> 4: + kfree(ifsta->ie_disassoc); + ifsta->ie_disassoc = ies; + ifsta->ie_disassoc_len = ies_len; + return 0; + } + + return -EOPNOTSUPP; +} + +static int ieee80211_set_mgmt_extra_ie(struct wiphy *wiphy, + struct net_device *dev, + struct mgmt_extra_ie_params *params) +{ + struct ieee80211_sub_if_data *sdata; + u8 *ies; + size_t ies_len; + int ret = -EOPNOTSUPP; + + if (params->ies) { + ies = kmemdup(params->ies, params->ies_len, GFP_KERNEL); + if (ies == NULL) + return -ENOMEM; + ies_len = params->ies_len; + } else { + ies = NULL; + ies_len = 0; + } + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + switch (sdata->vif.type) { + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + ret = set_mgmt_extra_ie_sta(&sdata->u.sta, params->subtype, + ies, ies_len); + break; + default: + ret = -EOPNOTSUPP; + break; + } + + if (ret) + kfree(ies); + return ret; +} + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -1204,4 +1285,5 @@ struct cfg80211_ops mac80211_config_ops = { .change_bss = ieee80211_change_bss, .set_txq_params = ieee80211_set_txq_params, .set_channel = ieee80211_set_channel, + .set_mgmt_extra_ie = ieee80211_set_mgmt_extra_ie, }; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index c9ffadb55d36..5eafd3affe27 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -331,6 +331,22 @@ struct ieee80211_if_sta { u32 supp_rates_bits[IEEE80211_NUM_BANDS]; int wmm_last_param_set; + + /* Extra IE data for management frames */ + u8 *ie_probereq; + size_t ie_probereq_len; + u8 *ie_proberesp; + size_t ie_proberesp_len; + u8 *ie_auth; + size_t ie_auth_len; + u8 *ie_assocreq; + size_t ie_assocreq_len; + u8 *ie_reassocreq; + size_t ie_reassocreq_len; + u8 *ie_deauth; + size_t ie_deauth_len; + u8 *ie_disassoc; + size_t ie_disassoc_len; }; struct ieee80211_if_mesh { diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 5d5a029228be..8dc2c2188d92 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -632,6 +632,13 @@ static void ieee80211_teardown_sdata(struct net_device *dev) kfree(sdata->u.sta.assocreq_ies); kfree(sdata->u.sta.assocresp_ies); kfree_skb(sdata->u.sta.probe_resp); + kfree(sdata->u.sta.ie_probereq); + kfree(sdata->u.sta.ie_proberesp); + kfree(sdata->u.sta.ie_auth); + kfree(sdata->u.sta.ie_assocreq); + kfree(sdata->u.sta.ie_reassocreq); + kfree(sdata->u.sta.ie_deauth); + kfree(sdata->u.sta.ie_disassoc); break; case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_AP_VLAN: diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index f0d42498c257..43da6227b37c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -131,6 +131,12 @@ u64 ieee80211_sta_get_rates(struct ieee80211_local *local, /* frame sending functions */ +static void add_extra_ies(struct sk_buff *skb, u8 *ies, size_t ies_len) +{ + if (ies) + memcpy(skb_put(skb, ies_len), ies, ies_len); +} + /* also used by scanning code */ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, u8 *ssid, size_t ssid_len) @@ -142,7 +148,8 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, u8 *pos, *supp_rates, *esupp_rates = NULL; int i; - skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200); + skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200 + + sdata->u.sta.ie_probereq_len); if (!skb) { printk(KERN_DEBUG "%s: failed to allocate buffer for probe " "request\n", sdata->dev->name); @@ -189,6 +196,9 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, *pos = rate->bitrate / 5; } + add_extra_ies(skb, sdata->u.sta.ie_probereq, + sdata->u.sta.ie_probereq_len); + ieee80211_tx_skb(sdata, skb, 0); } @@ -202,7 +212,8 @@ static void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt; skb = dev_alloc_skb(local->hw.extra_tx_headroom + - sizeof(*mgmt) + 6 + extra_len); + sizeof(*mgmt) + 6 + extra_len + + sdata->u.sta.ie_auth_len); if (!skb) { printk(KERN_DEBUG "%s: failed to allocate buffer for auth " "frame\n", sdata->dev->name); @@ -225,6 +236,7 @@ static void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, mgmt->u.auth.status_code = cpu_to_le16(0); if (extra) memcpy(skb_put(skb, extra_len), extra, extra_len); + add_extra_ies(skb, sdata->u.sta.ie_auth, sdata->u.sta.ie_auth_len); ieee80211_tx_skb(sdata, skb, encrypt); } @@ -235,17 +247,26 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; - u8 *pos, *ies, *ht_ie; + u8 *pos, *ies, *ht_ie, *e_ies; int i, len, count, rates_len, supp_rates_len; u16 capab; struct ieee80211_bss *bss; int wmm = 0; struct ieee80211_supported_band *sband; u64 rates = 0; + size_t e_ies_len; + + if (ifsta->flags & IEEE80211_STA_PREV_BSSID_SET) { + e_ies = sdata->u.sta.ie_reassocreq; + e_ies_len = sdata->u.sta.ie_reassocreq_len; + } else { + e_ies = sdata->u.sta.ie_assocreq; + e_ies_len = sdata->u.sta.ie_assocreq_len; + } skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200 + ifsta->extra_ie_len + - ifsta->ssid_len); + ifsta->ssid_len + e_ies_len); if (!skb) { printk(KERN_DEBUG "%s: failed to allocate buffer for assoc " "frame\n", sdata->dev->name); @@ -436,6 +457,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, memcpy(pos, &sband->ht_cap.mcs, sizeof(sband->ht_cap.mcs)); } + add_extra_ies(skb, e_ies, e_ies_len); + kfree(ifsta->assocreq_ies); ifsta->assocreq_ies_len = (skb->data + skb->len) - ies; ifsta->assocreq_ies = kmalloc(ifsta->assocreq_ies_len, GFP_KERNEL); @@ -453,8 +476,19 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta = &sdata->u.sta; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; + u8 *ies; + size_t ies_len; - skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt)); + if (stype == IEEE80211_STYPE_DEAUTH) { + ies = sdata->u.sta.ie_deauth; + ies_len = sdata->u.sta.ie_deauth_len; + } else { + ies = sdata->u.sta.ie_disassoc; + ies_len = sdata->u.sta.ie_disassoc_len; + } + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + + ies_len); if (!skb) { printk(KERN_DEBUG "%s: failed to allocate buffer for " "deauth/disassoc frame\n", sdata->dev->name); @@ -472,6 +506,8 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, /* u.deauth.reason_code == u.disassoc.reason_code */ mgmt->u.deauth.reason_code = cpu_to_le16(reason); + add_extra_ies(skb, ies, ies_len); + ieee80211_tx_skb(sdata, skb, ifsta->flags & IEEE80211_STA_MFP_ENABLED); } @@ -1473,7 +1509,8 @@ static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband; union iwreq_data wrqu; - skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400); + skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400 + + sdata->u.sta.ie_proberesp_len); if (!skb) { printk(KERN_DEBUG "%s: failed to allocate buffer for probe " "response\n", sdata->dev->name); @@ -1556,6 +1593,9 @@ static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, memcpy(pos, &bss->supp_rates[8], rates); } + add_extra_ies(skb, sdata->u.sta.ie_proberesp, + sdata->u.sta.ie_proberesp_len); + ifsta->probe_resp = skb; ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 123d3b160fad..09a5d0f1d6dc 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -105,6 +105,10 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_HT_CAPABILITY] = { .type = NLA_BINARY, .len = NL80211_HT_CAPABILITY_LEN }, + + [NL80211_ATTR_MGMT_SUBTYPE] = { .type = NLA_U8 }, + [NL80211_ATTR_IE] = { .type = NLA_BINARY, + .len = IEEE80211_MAX_DATA_LEN }, }; /* message building helper */ @@ -2149,6 +2153,43 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } +static int nl80211_set_mgmt_extra_ie(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *drv; + int err; + struct net_device *dev; + struct mgmt_extra_ie_params params; + + memset(¶ms, 0, sizeof(params)); + + if (!info->attrs[NL80211_ATTR_MGMT_SUBTYPE]) + return -EINVAL; + params.subtype = nla_get_u8(info->attrs[NL80211_ATTR_MGMT_SUBTYPE]); + if (params.subtype > 15) + return -EINVAL; /* FC Subtype field is 4 bits (0..15) */ + + if (info->attrs[NL80211_ATTR_IE]) { + params.ies = nla_data(info->attrs[NL80211_ATTR_IE]); + params.ies_len = nla_len(info->attrs[NL80211_ATTR_IE]); + } + + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); + if (err) + return err; + + if (drv->ops->set_mgmt_extra_ie) { + rtnl_lock(); + err = drv->ops->set_mgmt_extra_ie(&drv->wiphy, dev, ¶ms); + rtnl_unlock(); + } else + err = -EOPNOTSUPP; + + cfg80211_put_dev(drv); + dev_put(dev); + return err; +} + static struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -2310,6 +2351,12 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, + { + .cmd = NL80211_CMD_SET_MGMT_EXTRA_IE, + .doit = nl80211_set_mgmt_extra_ie, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, }; /* multicast groups */ -- cgit v1.2.3 From e9648179706448d50884f172711b00a6e5ab9e42 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 15 Jan 2009 17:41:16 +0800 Subject: mac80211: cleanup kmalloc/memset -> kcalloc Transform calls kmalloc/memset to a single kcalloc. Signed-off-by: Wei Yongjun Signed-off-by: John W. Linville --- net/mac80211/ht.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 6be485264236..7a38d2e76ca9 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -950,7 +950,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, /* prepare reordering buffer */ tid_agg_rx->reorder_buf = - kmalloc(buf_size * sizeof(struct sk_buff *), GFP_ATOMIC); + kcalloc(buf_size, sizeof(struct sk_buff *), GFP_ATOMIC); if (!tid_agg_rx->reorder_buf) { #ifdef CONFIG_MAC80211_HT_DEBUG if (net_ratelimit()) @@ -960,8 +960,6 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, kfree(sta->ampdu_mlme.tid_rx[tid]); goto end; } - memset(tid_agg_rx->reorder_buf, 0, - buf_size * sizeof(struct sk_buff *)); if (local->ops->ampdu_action) ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_START, -- cgit v1.2.3 From 9cf2d186e4c52308cad8ecd893924e22ed020605 Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Mon, 19 Jan 2009 13:50:27 +0200 Subject: mac80211: remove mesh_plink_close() method. This patch removes mesh_plink_close() method as it is unused. Signed-off-by: Rami Rosen Signed-off-by: John W. Linville --- net/mac80211/mesh.h | 1 - net/mac80211/mesh_plink.c | 30 ------------------------------ 2 files changed, 31 deletions(-) (limited to 'net') diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index c197ab545e54..c5b0b5833468 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -243,7 +243,6 @@ void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata); void mesh_plink_broken(struct sta_info *sta); void mesh_plink_deactivate(struct sta_info *sta); int mesh_plink_open(struct sta_info *sta); -int mesh_plink_close(struct sta_info *sta); void mesh_plink_block(struct sta_info *sta); void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 8a6c02ba1620..c140a1b71a5e 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -361,36 +361,6 @@ void mesh_plink_block(struct sta_info *sta) spin_unlock_bh(&sta->lock); } -int mesh_plink_close(struct sta_info *sta) -{ - struct ieee80211_sub_if_data *sdata = sta->sdata; - __le16 llid, plid, reason; - - mpl_dbg("Mesh plink: closing link with %pM\n", sta->sta.addr); - spin_lock_bh(&sta->lock); - sta->reason = cpu_to_le16(MESH_LINK_CANCELLED); - reason = sta->reason; - - if (sta->plink_state == PLINK_LISTEN || - sta->plink_state == PLINK_BLOCKED) { - mesh_plink_fsm_restart(sta); - spin_unlock_bh(&sta->lock); - return 0; - } else if (sta->plink_state == PLINK_ESTAB) { - __mesh_plink_deactivate(sta); - /* The timer should not be running */ - mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata)); - } else if (!mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata))) - sta->ignore_plink_timer = true; - - sta->plink_state = PLINK_HOLDING; - llid = sta->llid; - plid = sta->plid; - spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sta->sdata, PLINK_CLOSE, sta->sta.addr, llid, - plid, reason); - return 0; -} void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status) -- cgit v1.2.3 From eb80ed8d1fc0f3005ab356fbd8d61d870e3038e6 Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Mon, 19 Jan 2009 13:50:32 +0200 Subject: mac80211: trivial documentation fixes (enum mesh_path_flags). This patch fixes documentation of enum mesh_path_flags in mesh.h. Signed-off-by: Rami Rosen Signed-off-by: John W. Linville --- net/mac80211/mesh.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index c5b0b5833468..f1196f5c3efe 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -24,15 +24,15 @@ * * * - * @MESH_PATH_ACTIVE: the mesh path is can be used for forwarding - * @MESH_PATH_RESOLVED: the discovery process is running for this mesh path + * @MESH_PATH_ACTIVE: the mesh path can be used for forwarding + * @MESH_PATH_RESOLVING: the discovery process is running for this mesh path * @MESH_PATH_DSN_VALID: the mesh path contains a valid destination sequence * number * @MESH_PATH_FIXED: the mesh path has been manually set and should not be * modified * @MESH_PATH_RESOLVED: the mesh path can has been resolved * - * MESH_PATH_RESOLVED and MESH_PATH_DELETE are used by the mesh path timer to + * MESH_PATH_RESOLVED is used by the mesh path timer to * decide when to stop or cancel the mesh path discovery. */ enum mesh_path_flags { -- cgit v1.2.3 From 2182b830fe0258477d469429d2dfb5702b84587e Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Mon, 19 Jan 2009 13:50:37 +0200 Subject: mac80211: trivial documentation fix (mesh_nexthop_lookup()). This patch fixes the documentation of mesh_nexthop_lookup() in mesh_hwmp.c. Signed-off-by: Rami Rosen Signed-off-by: John W. Linville --- net/mac80211/mesh_hwmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 3f1785c1bacb..4f862b2a0041 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -759,7 +759,7 @@ enddiscovery: } /** - * ieee80211s_lookup_nexthop - put the appropriate next hop on a mesh frame + * mesh_nexthop_lookup - put the appropriate next hop on a mesh frame * * @skb: 802.11 frame to be sent * @sdata: network subif the frame will be sent through -- cgit v1.2.3 From e0463f501fb945c1fde536d98eefc5ba156ff497 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Mon, 19 Jan 2009 16:52:00 +0200 Subject: mac80211: Fix drop-unencrypted for management frames ADDBA request Action frame was sent out before 4-way handshake was completed and the initial 802.11w code ended up dropping the frame even if MFP was not enabled. While the sending of Action frames this early is not really a good idea (will break with MFP enabled), we should not break this for the MFP disabled case. This patch fixes ieee80211_tx_h_select_key() not to drop management frames if MFP is disabled. If MFP is enabled, Action frames will be dropped before keys are set per IEEE 802.11w/D7.0. Other robust management frames (i.e., Deauthentication and Disassociation frames) are allowed unprotected prior to key configuration. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- net/mac80211/tx.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index ad53ea9e9c77..7b013fb0d27f 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -432,7 +432,10 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) tx->key = key; else if (tx->sdata->drop_unencrypted && (tx->skb->protocol != cpu_to_be16(ETH_P_PAE)) && - !(info->flags & IEEE80211_TX_CTL_INJECTED)) { + !(info->flags & IEEE80211_TX_CTL_INJECTED) && + (!ieee80211_is_robust_mgmt_frame(hdr) || + (ieee80211_is_action(hdr->frame_control) && + tx->sta && test_sta_flags(tx->sta, WLAN_STA_MFP)))) { I802_DEBUG_INC(tx->local->tx_handlers_drop_unencrypted); return TX_DROP; } else -- cgit v1.2.3 From 0378b3f1c49d48ed524eabda7e4340163d9483c9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 19 Jan 2009 11:20:52 -0500 Subject: cfg80211: add PM hooks This should help implement suspend/resume in mac80211, these hooks will be run before the device is suspended and after it resumes. Therefore, they can touch the hardware as much as they want to. Signed-off-by: Johannes Berg Signed-off-by: Bob Copeland Signed-off-by: John W. Linville --- net/wireless/sysfs.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'net') diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 79a382877641..26a72b0797a0 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -55,6 +55,34 @@ static int wiphy_uevent(struct device *dev, struct kobj_uevent_env *env) } #endif +static int wiphy_suspend(struct device *dev, pm_message_t state) +{ + struct cfg80211_registered_device *rdev = dev_to_rdev(dev); + int ret = 0; + + if (rdev->ops->suspend) { + rtnl_lock(); + ret = rdev->ops->suspend(&rdev->wiphy); + rtnl_unlock(); + } + + return ret; +} + +static int wiphy_resume(struct device *dev) +{ + struct cfg80211_registered_device *rdev = dev_to_rdev(dev); + int ret = 0; + + if (rdev->ops->resume) { + rtnl_lock(); + ret = rdev->ops->resume(&rdev->wiphy); + rtnl_unlock(); + } + + return ret; +} + struct class ieee80211_class = { .name = "ieee80211", .owner = THIS_MODULE, @@ -63,6 +91,8 @@ struct class ieee80211_class = { #ifdef CONFIG_HOTPLUG .dev_uevent = wiphy_uevent, #endif + .suspend = wiphy_suspend, + .resume = wiphy_resume, }; int wiphy_sysfs_init(void) -- cgit v1.2.3 From 665af4fc8979734d8f73c9a6732be07e545ce4cc Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Mon, 19 Jan 2009 11:20:53 -0500 Subject: mac80211: add suspend/resume callbacks This patch introduces suspend and resume callbacks to mac80211, allowing mac80211 to quiesce its state (bringing down interfaces, removing keys, etc) in preparation for suspend. cfg80211 will call the suspend hook before the device suspend, and resume hook after the device resume. Signed-off-by: Bob Copeland Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/Makefile | 2 + net/mac80211/cfg.c | 17 +++++++ net/mac80211/ieee80211_i.h | 4 ++ net/mac80211/pm.c | 114 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 137 insertions(+) create mode 100644 net/mac80211/pm.c (limited to 'net') diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 5c6fadfb6a00..58c94bb38e87 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -38,6 +38,8 @@ mac80211-$(CONFIG_MAC80211_MESH) += \ mesh_plink.o \ mesh_hwmp.o +mac80211-$(CONFIG_PM) += pm.o + # objects for PID algorithm rc80211_pid-y := rc80211_pid_algo.o rc80211_pid-$(CONFIG_MAC80211_DEBUGFS) += rc80211_pid_debugfs.o diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index d1ac3ab2c515..3527de22cafb 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1256,6 +1256,21 @@ static int ieee80211_set_mgmt_extra_ie(struct wiphy *wiphy, return ret; } +#ifdef CONFIG_PM +static int ieee80211_suspend(struct wiphy *wiphy) +{ + return __ieee80211_suspend(wiphy_priv(wiphy)); +} + +static int ieee80211_resume(struct wiphy *wiphy) +{ + return __ieee80211_resume(wiphy_priv(wiphy)); +} +#else +#define ieee80211_suspend NULL +#define ieee80211_resume NULL +#endif + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -1286,4 +1301,6 @@ struct cfg80211_ops mac80211_config_ops = { .set_txq_params = ieee80211_set_txq_params, .set_channel = ieee80211_set_channel, .set_mgmt_extra_ie = ieee80211_set_mgmt_extra_ie, + .suspend = ieee80211_suspend, + .resume = ieee80211_resume, }; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 5eafd3affe27..faa2476a2451 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1006,6 +1006,10 @@ void ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, u16 capab_info, u8 *pwr_constr_elem, u8 pwr_constr_elem_len); +/* Suspend/resume */ +int __ieee80211_suspend(struct ieee80211_hw *hw); +int __ieee80211_resume(struct ieee80211_hw *hw); + /* utility functions/constants */ extern void *mac80211_wiphy_privid; /* for wiphy privid */ extern const unsigned char rfc1042_header[6]; diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c new file mode 100644 index 000000000000..6d17ed7fd49b --- /dev/null +++ b/net/mac80211/pm.c @@ -0,0 +1,114 @@ +#include +#include + +#include "ieee80211_i.h" +#include "led.h" + +int __ieee80211_suspend(struct ieee80211_hw *hw) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata; + struct ieee80211_if_init_conf conf; + struct sta_info *sta; + + flush_workqueue(local->hw.workqueue); + + /* disable keys */ + list_for_each_entry(sdata, &local->interfaces, list) + ieee80211_disable_keys(sdata); + + /* remove STAs */ + list_for_each_entry(sta, &local->sta_list, list) { + + if (local->ops->sta_notify) { + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata = container_of(sdata->bss, + struct ieee80211_sub_if_data, + u.ap); + + local->ops->sta_notify(hw, &sdata->vif, + STA_NOTIFY_REMOVE, &sta->sta); + } + } + + /* remove all interfaces */ + list_for_each_entry(sdata, &local->interfaces, list) { + + if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN && + sdata->vif.type != NL80211_IFTYPE_MONITOR && + netif_running(sdata->dev)) { + conf.vif = &sdata->vif; + conf.type = sdata->vif.type; + conf.mac_addr = sdata->dev->dev_addr; + local->ops->remove_interface(hw, &conf); + } + } + + /* stop hardware */ + if (local->open_count) { + ieee80211_led_radio(local, false); + local->ops->stop(hw); + } + return 0; +} + +int __ieee80211_resume(struct ieee80211_hw *hw) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata; + struct ieee80211_if_init_conf conf; + struct sta_info *sta; + int res; + + /* restart hardware */ + if (local->open_count) { + res = local->ops->start(hw); + + ieee80211_led_radio(local, hw->conf.radio_enabled); + } + + /* add interfaces */ + list_for_each_entry(sdata, &local->interfaces, list) { + + if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN && + sdata->vif.type != NL80211_IFTYPE_MONITOR && + netif_running(sdata->dev)) { + conf.vif = &sdata->vif; + conf.type = sdata->vif.type; + conf.mac_addr = sdata->dev->dev_addr; + res = local->ops->add_interface(hw, &conf); + } + } + + /* add STAs back */ + list_for_each_entry(sta, &local->sta_list, list) { + + if (local->ops->sta_notify) { + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata = container_of(sdata->bss, + struct ieee80211_sub_if_data, + u.ap); + + local->ops->sta_notify(hw, &sdata->vif, + STA_NOTIFY_ADD, &sta->sta); + } + } + + /* add back keys */ + list_for_each_entry(sdata, &local->interfaces, list) + if (netif_running(sdata->dev)) + ieee80211_enable_keys(sdata); + + /* setup RTS threshold */ + if (local->ops->set_rts_threshold) + local->ops->set_rts_threshold(hw, local->rts_threshold); + + /* reconfigure hardware */ + ieee80211_hw_config(local, ~0); + + netif_addr_lock_bh(local->mdev); + ieee80211_configure_filter(local); + netif_addr_unlock_bh(local->mdev); + + return 0; +} -- cgit v1.2.3 From f797eb7e2903571e9c0e7e5d64113f51209f8dc4 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Mon, 19 Jan 2009 18:48:46 +0200 Subject: mac80211: Fix MFP Association Comeback to use Timeout Interval IE The separate Association Comeback Time IE was removed from IEEE 802.11w and the Timeout Interval IE (from IEEE 802.11r) is used instead. The editing on this is still somewhat incomplete in IEEE 802.11w/D7.0, but still, the use of Timeout Interval IE is the expected mechanism. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 4 ++-- net/mac80211/mlme.c | 5 +++-- net/mac80211/util.c | 6 +++--- 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index faa2476a2451..a8c72742a8b1 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -837,7 +837,7 @@ struct ieee802_11_elems { u8 *country_elem; u8 *pwr_constr_elem; u8 *quiet_elem; /* first quite element */ - u8 *assoc_comeback; + u8 *timeout_int; /* length of them, respectively */ u8 ssid_len; @@ -865,7 +865,7 @@ struct ieee802_11_elems { u8 pwr_constr_elem_len; u8 quiet_elem_len; u8 num_of_quiet_elem; /* can be more the one */ - u8 assoc_comeback_len; + u8 timeout_int_len; }; static inline struct ieee80211_local *hw_to_local( diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 43da6227b37c..b9e4b93089c4 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1317,9 +1317,10 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); if (status_code == WLAN_STATUS_ASSOC_REJECTED_TEMPORARILY && - elems.assoc_comeback && elems.assoc_comeback_len == 4) { + elems.timeout_int && elems.timeout_int_len == 5 && + elems.timeout_int[0] == WLAN_TIMEOUT_ASSOC_COMEBACK) { u32 tu, ms; - tu = get_unaligned_le32(elems.assoc_comeback); + tu = get_unaligned_le32(elems.timeout_int + 1); ms = tu * 1024 / 1000; printk(KERN_DEBUG "%s: AP rejected association temporarily; " "comeback duration %u TU (%u ms)\n", diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 963e0473205c..3f559e3d0a7c 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -653,9 +653,9 @@ void ieee802_11_parse_elems(u8 *start, size_t len, elems->pwr_constr_elem = pos; elems->pwr_constr_elem_len = elen; break; - case WLAN_EID_ASSOC_COMEBACK_TIME: - elems->assoc_comeback = pos; - elems->assoc_comeback_len = elen; + case WLAN_EID_TIMEOUT_INTERVAL: + elems->timeout_int = pos; + elems->timeout_int_len = elen; break; default: break; -- cgit v1.2.3 From 5f936f11613c32ca7f8ed5fa333bb38a4501deeb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 21 Jan 2009 12:47:05 +0100 Subject: mac80211: constify ieee80211_if_conf.bssid Then one place can be a static const. Signed-off-by: Johannes Berg Acked-by: Ivo van Doorn Signed-off-by: John W. Linville --- net/mac80211/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index c78304db475e..6f0fe3564ca4 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -176,7 +176,7 @@ int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed) else if (sdata->vif.type == NL80211_IFTYPE_AP) conf.bssid = sdata->dev->dev_addr; else if (ieee80211_vif_is_mesh(&sdata->vif)) { - u8 zero[ETH_ALEN] = { 0 }; + static const u8 zero[ETH_ALEN] = { 0 }; conf.bssid = zero; } else { WARN_ON(1); -- cgit v1.2.3 From 881d948c23442173a011f1adcfe4c95bf7f27515 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 21 Jan 2009 15:13:48 +0100 Subject: wireless: restrict to 32 legacy rates Since the standards only define 12 legacy rates, 32 is certainly a sane upper limit and we don't need to use u64 everywhere. Add sanity checking that no more than 32 rates are registered and change the variables to u32 throughout. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 6 +++--- net/mac80211/mesh.c | 2 +- net/mac80211/mesh.h | 2 +- net/mac80211/mesh_plink.c | 6 +++--- net/mac80211/mlme.c | 16 ++++++++-------- net/mac80211/util.c | 4 ++-- net/wireless/core.c | 12 +++++++++--- net/wireless/util.c | 2 +- 8 files changed, 28 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index a8c72742a8b1..70366efc792e 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -909,11 +909,11 @@ int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid); void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta); struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, - u8 *bssid, u8 *addr, u64 supp_rates); + u8 *bssid, u8 *addr, u32 supp_rates); int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason); int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason); u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata); -u64 ieee80211_sta_get_rates(struct ieee80211_local *local, +u32 ieee80211_sta_get_rates(struct ieee80211_local *local, struct ieee802_11_elems *elems, enum ieee80211_band band); void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, @@ -1026,7 +1026,7 @@ void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, void ieee802_11_parse_elems(u8 *start, size_t len, struct ieee802_11_elems *elems); int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freq); -u64 ieee80211_mandatory_rates(struct ieee80211_local *local, +u32 ieee80211_mandatory_rates(struct ieee80211_local *local, enum ieee80211_band band); void ieee80211_dynamic_ps_enable_work(struct work_struct *work); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 82f568e94365..2d573f8470d0 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -476,7 +476,7 @@ static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee802_11_elems elems; struct ieee80211_channel *channel; - u64 supp_rates = 0; + u32 supp_rates = 0; size_t baselen; int freq; enum ieee80211_band band = rx_status->band; diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index f1196f5c3efe..9e064ee98ee0 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -236,7 +236,7 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len); int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata); /* Mesh plinks */ -void mesh_neighbour_update(u8 *hw_addr, u64 rates, +void mesh_neighbour_update(u8 *hw_addr, u32 rates, struct ieee80211_sub_if_data *sdata, bool add); bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie); void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata); diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index c140a1b71a5e..a8bbdeca013a 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -93,7 +93,7 @@ static inline void mesh_plink_fsm_restart(struct sta_info *sta) * on it in the lifecycle management section! */ static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata, - u8 *hw_addr, u64 rates) + u8 *hw_addr, u32 rates) { struct ieee80211_local *local = sdata->local; struct sta_info *sta; @@ -222,7 +222,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, return 0; } -void mesh_neighbour_update(u8 *hw_addr, u64 rates, struct ieee80211_sub_if_data *sdata, +void mesh_neighbour_update(u8 *hw_addr, u32 rates, struct ieee80211_sub_if_data *sdata, bool peer_accepting_plinks) { struct ieee80211_local *local = sdata->local; @@ -447,7 +447,7 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_m spin_lock_bh(&sta->lock); } else if (!sta) { /* ftype == PLINK_OPEN */ - u64 rates; + u32 rates; if (!mesh_plink_free_count(sdata)) { mpl_dbg("Mesh plink error: no more free plinks\n"); rcu_read_unlock(); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index b9e4b93089c4..9852da54f5e7 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -73,7 +73,7 @@ static u8 *ieee80211_bss_get_ie(struct ieee80211_bss *bss, u8 ie) static int ieee80211_compatible_rates(struct ieee80211_bss *bss, struct ieee80211_supported_band *sband, - u64 *rates) + u32 *rates) { int i, j, count; *rates = 0; @@ -93,14 +93,14 @@ static int ieee80211_compatible_rates(struct ieee80211_bss *bss, } /* also used by mesh code */ -u64 ieee80211_sta_get_rates(struct ieee80211_local *local, +u32 ieee80211_sta_get_rates(struct ieee80211_local *local, struct ieee802_11_elems *elems, enum ieee80211_band band) { struct ieee80211_supported_band *sband; struct ieee80211_rate *bitrates; size_t num_rates; - u64 supp_rates; + u32 supp_rates; int i, j; sband = local->hw.wiphy->bands[band]; @@ -253,7 +253,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_bss *bss; int wmm = 0; struct ieee80211_supported_band *sband; - u64 rates = 0; + u32 rates = 0; size_t e_ies_len; if (ifsta->flags & IEEE80211_STA_PREV_BSSID_SET) { @@ -1282,7 +1282,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; struct sta_info *sta; - u64 rates, basic_rates; + u32 rates, basic_rates; u16 capab_info, status_code, aid; struct ieee802_11_elems elems; struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; @@ -1639,7 +1639,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, struct sta_info *sta; struct ieee80211_channel *channel; u64 beacon_timestamp, rx_timestamp; - u64 supp_rates = 0; + u32 supp_rates = 0; enum ieee80211_band band = rx_status->band; if (elems->ds_params && elems->ds_params_len == 1) @@ -1660,7 +1660,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, sta = sta_info_get(local, mgmt->sa); if (sta) { - u64 prev_rates; + u32 prev_rates; prev_rates = sta->sta.supp_rates[band]; /* make sure mandatory rates are always added */ @@ -2526,7 +2526,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) * must be callable in atomic context. */ struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, - u8 *bssid,u8 *addr, u64 supp_rates) + u8 *bssid,u8 *addr, u32 supp_rates) { struct ieee80211_local *local = sdata->local; struct sta_info *sta; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 3f559e3d0a7c..ede96c4fea2e 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -731,12 +731,12 @@ int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freqMHz) return ret; } -u64 ieee80211_mandatory_rates(struct ieee80211_local *local, +u32 ieee80211_mandatory_rates(struct ieee80211_local *local, enum ieee80211_band band) { struct ieee80211_supported_band *sband; struct ieee80211_rate *bitrates; - u64 mandatory_rates; + u32 mandatory_rates; enum ieee80211_rate_flags mandatory_flag; int i; diff --git a/net/wireless/core.c b/net/wireless/core.c index b96fc0c3f1c4..125226476089 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -273,10 +273,16 @@ int wiphy_register(struct wiphy *wiphy) sband->band = band; - if (!sband->n_channels || !sband->n_bitrates) { - WARN_ON(1); + if (WARN_ON(!sband->n_channels || !sband->n_bitrates)) + return -EINVAL; + + /* + * Since we use a u32 for rate bitmaps in + * ieee80211_get_response_rate, we cannot + * have more than 32 legacy rates. + */ + if (WARN_ON(sband->n_bitrates > 32)) return -EINVAL; - } for (i = 0; i < sband->n_channels; i++) { sband->channels[i].orig_flags = diff --git a/net/wireless/util.c b/net/wireless/util.c index e76cc28b0345..487cdd9bcffc 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -9,7 +9,7 @@ struct ieee80211_rate * ieee80211_get_response_rate(struct ieee80211_supported_band *sband, - u64 basic_rates, int bitrate) + u32 basic_rates, int bitrate) { struct ieee80211_rate *result = &sband->bitrates[0]; int i; -- cgit v1.2.3 From 078e1e60dd6c6b0d4bc8d58ccb80c008e8efc9ff Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 22 Jan 2009 18:07:31 +0100 Subject: mac80211: Add capability to enable/disable beaconing This patch adds a flag to notify drivers to start and stop beaconing when needed, for example, during a scan run. Based on Sujith's first patch to do the same, but now disables beaconing for all virtual interfaces while scanning, has a separate change flag and tracks user-space requests. Signed-off-by: Sujith Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 5 +++-- net/mac80211/main.c | 42 +++++++++++++++++++++++++++++++++++++++++- net/mac80211/mesh.c | 3 ++- net/mac80211/mlme.c | 3 ++- net/mac80211/scan.c | 18 +++++++++++------- 5 files changed, 59 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 3527de22cafb..a1a1344c5c4b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -523,7 +523,8 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata, kfree(old); - return ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON); + return ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON | + IEEE80211_IFCC_BEACON_ENABLED); } static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev, @@ -583,7 +584,7 @@ static int ieee80211_del_beacon(struct wiphy *wiphy, struct net_device *dev) synchronize_rcu(); kfree(old); - return ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON); + return ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON_ENABLED); } /* Layer 2 Update frame (802.2 Type 1 LLC XID Update response) */ diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 6f0fe3564ca4..8d5c19e4a1bc 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -168,7 +168,6 @@ int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed) return 0; memset(&conf, 0, sizeof(conf)); - conf.changed = changed; if (sdata->vif.type == NL80211_IFTYPE_STATION || sdata->vif.type == NL80211_IFTYPE_ADHOC) @@ -183,9 +182,50 @@ int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed) return -EINVAL; } + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_MESH_POINT: + break; + default: + /* do not warn to simplify caller in scan.c */ + changed &= ~IEEE80211_IFCC_BEACON_ENABLED; + if (WARN_ON(changed & IEEE80211_IFCC_BEACON)) + return -EINVAL; + changed &= ~IEEE80211_IFCC_BEACON; + break; + } + + if (changed & IEEE80211_IFCC_BEACON_ENABLED) { + if (local->sw_scanning) { + conf.enable_beacon = false; + } else { + /* + * Beacon should be enabled, but AP mode must + * check whether there is a beacon configured. + */ + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP: + conf.enable_beacon = + !!rcu_dereference(sdata->u.ap.beacon); + break; + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_MESH_POINT: + conf.enable_beacon = true; + break; + default: + /* not reached */ + WARN_ON(1); + break; + } + } + } + if (WARN_ON(!conf.bssid && (changed & IEEE80211_IFCC_BSSID))) return -EINVAL; + conf.changed = changed; + return local->ops->config_interface(local_to_hw(local), &sdata->vif, &conf); } diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 2d573f8470d0..8a1fcaeee4f2 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -442,7 +442,8 @@ void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) ifmsh->housekeeping = true; queue_work(local->hw.workqueue, &ifmsh->work); - ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON); + ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON | + IEEE80211_IFCC_BEACON_ENABLED); } void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 9852da54f5e7..ec400479c5f6 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1599,7 +1599,8 @@ static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, ifsta->probe_resp = skb; - ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON); + ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON | + IEEE80211_IFCC_BEACON_ENABLED); rates = 0; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index a2caeed57f4e..8248d7b6ae82 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -472,8 +473,8 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw) netif_addr_unlock(local->mdev); netif_tx_unlock_bh(local->mdev); - rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) { + mutex_lock(&local->iflist_mtx); + list_for_each_entry(sdata, &local->interfaces, list) { /* Tell AP we're back */ if (sdata->vif.type == NL80211_IFTYPE_STATION) { if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) { @@ -482,8 +483,10 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw) } } else netif_tx_wake_all_queues(sdata->dev); + + ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON_ENABLED); } - rcu_read_unlock(); + mutex_unlock(&local->iflist_mtx); done: ieee80211_mlme_notify_scan_completed(local); @@ -491,7 +494,6 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw) } EXPORT_SYMBOL(ieee80211_scan_completed); - void ieee80211_scan_work(struct work_struct *work) { struct ieee80211_local *local = @@ -633,8 +635,10 @@ int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata, local->sw_scanning = true; - rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) { + mutex_lock(&local->iflist_mtx); + list_for_each_entry(sdata, &local->interfaces, list) { + ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON_ENABLED); + if (sdata->vif.type == NL80211_IFTYPE_STATION) { if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) { netif_tx_stop_all_queues(sdata->dev); @@ -643,7 +647,7 @@ int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata, } else netif_tx_stop_all_queues(sdata->dev); } - rcu_read_unlock(); + mutex_unlock(&local->iflist_mtx); if (ssid) { local->scan_ssid_len = ssid_len; -- cgit v1.2.3 From 1fa25e413659f943dfec65da2abe713d566c7fdf Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 22 Jan 2009 15:05:44 -0800 Subject: cfg80211: add wiphy_apply_custom_regulatory() This adds wiphy_apply_custom_regulatory() to be used by drivers prior to wiphy registration to apply a custom regulatory domain. This can be used by drivers that do not have a direct 1-1 mapping between a regulatory domain and a country. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 115 ++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 91 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index b34fd84b3e2f..0d6059502b40 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -782,36 +782,18 @@ static u32 map_regdom_flags(u32 rd_flags) return channel_flags; } -/** - * freq_reg_info - get regulatory information for the given frequency - * @wiphy: the wiphy for which we want to process this rule for - * @center_freq: Frequency in KHz for which we want regulatory information for - * @bandwidth: the bandwidth requirement you have in KHz, if you do not have one - * you can set this to 0. If this frequency is allowed we then set - * this value to the maximum allowed bandwidth. - * @reg_rule: the regulatory rule which we have for this frequency - * - * Use this function to get the regulatory rule for a specific frequency on - * a given wireless device. If the device has a specific regulatory domain - * it wants to follow we respect that unless a country IE has been received - * and processed already. - * - * Returns 0 if it was able to find a valid regulatory rule which does - * apply to the given center_freq otherwise it returns non-zero. It will - * also return -ERANGE if we determine the given center_freq does not even have - * a regulatory rule for a frequency range in the center_freq's band. See - * freq_in_rule_band() for our current definition of a band -- this is purely - * subjective and right now its 802.11 specific. - */ -static int freq_reg_info(struct wiphy *wiphy, u32 center_freq, u32 *bandwidth, - const struct ieee80211_reg_rule **reg_rule) +static int freq_reg_info_regd(struct wiphy *wiphy, + u32 center_freq, + u32 *bandwidth, + const struct ieee80211_reg_rule **reg_rule, + const struct ieee80211_regdomain *custom_regd) { int i; bool band_rule_found = false; const struct ieee80211_regdomain *regd; u32 max_bandwidth = 0; - regd = cfg80211_regdomain; + regd = custom_regd ? custom_regd : cfg80211_regdomain; /* Follow the driver's regulatory domain, if present, unless a country * IE has been processed */ @@ -852,6 +834,34 @@ static int freq_reg_info(struct wiphy *wiphy, u32 center_freq, u32 *bandwidth, return !max_bandwidth; } +/** + * freq_reg_info - get regulatory information for the given frequency + * @wiphy: the wiphy for which we want to process this rule for + * @center_freq: Frequency in KHz for which we want regulatory information for + * @bandwidth: the bandwidth requirement you have in KHz, if you do not have one + * you can set this to 0. If this frequency is allowed we then set + * this value to the maximum allowed bandwidth. + * @reg_rule: the regulatory rule which we have for this frequency + * + * Use this function to get the regulatory rule for a specific frequency on + * a given wireless device. If the device has a specific regulatory domain + * it wants to follow we respect that unless a country IE has been received + * and processed already. + * + * Returns 0 if it was able to find a valid regulatory rule which does + * apply to the given center_freq otherwise it returns non-zero. It will + * also return -ERANGE if we determine the given center_freq does not even have + * a regulatory rule for a frequency range in the center_freq's band. See + * freq_in_rule_band() for our current definition of a band -- this is purely + * subjective and right now its 802.11 specific. + */ +static int freq_reg_info(struct wiphy *wiphy, u32 center_freq, u32 *bandwidth, + const struct ieee80211_reg_rule **reg_rule) +{ + return freq_reg_info_regd(wiphy, center_freq, + bandwidth, reg_rule, NULL); +} + static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, unsigned int chan_idx) { @@ -962,6 +972,63 @@ void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby) wiphy->reg_notifier(wiphy, setby); } +static void handle_channel_custom(struct wiphy *wiphy, + enum ieee80211_band band, + unsigned int chan_idx, + const struct ieee80211_regdomain *regd) +{ + int r; + u32 max_bandwidth = 0; + const struct ieee80211_reg_rule *reg_rule = NULL; + const struct ieee80211_power_rule *power_rule = NULL; + struct ieee80211_supported_band *sband; + struct ieee80211_channel *chan; + + sband = wiphy->bands[band]; + BUG_ON(chan_idx >= sband->n_channels); + chan = &sband->channels[chan_idx]; + + r = freq_reg_info_regd(wiphy, MHZ_TO_KHZ(chan->center_freq), + &max_bandwidth, ®_rule, regd); + + if (r) { + chan->flags = IEEE80211_CHAN_DISABLED; + return; + } + + power_rule = ®_rule->power_rule; + + chan->flags |= map_regdom_flags(reg_rule->flags); + chan->max_antenna_gain = (int) MBI_TO_DBI(power_rule->max_antenna_gain); + chan->max_bandwidth = KHZ_TO_MHZ(max_bandwidth); + chan->max_power = (int) MBM_TO_DBM(power_rule->max_eirp); +} + +static void handle_band_custom(struct wiphy *wiphy, enum ieee80211_band band, + const struct ieee80211_regdomain *regd) +{ + unsigned int i; + struct ieee80211_supported_band *sband; + + BUG_ON(!wiphy->bands[band]); + sband = wiphy->bands[band]; + + for (i = 0; i < sband->n_channels; i++) + handle_channel_custom(wiphy, band, i, regd); +} + +/* Used by drivers prior to wiphy registration */ +void wiphy_apply_custom_regulatory(struct wiphy *wiphy, + const struct ieee80211_regdomain *regd) +{ + enum ieee80211_band band; + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + if (wiphy->bands[band]) + handle_band_custom(wiphy, band, regd); + } +} +EXPORT_SYMBOL(wiphy_apply_custom_regulatory); + static int reg_copy_regd(const struct ieee80211_regdomain **dst_regd, const struct ieee80211_regdomain *src_regd) { -- cgit v1.2.3 From 34f573473a659f8c2727d8d408e17b241900c28e Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 22 Jan 2009 15:05:45 -0800 Subject: cfg80211: export freq_reg_info() This can be used by drivers on the reg_notifier() Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 24 ++---------------------- 1 file changed, 2 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 0d6059502b40..d663795d6944 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -833,29 +833,9 @@ static int freq_reg_info_regd(struct wiphy *wiphy, return !max_bandwidth; } +EXPORT_SYMBOL(freq_reg_info); -/** - * freq_reg_info - get regulatory information for the given frequency - * @wiphy: the wiphy for which we want to process this rule for - * @center_freq: Frequency in KHz for which we want regulatory information for - * @bandwidth: the bandwidth requirement you have in KHz, if you do not have one - * you can set this to 0. If this frequency is allowed we then set - * this value to the maximum allowed bandwidth. - * @reg_rule: the regulatory rule which we have for this frequency - * - * Use this function to get the regulatory rule for a specific frequency on - * a given wireless device. If the device has a specific regulatory domain - * it wants to follow we respect that unless a country IE has been received - * and processed already. - * - * Returns 0 if it was able to find a valid regulatory rule which does - * apply to the given center_freq otherwise it returns non-zero. It will - * also return -ERANGE if we determine the given center_freq does not even have - * a regulatory rule for a frequency range in the center_freq's band. See - * freq_in_rule_band() for our current definition of a band -- this is purely - * subjective and right now its 802.11 specific. - */ -static int freq_reg_info(struct wiphy *wiphy, u32 center_freq, u32 *bandwidth, +int freq_reg_info(struct wiphy *wiphy, u32 center_freq, u32 *bandwidth, const struct ieee80211_reg_rule **reg_rule) { return freq_reg_info_regd(wiphy, center_freq, -- cgit v1.2.3 From 5eebade608d695e30e89d4c5ca6136a58f24ed14 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 22 Jan 2009 15:05:47 -0800 Subject: cfg80211: process user requests only after previous user/driver/core requests This prevents user regulatory changes to be considered prior to previous pending user, core or driver requests which have not be applied. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index d663795d6944..4d2d2d4cc0d4 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1091,6 +1091,16 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, if (last_request->initiator == REGDOM_SET_BY_USER && last_request->intersect) return -EOPNOTSUPP; + /* Process user requests only after previous user/driver/core + * requests have been processed */ + if (last_request->initiator == REGDOM_SET_BY_CORE || + last_request->initiator == REGDOM_SET_BY_DRIVER || + last_request->initiator == REGDOM_SET_BY_USER) { + if (!alpha2_equal(last_request->alpha2, + cfg80211_regdomain->alpha2)) + return -EAGAIN; + } + return 0; } -- cgit v1.2.3 From e74b1e7fb2f12db36f25af2158ee6e2940e4f138 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 22 Jan 2009 15:05:48 -0800 Subject: cfg80211: ignore consecutive equal regulatory hints We ignore regulatory hints for the same alpha2 if we already have processed the same alpha2 on the current regulatory domain. For a driver regulatory_hint() this means we copy onto its wiphy->regd the previously procesed regulatory domain from CRDA without having to call CRDA again. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 4d2d2d4cc0d4..c201abd38ad1 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1080,8 +1080,13 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, } return REG_INTERSECT; case REGDOM_SET_BY_DRIVER: - if (last_request->initiator == REGDOM_SET_BY_CORE) - return 0; + if (last_request->initiator == REGDOM_SET_BY_CORE) { + if (is_old_static_regdom(cfg80211_regdomain)) + return 0; + if (!alpha2_equal(cfg80211_regdomain->alpha2, alpha2)) + return 0; + return -EALREADY; + } return REG_INTERSECT; case REGDOM_SET_BY_USER: if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) @@ -1101,6 +1106,10 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, return -EAGAIN; } + if (!is_old_static_regdom(cfg80211_regdomain) && + alpha2_equal(cfg80211_regdomain->alpha2, alpha2)) + return -EALREADY; + return 0; } -- cgit v1.2.3 From 2a44f911d8bac3e6c97a25cc612e4324dfbdfdc4 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 22 Jan 2009 15:05:49 -0800 Subject: cfg80211: rename fw_handles_regulatory to custom_regulatory Drivers without firmware can also have custom regulatory maps which do not map to a specific ISO / IEC alpha2 country code. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index c201abd38ad1..5db02a3d9c02 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -927,7 +927,7 @@ static bool ignore_reg_update(struct wiphy *wiphy, enum reg_set_by setby) if (!last_request) return true; if (setby == REGDOM_SET_BY_CORE && - wiphy->fw_handles_regulatory) + wiphy->custom_regulatory) return true; return false; } -- cgit v1.2.3 From d46e5b1d0c617a2a46353812d7f02115c17b5e72 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 22 Jan 2009 15:05:50 -0800 Subject: cfg80211: move check for ignore_reg_update() on wiphy_update_regulatory() This ensures that the initial REGDOM_SET_BY_CORE upon wiphy registration respects the wiphy->custom_regulatory setting. Without this and if OLD_REG is disabled (which will be default soon as we remove it) the wiphy->custom_regulatory is simply ignored. Signed-off-by: Luis R. Rodriguez Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/reg.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 5db02a3d9c02..81acb07f1d44 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -937,13 +937,15 @@ static void update_all_wiphy_regulatory(enum reg_set_by setby) struct cfg80211_registered_device *drv; list_for_each_entry(drv, &cfg80211_drv_list, list) - if (!ignore_reg_update(&drv->wiphy, setby)) - wiphy_update_regulatory(&drv->wiphy, setby); + wiphy_update_regulatory(&drv->wiphy, setby); } void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby) { enum ieee80211_band band; + + if (ignore_reg_update(wiphy, setby)) + return; for (band = 0; band < IEEE80211_NUM_BANDS; band++) { if (wiphy->bands[band]) handle_band(wiphy, band); -- cgit v1.2.3 From 716f9392e2b84cacc18cc11f7427cb98adeb1c3d Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 22 Jan 2009 15:05:51 -0800 Subject: cfg80211: pass more detailed regulatory request information on reg_notifier() Drivers may need more information than just who set the last regulatory domain, as such lets just pass the last regulatory_request receipt. To do this we need to move out to headers struct regulatory_request, and enum environment_cap. While at it lets add documentation for enum environment_cap. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 34 +--------------------------------- net/wireless/reg.h | 7 ------- 2 files changed, 1 insertion(+), 40 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 81acb07f1d44..cad4daadba0d 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -42,38 +42,6 @@ #include "core.h" #include "reg.h" -/** - * struct regulatory_request - receipt of last regulatory request - * - * @wiphy: this is set if this request's initiator is - * %REGDOM_SET_BY_COUNTRY_IE or %REGDOM_SET_BY_DRIVER. This - * can be used by the wireless core to deal with conflicts - * and potentially inform users of which devices specifically - * cased the conflicts. - * @initiator: indicates who sent this request, could be any of - * of those set in reg_set_by, %REGDOM_SET_BY_* - * @alpha2: the ISO / IEC 3166 alpha2 country code of the requested - * regulatory domain. We have a few special codes: - * 00 - World regulatory domain - * 99 - built by driver but a specific alpha2 cannot be determined - * 98 - result of an intersection between two regulatory domains - * @intersect: indicates whether the wireless core should intersect - * the requested regulatory domain with the presently set regulatory - * domain. - * @country_ie_checksum: checksum of the last processed and accepted - * country IE - * @country_ie_env: lets us know if the AP is telling us we are outdoor, - * indoor, or if it doesn't matter - */ -struct regulatory_request { - struct wiphy *wiphy; - enum reg_set_by initiator; - char alpha2[2]; - bool intersect; - u32 country_ie_checksum; - enum environment_cap country_ie_env; -}; - /* Receipt of information from last regulatory request */ static struct regulatory_request *last_request; @@ -951,7 +919,7 @@ void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby) handle_band(wiphy, band); } if (wiphy->reg_notifier) - wiphy->reg_notifier(wiphy, setby); + wiphy->reg_notifier(wiphy, last_request); } static void handle_channel_custom(struct wiphy *wiphy, diff --git a/net/wireless/reg.h b/net/wireless/reg.h index a76ea3ff7cd6..eb1dd5bc9b27 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -11,13 +11,6 @@ void regulatory_exit(void); int set_regdom(const struct ieee80211_regdomain *rd); -enum environment_cap { - ENVIRON_ANY, - ENVIRON_INDOOR, - ENVIRON_OUTDOOR, -}; - - /** * __regulatory_hint - hint to the wireless core a regulatory domain * @wiphy: if the hint comes from country information from an AP, this -- cgit v1.2.3 From f976376de0d6a9697fb635369f12ae00251f4566 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 22 Jan 2009 15:05:52 -0800 Subject: cfg80211: Allow for strict regulatory settings This allows drivers to request strict regulatory settings to be applied to its devices. This is desirable for devices where proper calibration and compliance can only be gauranteed for for the device's programmed regulatory domain. Regulatory domain settings will be ignored until the device's own regulatory domain is properly configured. If no regulatory domain is received only the world regulatory domain will be applied -- if OLD_REG (default to "US") is not enabled. If OLD_REG behaviour is not acceptable to drivers they must update their wiphy with a custom reuglatory prior to wiphy registration. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index cad4daadba0d..89e0d8b3cf1e 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -867,6 +867,22 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, power_rule = ®_rule->power_rule; + if (last_request->initiator == REGDOM_SET_BY_DRIVER && + last_request->wiphy && last_request->wiphy == wiphy && + last_request->wiphy->strict_regulatory) { + /* This gaurantees the driver's requested regulatory domain + * will always be used as a base for further regulatory + * settings */ + chan->flags = chan->orig_flags = + map_regdom_flags(reg_rule->flags); + chan->max_antenna_gain = chan->orig_mag = + (int) MBI_TO_DBI(power_rule->max_antenna_gain); + chan->max_bandwidth = KHZ_TO_MHZ(max_bandwidth); + chan->max_power = chan->orig_mpwr = + (int) MBM_TO_DBM(power_rule->max_eirp); + return; + } + chan->flags = flags | map_regdom_flags(reg_rule->flags); chan->max_antenna_gain = min(chan->orig_mag, (int) MBI_TO_DBI(power_rule->max_antenna_gain)); @@ -897,6 +913,11 @@ static bool ignore_reg_update(struct wiphy *wiphy, enum reg_set_by setby) if (setby == REGDOM_SET_BY_CORE && wiphy->custom_regulatory) return true; + /* wiphy->regd will be set once the device has its own + * desired regulatory domain set */ + if (wiphy->strict_regulatory && !wiphy->regd && + !is_world_regdom(last_request->alpha2)) + return true; return false; } @@ -1155,10 +1176,15 @@ new_request: void regulatory_hint(struct wiphy *wiphy, const char *alpha2) { + int r; BUG_ON(!alpha2); mutex_lock(&cfg80211_drv_mutex); - __regulatory_hint(wiphy, REGDOM_SET_BY_DRIVER, alpha2, 0, ENVIRON_ANY); + r = __regulatory_hint(wiphy, REGDOM_SET_BY_DRIVER, + alpha2, 0, ENVIRON_ANY); + /* This is required so that the orig_* parameters are saved */ + if (r == -EALREADY && wiphy->strict_regulatory) + wiphy_update_regulatory(wiphy, REGDOM_SET_BY_DRIVER); mutex_unlock(&cfg80211_drv_mutex); } EXPORT_SYMBOL(regulatory_hint); -- cgit v1.2.3 From 9a95371aa26e3cb9fb1340362912000088ff3c3e Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 22 Jan 2009 15:05:53 -0800 Subject: mac80211: allow mac80211 drivers to get to struct ieee80211_hw from wiphy If a driver is given a wiphy and it wants to get to its private mac80211 driver area it can use wiphy_to_ieee80211_hw() to get first to its ieee80211_hw and then access the private structure via hw->priv. The wiphy_priv() is already being used internally by mac80211 and drivers should not use this. This can be helpful in a drivers reg_notifier(). Signed-off-by: Luis R. Rodriguez Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/util.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/mac80211/util.c b/net/mac80211/util.c index ede96c4fea2e..fc30f2940e1e 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -41,6 +41,15 @@ const unsigned char rfc1042_header[] __aligned(2) = const unsigned char bridge_tunnel_header[] __aligned(2) = { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; +struct ieee80211_hw *wiphy_to_ieee80211_hw(struct wiphy *wiphy) +{ + struct ieee80211_local *local; + BUG_ON(!wiphy); + + local = wiphy_priv(wiphy); + return &local->hw; +} +EXPORT_SYMBOL(wiphy_to_ieee80211_hw); u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len, enum nl80211_iftype type) -- cgit v1.2.3 From 24ed1da1337b92e3b0a89f2c2b7cd33b9a8fcb62 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 22 Jan 2009 15:05:54 -0800 Subject: cfg80211: allow users to help a driver's compliance Let users be more compliant if so desired. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 89e0d8b3cf1e..af9132cea931 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -764,8 +764,9 @@ static int freq_reg_info_regd(struct wiphy *wiphy, regd = custom_regd ? custom_regd : cfg80211_regdomain; /* Follow the driver's regulatory domain, if present, unless a country - * IE has been processed */ + * IE has been processed or a user wants to help complaince further */ if (last_request->initiator != REGDOM_SET_BY_COUNTRY_IE && + last_request->initiator != REGDOM_SET_BY_USER && wiphy->regd) regd = wiphy->regd; -- cgit v1.2.3 From ae54c985cc7daa502da6e7eb3b223a30fbbf4cfb Mon Sep 17 00:00:00 2001 From: Alina Friedrichsen Date: Fri, 23 Jan 2009 05:33:37 +0100 Subject: mac80211: Read the TSF via debugfs This patch adds an low-level driver independent entry to read the TSF value into the debugfs of mac80211. This makes debugging the IBSS handling of wifi drivers easier. Signed-off-by: Alina Friedrichsen Signed-off-by: John W. Linville --- net/mac80211/debugfs.c | 4 ++++ net/mac80211/ieee80211_i.h | 1 + 2 files changed, 5 insertions(+) (limited to 'net') diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 18541bb75096..717d5484e1e5 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -61,6 +61,8 @@ DEBUGFS_READONLY_FILE(wep_iv, 20, "%#06x", local->wep_iv & 0xffffff); DEBUGFS_READONLY_FILE(rate_ctrl_alg, 100, "%s", local->rate_ctrl ? local->rate_ctrl->ops->name : ""); +DEBUGFS_READONLY_FILE(tsf, 20, "%#018llx", + (unsigned long long) (local->ops->get_tsf ? local->ops->get_tsf(local_to_hw(local)) : 0)); /* statistics stuff */ @@ -202,6 +204,7 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_ADD(long_retry_limit); DEBUGFS_ADD(total_ps_buffered); DEBUGFS_ADD(wep_iv); + DEBUGFS_ADD(tsf); statsd = debugfs_create_dir("statistics", phyd); local->debugfs.statistics = statsd; @@ -255,6 +258,7 @@ void debugfs_hw_del(struct ieee80211_local *local) DEBUGFS_DEL(long_retry_limit); DEBUGFS_DEL(total_ps_buffered); DEBUGFS_DEL(wep_iv); + DEBUGFS_DEL(tsf); DEBUGFS_STATS_DEL(transmitted_fragment_count); DEBUGFS_STATS_DEL(multicast_transmitted_frame_count); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 70366efc792e..927cbde8c19c 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -744,6 +744,7 @@ struct ieee80211_local { struct dentry *long_retry_limit; struct dentry *total_ps_buffered; struct dentry *wep_iv; + struct dentry *tsf; struct dentry *statistics; struct local_debugfsdentries_statsdentries { struct dentry *transmitted_fragment_count; -- cgit v1.2.3 From dfe670121a2719be6ead12eb5306d4d2714c09cb Mon Sep 17 00:00:00 2001 From: Alina Friedrichsen Date: Sat, 24 Jan 2009 01:19:04 +0100 Subject: mac80211: Fixed BSSID handling revisited This patch cleanup the fixed BSSID handling, that ieee80211_sta_set_bssid() works like ieee80211_sta_set_ssid(). So that the BSSID is only a second selection criterion besides the SSID. This allows us to create new IBSS networks with fixed BSSIDs, which was broken before. In the second version of this patch the handling of the stupid merges to the same BSSID is moved out to get reworked into an other patch. And this version hopefully solves the problems with some low-level drivers and re-adds the config BSSID warning to help debugging the low-level drivers. Much thanks to all who have helped testing! :) Signed-off-by: Alina Friedrichsen Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 72 ++++++++++++++++++++++++----------------------------- 1 file changed, 33 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ec400479c5f6..9d51e278c1e5 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1615,6 +1615,7 @@ static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, ieee80211_sta_def_wmm_params(sdata, bss); + ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET; ifsta->state = IEEE80211_STA_MLME_IBSS_JOINED; mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL); @@ -2178,19 +2179,18 @@ static int ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata, int i; int ret; -#if 0 - /* Easier testing, use fixed BSSID. */ - memset(bssid, 0xfe, ETH_ALEN); -#else - /* Generate random, not broadcast, locally administered BSSID. Mix in - * own MAC address to make sure that devices that do not have proper - * random number generator get different BSSID. */ - get_random_bytes(bssid, ETH_ALEN); - for (i = 0; i < ETH_ALEN; i++) - bssid[i] ^= sdata->dev->dev_addr[i]; - bssid[0] &= ~0x01; - bssid[0] |= 0x02; -#endif + if (sdata->u.sta.flags & IEEE80211_STA_BSSID_SET) { + memcpy(bssid, ifsta->bssid, ETH_ALEN); + } else { + /* Generate random, not broadcast, locally administered BSSID. Mix in + * own MAC address to make sure that devices that do not have proper + * random number generator get different BSSID. */ + get_random_bytes(bssid, ETH_ALEN); + for (i = 0; i < ETH_ALEN; i++) + bssid[i] ^= sdata->dev->dev_addr[i]; + bssid[0] &= ~0x01; + bssid[0] |= 0x02; + } printk(KERN_DEBUG "%s: Creating new IBSS network, BSSID %pM\n", sdata->dev->name, bssid); @@ -2251,6 +2251,9 @@ static int ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata, memcmp(ifsta->ssid, bss->ssid, bss->ssid_len) != 0 || !(bss->capability & WLAN_CAPABILITY_IBSS)) continue; + if ((ifsta->flags & IEEE80211_STA_BSSID_SET) && + memcmp(ifsta->bssid, bss->bssid, ETH_ALEN) != 0) + continue; #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG " bssid=%pM found\n", bss->bssid); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ @@ -2267,7 +2270,9 @@ static int ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata, "%pM\n", bssid, ifsta->bssid); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ - if (found && memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0) { + if (found && + ((!(ifsta->flags & IEEE80211_STA_PREV_BSSID_SET)) || + memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0)) { int ret; int search_freq; @@ -2605,16 +2610,16 @@ int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size memset(ifsta->ssid, 0, sizeof(ifsta->ssid)); memcpy(ifsta->ssid, ssid, len); ifsta->ssid_len = len; - ifsta->flags &= ~IEEE80211_STA_PREV_BSSID_SET; } + ifsta->flags &= ~IEEE80211_STA_PREV_BSSID_SET; + if (len) ifsta->flags |= IEEE80211_STA_SSID_SET; else ifsta->flags &= ~IEEE80211_STA_SSID_SET; - if (sdata->vif.type == NL80211_IFTYPE_ADHOC && - !(ifsta->flags & IEEE80211_STA_BSSID_SET)) { + if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { ifsta->ibss_join_req = jiffies; ifsta->state = IEEE80211_STA_MLME_IBSS_SEARCH; return ieee80211_sta_find_ibss(sdata, ifsta); @@ -2634,36 +2639,25 @@ int ieee80211_sta_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid) { struct ieee80211_if_sta *ifsta; - int res; - bool valid; ifsta = &sdata->u.sta; - valid = is_valid_ether_addr(bssid); - if (memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0) { - if(valid) - memcpy(ifsta->bssid, bssid, ETH_ALEN); - else - memset(ifsta->bssid, 0, ETH_ALEN); - res = 0; - /* - * Hack! See also ieee80211_sta_set_ssid. - */ - if (netif_running(sdata->dev)) - res = ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID); - if (res) { + if (is_valid_ether_addr(bssid)) { + memcpy(ifsta->bssid, bssid, ETH_ALEN); + ifsta->flags |= IEEE80211_STA_BSSID_SET; + } else { + memset(ifsta->bssid, 0, ETH_ALEN); + ifsta->flags &= ~IEEE80211_STA_BSSID_SET; + } + + if (netif_running(sdata->dev)) { + if (ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID)) { printk(KERN_DEBUG "%s: Failed to config new BSSID to " "the low-level driver\n", sdata->dev->name); - return res; } } - if (valid) - ifsta->flags |= IEEE80211_STA_BSSID_SET; - else - ifsta->flags &= ~IEEE80211_STA_BSSID_SET; - - return 0; + return ieee80211_sta_set_ssid(sdata, ifsta->ssid, ifsta->ssid_len); } int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, char *ie, size_t len) -- cgit v1.2.3 From 30d3ef41b4395d9bee5f481395eef2d3b8b6ee50 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Fri, 23 Jan 2009 23:09:35 -0500 Subject: mac80211: change workqueue back to non-freezeable "mac80211: make workqueue freezable" made the mac80211 workqueue freezeable to prevent us from doing any work after the driver went away. This was fine before mac80211 had any suspend support. However, now we want to flush this workqueue in suspend(). Because the thread for a freezeable workqueue is stopped before the device class suspend() is called, flush_workqueue() will hang in the suspend-to-disk case. Converting it back to a non-freezeable queue will keep suspend from hanging. Moreover, since we flush the workqueue under RTNL and userspace is stopped, there won't be any new work in the workqueue until after resume. Thus we still don't have to worry about pinging the AP without hardware. Signed-off-by: Bob Copeland Signed-off-by: John W. Linville --- net/mac80211/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 8d5c19e4a1bc..210dfe3cf6c3 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -858,7 +858,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) mdev->set_multicast_list = ieee80211_master_set_multicast_list; local->hw.workqueue = - create_freezeable_workqueue(wiphy_name(local->hw.wiphy)); + create_singlethread_workqueue(wiphy_name(local->hw.wiphy)); if (!local->hw.workqueue) { result = -ENOMEM; goto fail_workqueue; -- cgit v1.2.3 From e874e6585539f6706b8e5f96125c9fca89cce716 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Sat, 24 Jan 2009 13:21:14 -0500 Subject: mac80211: flush workqueue a second time in suspend() Drivers can theoretically queue more work in one of their callbacks from mac80211 suspend, so let's flush it once more to be on the safe side, just before calling ->stop(). Signed-off-by: Bob Copeland Signed-off-by: John W. Linville --- net/mac80211/pm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 6d17ed7fd49b..44525f517077 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -44,6 +44,9 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) } } + /* flush again, in case driver queued work */ + flush_workqueue(local->hw.workqueue); + /* stop hardware */ if (local->open_count) { ieee80211_led_radio(local, false); -- cgit v1.2.3 From c771c9d8da1e8292ef8bf7fd4ce135dacc650130 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 23 Jan 2009 22:54:03 +0100 Subject: mac80211: add interface list lock Using only the RTNL has a number of problems, most notably that ieee80211_iterate_active_interfaces() and other interface list traversals cannot be done from the internal workqueue because it needs to be flushed under the RTNL. This patch introduces a new mutex that protects the interface list against modifications. A more detailed explanation is part of the code change. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/iface.c | 31 +++++++++++++++++++++++++++++++ net/mac80211/main.c | 3 +++ net/mac80211/util.c | 4 ++-- 4 files changed, 38 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 927cbde8c19c..eaf3603862b7 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -643,7 +643,9 @@ struct ieee80211_local { struct crypto_blkcipher *wep_rx_tfm; u32 wep_iv; + /* see iface.c */ struct list_head interfaces; + struct mutex iflist_mtx; /* * Key lock, protects sdata's key_list and sta_info's diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 8dc2c2188d92..00562a8b99cf 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -21,6 +21,23 @@ #include "mesh.h" #include "led.h" +/** + * DOC: Interface list locking + * + * The interface list in each struct ieee80211_local is protected + * three-fold: + * + * (1) modifications may only be done under the RTNL + * (2) modifications and readers are protected against each other by + * the iflist_mtx. + * (3) modifications are done in an RCU manner so atomic readers + * can traverse the list in RCU-safe blocks. + * + * As a consequence, reads (traversals) of the list can be protected + * by either the RTNL, the iflist_mtx or RCU. + */ + + static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) { int meshhdrlen; @@ -800,7 +817,9 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, params->mesh_id_len, params->mesh_id); + mutex_lock(&local->iflist_mtx); list_add_tail_rcu(&sdata->list, &local->interfaces); + mutex_unlock(&local->iflist_mtx); if (new_dev) *new_dev = ndev; @@ -816,7 +835,10 @@ void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata) { ASSERT_RTNL(); + mutex_lock(&sdata->local->iflist_mtx); list_del_rcu(&sdata->list); + mutex_unlock(&sdata->local->iflist_mtx); + synchronize_rcu(); unregister_netdevice(sdata->dev); } @@ -832,7 +854,16 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) ASSERT_RTNL(); list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) { + /* + * we cannot hold the iflist_mtx across unregister_netdevice, + * but we only need to hold it for list modifications to lock + * out readers since we're under the RTNL here as all other + * writers. + */ + mutex_lock(&local->iflist_mtx); list_del(&sdata->list); + mutex_unlock(&local->iflist_mtx); + unregister_netdevice(sdata->dev); } } diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 210dfe3cf6c3..a109c06e8e4e 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -758,6 +758,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, local->hw.conf.radio_enabled = true; INIT_LIST_HEAD(&local->interfaces); + mutex_init(&local->iflist_mtx); spin_lock_init(&local->key_lock); @@ -1008,6 +1009,8 @@ void ieee80211_free_hw(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); + mutex_destroy(&local->iflist_mtx); + wiphy_free(local->hw.wiphy); } EXPORT_SYMBOL(ieee80211_free_hw); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index fc30f2940e1e..73c7d7345abd 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -468,7 +468,7 @@ void ieee80211_iterate_active_interfaces( struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_sub_if_data *sdata; - rtnl_lock(); + mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { switch (sdata->vif.type) { @@ -489,7 +489,7 @@ void ieee80211_iterate_active_interfaces( &sdata->vif); } - rtnl_unlock(); + mutex_unlock(&local->iflist_mtx); } EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces); -- cgit v1.2.3 From 3b5d665b51cda73ef1a774b515afd879a38e3674 Mon Sep 17 00:00:00 2001 From: Alina Friedrichsen Date: Sat, 24 Jan 2009 07:09:59 +0100 Subject: mac80211: Generic TSF debugging This patch enables low-level driver independent debugging of the TSF and remove the driver specific things of ath5k and ath9k from the debugfs. Signed-off-by: Alina Friedrichsen Signed-off-by: John W. Linville --- net/mac80211/debugfs.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 52 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 717d5484e1e5..e37f557de3f3 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -57,12 +57,61 @@ DEBUGFS_READONLY_FILE(long_retry_limit, 20, "%d", local->hw.conf.long_frame_max_tx_count); DEBUGFS_READONLY_FILE(total_ps_buffered, 20, "%d", local->total_ps_buffered); -DEBUGFS_READONLY_FILE(wep_iv, 20, "%#06x", +DEBUGFS_READONLY_FILE(wep_iv, 20, "%#08x", local->wep_iv & 0xffffff); DEBUGFS_READONLY_FILE(rate_ctrl_alg, 100, "%s", local->rate_ctrl ? local->rate_ctrl->ops->name : ""); -DEBUGFS_READONLY_FILE(tsf, 20, "%#018llx", - (unsigned long long) (local->ops->get_tsf ? local->ops->get_tsf(local_to_hw(local)) : 0)); + +static ssize_t tsf_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct ieee80211_local *local = file->private_data; + u64 tsf = 0; + char buf[100]; + + if (local->ops->get_tsf) + tsf = local->ops->get_tsf(local_to_hw(local)); + + snprintf(buf, sizeof(buf), "0x%016llx\n", (unsigned long long) tsf); + + return simple_read_from_buffer(user_buf, count, ppos, buf, 19); +} + +static ssize_t tsf_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct ieee80211_local *local = file->private_data; + unsigned long long tsf; + char buf[100]; + size_t len; + + len = min(count, sizeof(buf) - 1); + if (copy_from_user(buf, user_buf, len)) + return -EFAULT; + buf[len] = '\0'; + + if (strncmp(buf, "reset", 5) == 0) { + if (local->ops->reset_tsf) { + local->ops->reset_tsf(local_to_hw(local)); + printk(KERN_INFO "%s: debugfs reset TSF\n", wiphy_name(local->hw.wiphy)); + } + } else { + tsf = simple_strtoul(buf, NULL, 0); + if (local->ops->set_tsf) { + local->ops->set_tsf(local_to_hw(local), tsf); + printk(KERN_INFO "%s: debugfs set TSF to %#018llx\n", wiphy_name(local->hw.wiphy), tsf); + } + } + + return count; +} + +static const struct file_operations tsf_ops = { + .read = tsf_read, + .write = tsf_write, + .open = mac80211_open_file_generic +}; /* statistics stuff */ -- cgit v1.2.3 From d81c2d9c909e95ee8a5745da95bbb35f8ded3d17 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 26 Jan 2009 09:00:51 -0800 Subject: cfg80211: do not pass -EALREADY to userspace on regdomain change request If the regulatory domain is already set it is technically not an error so do not pass an errno to userspace. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 09a5d0f1d6dc..e69da8d20474 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1908,6 +1908,11 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) mutex_lock(&cfg80211_drv_mutex); r = __regulatory_hint(NULL, REGDOM_SET_BY_USER, data, 0, ENVIRON_ANY); mutex_unlock(&cfg80211_drv_mutex); + /* This means the regulatory domain was already set, however + * we don't want to confuse userspace with a "successful error" + * message so lets just treat it as a success */ + if (r == -EALREADY) + r = 0; return r; } -- cgit v1.2.3 From fb9ddbf086591ab4c90c44d10468f84d465b3fdf Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 26 Jan 2009 19:11:57 +0100 Subject: mac80211: don't try to powersave/config disabled interfaces Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/scan.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 8248d7b6ae82..282e6a0dec01 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -475,6 +475,9 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw) mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { + if (!netif_running(sdata->dev)) + continue; + /* Tell AP we're back */ if (sdata->vif.type == NL80211_IFTYPE_STATION) { if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) { @@ -637,6 +640,9 @@ int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata, mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { + if (!netif_running(sdata->dev)) + continue; + ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON_ENABLED); if (sdata->vif.type == NL80211_IFTYPE_STATION) { -- cgit v1.2.3 From 5d0d9be8ef456afc6c3fb5f8aad06ef19b704b05 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 29 Jan 2009 14:19:48 +0000 Subject: gro: Move common completion code into helpers Currently VLAN still has a bit of common code handling the aftermath of GRO that's shared with the common path. This patch moves them into shared helpers to reduce code duplication. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/8021q/vlan_core.c | 39 +++----------------------- net/core/dev.c | 76 +++++++++++++++++++++++++++++++++++---------------- 2 files changed, 56 insertions(+), 59 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index e9db889d6222..2eb057a74654 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -98,22 +98,7 @@ drop: int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, unsigned int vlan_tci, struct sk_buff *skb) { - int err = NET_RX_SUCCESS; - - switch (vlan_gro_common(napi, grp, vlan_tci, skb)) { - case -1: - return netif_receive_skb(skb); - - case 2: - err = NET_RX_DROP; - /* fall through */ - - case 1: - kfree_skb(skb); - break; - } - - return err; + return napi_skb_finish(vlan_gro_common(napi, grp, vlan_tci, skb), skb); } EXPORT_SYMBOL(vlan_gro_receive); @@ -121,27 +106,11 @@ int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, unsigned int vlan_tci, struct napi_gro_fraginfo *info) { struct sk_buff *skb = napi_fraginfo_skb(napi, info); - int err = NET_RX_DROP; if (!skb) - goto out; - - err = NET_RX_SUCCESS; - - switch (vlan_gro_common(napi, grp, vlan_tci, skb)) { - case -1: - return netif_receive_skb(skb); - - case 2: - err = NET_RX_DROP; - /* fall through */ - - case 1: - napi_reuse_skb(napi, skb); - break; - } + return NET_RX_DROP; -out: - return err; + return napi_frags_finish(napi, skb, + vlan_gro_common(napi, grp, vlan_tci, skb)); } EXPORT_SYMBOL(vlan_gro_frags); diff --git a/net/core/dev.c b/net/core/dev.c index e61b95c11fc0..cd23ae15a1d5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -135,6 +135,14 @@ /* This should be increased if a protocol with a bigger head is added. */ #define GRO_MAX_HEAD (MAX_HEADER + 128) +enum { + GRO_MERGED, + GRO_MERGED_FREE, + GRO_HELD, + GRO_NORMAL, + GRO_DROP, +}; + /* * The list of packet types we will receive (as opposed to discard) * and the routines to invoke. @@ -2369,7 +2377,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) int count = 0; int same_flow; int mac_len; - int free; + int ret; if (!(skb->dev->features & NETIF_F_GRO)) goto normal; @@ -2412,7 +2420,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) goto normal; same_flow = NAPI_GRO_CB(skb)->same_flow; - free = NAPI_GRO_CB(skb)->free; + ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED; if (pp) { struct sk_buff *nskb = *pp; @@ -2435,12 +2443,13 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) skb_shinfo(skb)->gso_size = skb->len; skb->next = napi->gro_list; napi->gro_list = skb; + ret = GRO_HELD; ok: - return free; + return ret; normal: - return -1; + return GRO_NORMAL; } EXPORT_SYMBOL(dev_gro_receive); @@ -2456,18 +2465,30 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) return dev_gro_receive(napi, skb); } -int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +int napi_skb_finish(int ret, struct sk_buff *skb) { - switch (__napi_gro_receive(napi, skb)) { - case -1: + int err = NET_RX_SUCCESS; + + switch (ret) { + case GRO_NORMAL: return netif_receive_skb(skb); - case 1: + case GRO_DROP: + err = NET_RX_DROP; + /* fall through */ + + case GRO_MERGED_FREE: kfree_skb(skb); break; } - return NET_RX_SUCCESS; + return err; +} +EXPORT_SYMBOL(napi_skb_finish); + +int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +{ + return napi_skb_finish(__napi_gro_receive(napi, skb), skb); } EXPORT_SYMBOL(napi_gro_receive); @@ -2520,29 +2541,36 @@ out: } EXPORT_SYMBOL(napi_fraginfo_skb); -int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info) +int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret) { - struct sk_buff *skb = napi_fraginfo_skb(napi, info); - int err = NET_RX_DROP; - - if (!skb) - goto out; + int err = NET_RX_SUCCESS; - err = NET_RX_SUCCESS; - - switch (__napi_gro_receive(napi, skb)) { - case -1: + switch (ret) { + case GRO_NORMAL: return netif_receive_skb(skb); - case 0: - goto out; - } + case GRO_DROP: + err = NET_RX_DROP; + /* fall through */ - napi_reuse_skb(napi, skb); + case GRO_MERGED_FREE: + napi_reuse_skb(napi, skb); + break; + } -out: return err; } +EXPORT_SYMBOL(napi_frags_finish); + +int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info) +{ + struct sk_buff *skb = napi_fraginfo_skb(napi, info); + + if (!skb) + return NET_RX_DROP; + + return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb)); +} EXPORT_SYMBOL(napi_gro_frags); static int process_backlog(struct napi_struct *napi, int quota) -- cgit v1.2.3 From 86911732d3996a9da07914b280621450111bb6da Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 29 Jan 2009 14:19:50 +0000 Subject: gro: Avoid copying headers of unmerged packets Unfortunately simplicity isn't always the best. The fraginfo interface turned out to be suboptimal. The problem was quite obvious. For every packet, we have to copy the headers from the frags structure into skb->head, even though for 99% of the packets this part is immediately thrown away after the merge. LRO didn't have this problem because it directly read the headers from the frags structure. This patch attempts to address this by creating an interface that allows GRO to access the headers in the first frag without having to copy it. Because all drivers that use frags place the headers in the first frag this optimisation should be enough. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/8021q/vlan_core.c | 2 ++ net/core/dev.c | 70 +++++++++++++++++++++++++++++++++++++++++++-------- net/core/skbuff.c | 23 +++++++++++------ net/ipv4/af_inet.c | 10 ++++---- net/ipv4/tcp.c | 16 ++++++------ net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/af_inet6.c | 30 ++++++++++++++-------- net/ipv6/tcp_ipv6.c | 2 +- 8 files changed, 111 insertions(+), 44 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 2eb057a74654..378fa69d625a 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -98,6 +98,8 @@ drop: int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, unsigned int vlan_tci, struct sk_buff *skb) { + skb_gro_reset_offset(skb); + return napi_skb_finish(vlan_gro_common(napi, grp, vlan_tci, skb), skb); } EXPORT_SYMBOL(vlan_gro_receive); diff --git a/net/core/dev.c b/net/core/dev.c index cd23ae15a1d5..df406dcf7482 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -215,6 +215,13 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)]; } +static inline void *skb_gro_mac_header(struct sk_buff *skb) +{ + return skb_headlen(skb) ? skb_mac_header(skb) : + page_address(skb_shinfo(skb)->frags[0].page) + + skb_shinfo(skb)->frags[0].page_offset; +} + /* Device list insertion */ static int list_netdevice(struct net_device *dev) { @@ -2350,7 +2357,6 @@ static int napi_gro_complete(struct sk_buff *skb) out: skb_shinfo(skb)->gso_size = 0; - __skb_push(skb, -skb_network_offset(skb)); return netif_receive_skb(skb); } @@ -2368,6 +2374,25 @@ void napi_gro_flush(struct napi_struct *napi) } EXPORT_SYMBOL(napi_gro_flush); +void *skb_gro_header(struct sk_buff *skb, unsigned int hlen) +{ + unsigned int offset = skb_gro_offset(skb); + + hlen += offset; + if (hlen <= skb_headlen(skb)) + return skb->data + offset; + + if (unlikely(!skb_shinfo(skb)->nr_frags || + skb_shinfo(skb)->frags[0].size <= + hlen - skb_headlen(skb) || + PageHighMem(skb_shinfo(skb)->frags[0].page))) + return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL; + + return page_address(skb_shinfo(skb)->frags[0].page) + + skb_shinfo(skb)->frags[0].page_offset + offset; +} +EXPORT_SYMBOL(skb_gro_header); + int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff **pp = NULL; @@ -2388,11 +2413,13 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { struct sk_buff *p; + void *mac; if (ptype->type != type || ptype->dev || !ptype->gro_receive) continue; - skb_reset_network_header(skb); + skb_set_network_header(skb, skb_gro_offset(skb)); + mac = skb_gro_mac_header(skb); mac_len = skb->network_header - skb->mac_header; skb->mac_len = mac_len; NAPI_GRO_CB(skb)->same_flow = 0; @@ -2406,8 +2433,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) continue; if (p->mac_len != mac_len || - memcmp(skb_mac_header(p), skb_mac_header(skb), - mac_len)) + memcmp(skb_mac_header(p), mac, mac_len)) NAPI_GRO_CB(p)->same_flow = 0; } @@ -2434,13 +2460,11 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) if (same_flow) goto ok; - if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) { - __skb_push(skb, -skb_network_offset(skb)); + if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) goto normal; - } NAPI_GRO_CB(skb)->count = 1; - skb_shinfo(skb)->gso_size = skb->len; + skb_shinfo(skb)->gso_size = skb_gro_len(skb); skb->next = napi->gro_list; napi->gro_list = skb; ret = GRO_HELD; @@ -2488,6 +2512,8 @@ EXPORT_SYMBOL(napi_skb_finish); int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { + skb_gro_reset_offset(skb); + return napi_skb_finish(__napi_gro_receive(napi, skb), skb); } EXPORT_SYMBOL(napi_gro_receive); @@ -2506,6 +2532,7 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, { struct net_device *dev = napi->dev; struct sk_buff *skb = napi->skb; + struct ethhdr *eth; napi->skb = NULL; @@ -2525,13 +2552,23 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, skb->len += info->len; skb->truesize += info->len; - if (!pskb_may_pull(skb, ETH_HLEN)) { + skb_reset_mac_header(skb); + skb_gro_reset_offset(skb); + + eth = skb_gro_header(skb, sizeof(*eth)); + if (!eth) { napi_reuse_skb(napi, skb); skb = NULL; goto out; } - skb->protocol = eth_type_trans(skb, dev); + skb_gro_pull(skb, sizeof(*eth)); + + /* + * This works because the only protocols we care about don't require + * special handling. We'll fix it up properly at the end. + */ + skb->protocol = eth->h_proto; skb->ip_summed = info->ip_summed; skb->csum = info->csum; @@ -2544,10 +2581,21 @@ EXPORT_SYMBOL(napi_fraginfo_skb); int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret) { int err = NET_RX_SUCCESS; + int may; switch (ret) { case GRO_NORMAL: - return netif_receive_skb(skb); + case GRO_HELD: + may = pskb_may_pull(skb, skb_gro_offset(skb)); + BUG_ON(!may); + + skb->protocol = eth_type_trans(skb, napi->dev); + + if (ret == GRO_NORMAL) + return netif_receive_skb(skb); + + skb_gro_pull(skb, -ETH_HLEN); + break; case GRO_DROP: err = NET_RX_DROP; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 2e5f2ca3bdcd..f9f4065a7e9b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2584,17 +2584,21 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) struct sk_buff *p = *head; struct sk_buff *nskb; unsigned int headroom; - unsigned int hlen = p->data - skb_mac_header(p); - unsigned int len = skb->len; + unsigned int len = skb_gro_len(skb); - if (hlen + p->len + len >= 65536) + if (p->len + len >= 65536) return -E2BIG; if (skb_shinfo(p)->frag_list) goto merge; - else if (!skb_headlen(p) && !skb_headlen(skb) && - skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags < + else if (skb_headlen(skb) <= skb_gro_offset(skb) && + skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags <= MAX_SKB_FRAGS) { + skb_shinfo(skb)->frags[0].page_offset += + skb_gro_offset(skb) - skb_headlen(skb); + skb_shinfo(skb)->frags[0].size -= + skb_gro_offset(skb) - skb_headlen(skb); + memcpy(skb_shinfo(p)->frags + skb_shinfo(p)->nr_frags, skb_shinfo(skb)->frags, skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); @@ -2611,7 +2615,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) } headroom = skb_headroom(p); - nskb = netdev_alloc_skb(p->dev, headroom); + nskb = netdev_alloc_skb(p->dev, headroom + skb_gro_offset(p)); if (unlikely(!nskb)) return -ENOMEM; @@ -2619,12 +2623,15 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) nskb->mac_len = p->mac_len; skb_reserve(nskb, headroom); + __skb_put(nskb, skb_gro_offset(p)); - skb_set_mac_header(nskb, -hlen); + skb_set_mac_header(nskb, skb_mac_header(p) - p->data); skb_set_network_header(nskb, skb_network_offset(p)); skb_set_transport_header(nskb, skb_transport_offset(p)); - memcpy(skb_mac_header(nskb), skb_mac_header(p), hlen); + __skb_pull(p, skb_gro_offset(p)); + memcpy(skb_mac_header(nskb), skb_mac_header(p), + p->data - skb_mac_header(p)); *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p); skb_shinfo(nskb)->frag_list = p; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 743f5542d65a..d6770f295d5b 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1253,10 +1253,10 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, int proto; int id; - if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) + iph = skb_gro_header(skb, sizeof(*iph)); + if (unlikely(!iph)) goto out; - iph = ip_hdr(skb); proto = iph->protocol & (MAX_INET_PROTOS - 1); rcu_read_lock(); @@ -1270,7 +1270,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) goto out_unlock; - flush = ntohs(iph->tot_len) != skb->len || + flush = ntohs(iph->tot_len) != skb_gro_len(skb) || iph->frag_off != htons(IP_DF); id = ntohs(iph->id); @@ -1298,8 +1298,8 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, } NAPI_GRO_CB(skb)->flush |= flush; - __skb_pull(skb, sizeof(*iph)); - skb_reset_transport_header(skb); + skb_gro_pull(skb, sizeof(*iph)); + skb_set_transport_header(skb, skb_gro_offset(skb)); pp = ops->gro_receive(head, skb); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0cd71b84e483..1cd608253940 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2481,19 +2481,19 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) unsigned int mss = 1; int flush = 1; - if (!pskb_may_pull(skb, sizeof(*th))) + th = skb_gro_header(skb, sizeof(*th)); + if (unlikely(!th)) goto out; - th = tcp_hdr(skb); thlen = th->doff * 4; if (thlen < sizeof(*th)) goto out; - if (!pskb_may_pull(skb, thlen)) + th = skb_gro_header(skb, thlen); + if (unlikely(!th)) goto out; - th = tcp_hdr(skb); - __skb_pull(skb, thlen); + skb_gro_pull(skb, thlen); flags = tcp_flag_word(th); @@ -2521,10 +2521,10 @@ found: flush |= th->ack_seq != th2->ack_seq || th->window != th2->window; flush |= memcmp(th + 1, th2 + 1, thlen - sizeof(*th)); - total = p->len; + total = skb_gro_len(p); mss = skb_shinfo(p)->gso_size; - flush |= skb->len > mss || skb->len <= 0; + flush |= skb_gro_len(skb) > mss || !skb_gro_len(skb); flush |= ntohl(th2->seq) + total != ntohl(th->seq); if (flush || skb_gro_receive(head, skb)) { @@ -2537,7 +2537,7 @@ found: tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); out_check_final: - flush = skb->len < mss; + flush = skb_gro_len(skb) < mss; flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST | TCP_FLAG_SYN | TCP_FLAG_FIN); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 19d7b429a262..f6b962f56ab4 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2355,7 +2355,7 @@ struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) switch (skb->ip_summed) { case CHECKSUM_COMPLETE: - if (!tcp_v4_check(skb->len, iph->saddr, iph->daddr, + if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, skb->csum)) { skb->ip_summed = CHECKSUM_UNNECESSARY; break; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index c802bc1658a8..bd91eadcbe3f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -799,24 +799,34 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, int proto; __wsum csum; - if (unlikely(!pskb_may_pull(skb, sizeof(*iph)))) + iph = skb_gro_header(skb, sizeof(*iph)); + if (unlikely(!iph)) goto out; - iph = ipv6_hdr(skb); - __skb_pull(skb, sizeof(*iph)); + skb_gro_pull(skb, sizeof(*iph)); + skb_set_transport_header(skb, skb_gro_offset(skb)); - flush += ntohs(iph->payload_len) != skb->len; + flush += ntohs(iph->payload_len) != skb_gro_len(skb); rcu_read_lock(); - proto = ipv6_gso_pull_exthdrs(skb, iph->nexthdr); - iph = ipv6_hdr(skb); - IPV6_GRO_CB(skb)->proto = proto; + proto = iph->nexthdr; ops = rcu_dereference(inet6_protos[proto]); - if (!ops || !ops->gro_receive) - goto out_unlock; + if (!ops || !ops->gro_receive) { + __pskb_pull(skb, skb_gro_offset(skb)); + proto = ipv6_gso_pull_exthdrs(skb, proto); + skb_gro_pull(skb, -skb_transport_offset(skb)); + skb_reset_transport_header(skb); + __skb_push(skb, skb_gro_offset(skb)); + + if (!ops || !ops->gro_receive) + goto out_unlock; + + iph = ipv6_hdr(skb); + } + + IPV6_GRO_CB(skb)->proto = proto; flush--; - skb_reset_transport_header(skb); nlen = skb_network_header_len(skb); for (p = *head; p; p = p->next) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index e5b85d45bee8..00f1269e11e9 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -948,7 +948,7 @@ struct sk_buff **tcp6_gro_receive(struct sk_buff **head, struct sk_buff *skb) switch (skb->ip_summed) { case CHECKSUM_COMPLETE: - if (!tcp_v6_check(skb->len, &iph->saddr, &iph->daddr, + if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr, skb->csum)) { skb->ip_summed = CHECKSUM_UNNECESSARY; break; -- cgit v1.2.3 From 81705ad1b2f926d2ef15ed95074a9c1fa9fb4dc4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 29 Jan 2009 14:19:51 +0000 Subject: gro: Do not merge paged packets into frag_list gro: Do not merge paged packets into frag_list Bigger is not always better :) It was easy to continue to merged packets into frag_list after the page array is full. However, this turns out to be worse than LRO because frag_list is a much less efficient form of storage than the page array. So we're better off stopping the merge and starting a new entry with an empty page array. In future we can optimise this further by doing frag_list merging but making sure that we continue to fill in the page array. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/skbuff.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f9f4065a7e9b..d386f1082ebd 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2591,9 +2591,11 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) if (skb_shinfo(p)->frag_list) goto merge; - else if (skb_headlen(skb) <= skb_gro_offset(skb) && - skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags <= - MAX_SKB_FRAGS) { + else if (skb_headlen(skb) <= skb_gro_offset(skb)) { + if (skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags > + MAX_SKB_FRAGS) + return -E2BIG; + skb_shinfo(skb)->frags[0].page_offset += skb_gro_offset(skb) - skb_headlen(skb); skb_shinfo(skb)->frags[0].size -= -- cgit v1.2.3 From 80595d59ba9917227856e663da249c2276a8628d Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 29 Jan 2009 14:19:52 +0000 Subject: gro: Open-code memcpy in napi_fraginfo_skb This patch optimises napi_fraginfo_skb to only copy the bits necessary. We also open-code the memcpy so that the alignment information is always available to gcc. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index df406dcf7482..ec5be1c7f2f1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2533,6 +2533,8 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, struct net_device *dev = napi->dev; struct sk_buff *skb = napi->skb; struct ethhdr *eth; + skb_frag_t *frag; + int i; napi->skb = NULL; @@ -2545,8 +2547,14 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi, } BUG_ON(info->nr_frags > MAX_SKB_FRAGS); + frag = &info->frags[info->nr_frags - 1]; + + for (i = skb_shinfo(skb)->nr_frags; i < info->nr_frags; i++) { + skb_fill_page_desc(skb, i, frag->page, frag->page_offset, + frag->size); + frag++; + } skb_shinfo(skb)->nr_frags = info->nr_frags; - memcpy(skb_shinfo(skb)->frags, info->frags, sizeof(info->frags)); skb->data_len = info->len; skb->len += info->len; -- cgit v1.2.3 From b8abde45d7d6ab9e8ceced9b5990eeb1149d0b97 Mon Sep 17 00:00:00 2001 From: Vivek Natarajan Date: Tue, 27 Jan 2009 19:26:28 +0530 Subject: mac80211: Cancel the dynamic ps timer in ioctl_siwpower. If the dynamic power save timer has been started before the power save is disabled using iwconfig, we fail to cancel the timer. Hence cancel it while disabling power save. Signed-off-by: Vivek Natarajan Signed-off-by: John W. Linville --- net/mac80211/wext.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 70a29b657b61..5c88b8246bbb 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -906,6 +906,8 @@ static int ieee80211_ioctl_siwpower(struct net_device *dev, IEEE80211_CONF_CHANGE_PS); if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) ieee80211_send_nullfunc(local, sdata, 0); + del_timer_sync(&local->dynamic_ps_timer); + cancel_work_sync(&local->dynamic_ps_enable_work); } } -- cgit v1.2.3 From 4fb669948116d928ae44262ab7743732c574630d Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Sun, 1 Feb 2009 00:41:42 -0800 Subject: net: Optimize memory usage when splicing from sockets. The recent fix of data corruption when splicing from sockets uses memory very inefficiently allocating a new page to copy each chunk of linear part of skb. This patch uses the same page until it's full (almost) by caching the page in sk_sndmsg_page field. With changes from David S. Miller Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/core/skbuff.c | 47 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 36 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f20e758fe46b..e55d1ef5690d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1333,14 +1333,39 @@ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i) put_page(spd->pages[i]); } -static inline struct page *linear_to_page(struct page *page, unsigned int len, - unsigned int offset) -{ - struct page *p = alloc_pages(GFP_KERNEL, 0); +static inline struct page *linear_to_page(struct page *page, unsigned int *len, + unsigned int *offset, + struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + struct page *p = sk->sk_sndmsg_page; + unsigned int off; + + if (!p) { +new_page: + p = sk->sk_sndmsg_page = alloc_pages(sk->sk_allocation, 0); + if (!p) + return NULL; - if (!p) - return NULL; - memcpy(page_address(p) + offset, page_address(page) + offset, len); + off = sk->sk_sndmsg_off = 0; + /* hold one ref to this page until it's full */ + } else { + unsigned int mlen; + + off = sk->sk_sndmsg_off; + mlen = PAGE_SIZE - off; + if (mlen < 64 && mlen < *len) { + put_page(p); + goto new_page; + } + + *len = min_t(unsigned int, *len, mlen); + } + + memcpy(page_address(p) + off, page_address(page) + *offset, *len); + sk->sk_sndmsg_off += *len; + *offset = off; + get_page(p); return p; } @@ -1349,21 +1374,21 @@ static inline struct page *linear_to_page(struct page *page, unsigned int len, * Fill page/offset/length into spd, if it can hold more pages. */ static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page, - unsigned int len, unsigned int offset, + unsigned int *len, unsigned int offset, struct sk_buff *skb, int linear) { if (unlikely(spd->nr_pages == PIPE_BUFFERS)) return 1; if (linear) { - page = linear_to_page(page, len, offset); + page = linear_to_page(page, len, &offset, skb); if (!page) return 1; } else get_page(page); spd->pages[spd->nr_pages] = page; - spd->partial[spd->nr_pages].len = len; + spd->partial[spd->nr_pages].len = *len; spd->partial[spd->nr_pages].offset = offset; spd->nr_pages++; @@ -1405,7 +1430,7 @@ static inline int __splice_segment(struct page *page, unsigned int poff, /* the linear region may spread across several pages */ flen = min_t(unsigned int, flen, PAGE_SIZE - poff); - if (spd_fill_page(spd, page, flen, poff, skb, linear)) + if (spd_fill_page(spd, page, &flen, poff, skb, linear)) return 1; __segment_seek(&page, &poff, &plen, flen); -- cgit v1.2.3 From 09640e6365c679b5642b1c41b6d7078f51689ddf Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Sun, 1 Feb 2009 00:45:17 -0800 Subject: net: replace uses of __constant_{endian} Base versions handle constant folding now. Signed-off-by: Harvey Harrison Signed-off-by: David S. Miller --- net/802/psnap.c | 2 +- net/8021q/vlan.c | 2 +- net/appletalk/ddp.c | 4 ++-- net/ax25/af_ax25.c | 2 +- net/bridge/br_netfilter.c | 2 +- net/can/af_can.c | 2 +- net/decnet/af_decnet.c | 2 +- net/decnet/dn_route.c | 2 +- net/dsa/mv88e6123_61_65.c | 2 +- net/dsa/mv88e6131.c | 2 +- net/dsa/tag_dsa.c | 2 +- net/dsa/tag_edsa.c | 2 +- net/dsa/tag_trailer.c | 2 +- net/econet/af_econet.c | 2 +- net/ipv4/af_inet.c | 2 +- net/ipv4/arp.c | 2 +- net/ipv4/ipconfig.c | 8 ++++---- net/ipv4/netfilter/nf_nat_snmp_basic.c | 4 ++-- net/ipv4/route.c | 4 ++-- net/ipv4/xfrm4_policy.c | 2 +- net/ipv6/af_inet6.c | 2 +- net/ipv6/route.c | 4 ++-- net/ipv6/xfrm6_policy.c | 2 +- net/ipx/af_ipx.c | 4 ++-- net/irda/irmod.c | 2 +- net/llc/llc_core.c | 4 ++-- net/netfilter/ipvs/ip_vs_sync.c | 4 ++-- net/netfilter/nf_conntrack_amanda.c | 4 ++-- net/netfilter/nf_conntrack_h323_main.c | 8 ++++---- net/netfilter/nf_conntrack_netbios_ns.c | 2 +- net/netfilter/nf_conntrack_pptp.c | 2 +- net/phonet/af_phonet.c | 2 +- net/sctp/output.c | 2 +- net/sctp/sm_make_chunk.c | 4 ++-- net/x25/af_x25.c | 2 +- 35 files changed, 50 insertions(+), 50 deletions(-) (limited to 'net') diff --git a/net/802/psnap.c b/net/802/psnap.c index 70980baeb682..6ed711748f26 100644 --- a/net/802/psnap.c +++ b/net/802/psnap.c @@ -51,7 +51,7 @@ static int snap_rcv(struct sk_buff *skb, struct net_device *dev, int rc = 1; struct datalink_proto *proto; static struct packet_type snap_packet_type = { - .type = __constant_htons(ETH_P_SNAP), + .type = cpu_to_be16(ETH_P_SNAP), }; if (unlikely(!pskb_may_pull(skb, 5))) diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 41e8f65bd3f0..4163ea65bf41 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -52,7 +52,7 @@ static const char vlan_copyright[] = "Ben Greear "; static const char vlan_buggyright[] = "David S. Miller "; static struct packet_type vlan_packet_type = { - .type = __constant_htons(ETH_P_8021Q), + .type = cpu_to_be16(ETH_P_8021Q), .func = vlan_skb_recv, /* VLAN receive method */ }; diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 5abce07fb50a..510a6782da8f 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1861,12 +1861,12 @@ static struct notifier_block ddp_notifier = { }; static struct packet_type ltalk_packet_type = { - .type = __constant_htons(ETH_P_LOCALTALK), + .type = cpu_to_be16(ETH_P_LOCALTALK), .func = ltalk_rcv, }; static struct packet_type ppptalk_packet_type = { - .type = __constant_htons(ETH_P_PPPTALK), + .type = cpu_to_be16(ETH_P_PPPTALK), .func = atalk_rcv, }; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 00d9e5e13158..d127fd3ba5c6 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1986,7 +1986,7 @@ static const struct proto_ops ax25_proto_ops = { * Called by socket.c on kernel start up */ static struct packet_type ax25_packet_type = { - .type = __constant_htons(ETH_P_AX25), + .type = cpu_to_be16(ETH_P_AX25), .dev = NULL, /* All devices */ .func = ax25_kiss_rcv, }; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index cf754ace0b75..3953ac4214c8 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -107,7 +107,7 @@ static void fake_update_pmtu(struct dst_entry *dst, u32 mtu) static struct dst_ops fake_dst_ops = { .family = AF_INET, - .protocol = __constant_htons(ETH_P_IP), + .protocol = cpu_to_be16(ETH_P_IP), .update_pmtu = fake_update_pmtu, .entries = ATOMIC_INIT(0), }; diff --git a/net/can/af_can.c b/net/can/af_can.c index fa417ca6cbe6..d90e8dd975fc 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -828,7 +828,7 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg, */ static struct packet_type can_packet __read_mostly = { - .type = __constant_htons(ETH_P_CAN), + .type = cpu_to_be16(ETH_P_CAN), .dev = NULL, .func = can_rcv, }; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index cf0e18499297..12bf7d4c16c6 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -2113,7 +2113,7 @@ static struct notifier_block dn_dev_notifier = { extern int dn_route_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); static struct packet_type dn_dix_packet_type = { - .type = __constant_htons(ETH_P_DNA_RT), + .type = cpu_to_be16(ETH_P_DNA_RT), .dev = NULL, /* All devices */ .func = dn_route_rcv, }; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index c754670b7fca..5130dee0b384 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -124,7 +124,7 @@ int decnet_dst_gc_interval = 2; static struct dst_ops dn_dst_ops = { .family = PF_DECnet, - .protocol = __constant_htons(ETH_P_DNA_RT), + .protocol = cpu_to_be16(ETH_P_DNA_RT), .gc_thresh = 128, .gc = dn_dst_gc, .check = dn_dst_check, diff --git a/net/dsa/mv88e6123_61_65.c b/net/dsa/mv88e6123_61_65.c index ec8c6a0482d3..100318722214 100644 --- a/net/dsa/mv88e6123_61_65.c +++ b/net/dsa/mv88e6123_61_65.c @@ -394,7 +394,7 @@ static int mv88e6123_61_65_get_sset_count(struct dsa_switch *ds) } static struct dsa_switch_driver mv88e6123_61_65_switch_driver = { - .tag_protocol = __constant_htons(ETH_P_EDSA), + .tag_protocol = cpu_to_be16(ETH_P_EDSA), .priv_size = sizeof(struct mv88e6xxx_priv_state), .probe = mv88e6123_61_65_probe, .setup = mv88e6123_61_65_setup, diff --git a/net/dsa/mv88e6131.c b/net/dsa/mv88e6131.c index 374d46a01265..70fae2444cb6 100644 --- a/net/dsa/mv88e6131.c +++ b/net/dsa/mv88e6131.c @@ -353,7 +353,7 @@ static int mv88e6131_get_sset_count(struct dsa_switch *ds) } static struct dsa_switch_driver mv88e6131_switch_driver = { - .tag_protocol = __constant_htons(ETH_P_DSA), + .tag_protocol = cpu_to_be16(ETH_P_DSA), .priv_size = sizeof(struct mv88e6xxx_priv_state), .probe = mv88e6131_probe, .setup = mv88e6131_setup, diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index f99a019b939e..63e532a69fdb 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -176,7 +176,7 @@ out: } static struct packet_type dsa_packet_type = { - .type = __constant_htons(ETH_P_DSA), + .type = cpu_to_be16(ETH_P_DSA), .func = dsa_rcv, }; diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index 328ec957f786..6197f9a7ef42 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c @@ -195,7 +195,7 @@ out: } static struct packet_type edsa_packet_type = { - .type = __constant_htons(ETH_P_EDSA), + .type = cpu_to_be16(ETH_P_EDSA), .func = edsa_rcv, }; diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index b59132878ad1..d7e7f424ff0c 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -112,7 +112,7 @@ out: } static struct packet_type trailer_packet_type = { - .type = __constant_htons(ETH_P_TRAILER), + .type = cpu_to_be16(ETH_P_TRAILER), .func = trailer_rcv, }; diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 8789d2bb1b06..7bf35582f656 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -1103,7 +1103,7 @@ drop: } static struct packet_type econet_packet_type = { - .type = __constant_htons(ETH_P_ECONET), + .type = cpu_to_be16(ETH_P_ECONET), .func = econet_rcv, }; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index d6770f295d5b..957cd054732c 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1501,7 +1501,7 @@ static int ipv4_proc_init(void); */ static struct packet_type ip_packet_type = { - .type = __constant_htons(ETH_P_IP), + .type = cpu_to_be16(ETH_P_IP), .func = ip_rcv, .gso_send_check = inet_gso_send_check, .gso_segment = inet_gso_segment, diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 29a74c01d8de..3f6b7354699b 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1226,7 +1226,7 @@ void arp_ifdown(struct net_device *dev) */ static struct packet_type arp_packet_type = { - .type = __constant_htons(ETH_P_ARP), + .type = cpu_to_be16(ETH_P_ARP), .func = arp_rcv, }; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index d722013c1cae..90d22ae0a419 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -100,8 +100,8 @@ #define CONF_NAMESERVERS_MAX 3 /* Maximum number of nameservers - '3' from resolv.h */ -#define NONE __constant_htonl(INADDR_NONE) -#define ANY __constant_htonl(INADDR_ANY) +#define NONE cpu_to_be32(INADDR_NONE) +#define ANY cpu_to_be32(INADDR_ANY) /* * Public IP configuration @@ -406,7 +406,7 @@ static int __init ic_defaults(void) static int ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); static struct packet_type rarp_packet_type __initdata = { - .type = __constant_htons(ETH_P_RARP), + .type = cpu_to_be16(ETH_P_RARP), .func = ic_rarp_recv, }; @@ -568,7 +568,7 @@ struct bootp_pkt { /* BOOTP packet format */ static int ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); static struct packet_type bootp_packet_type __initdata = { - .type = __constant_htons(ETH_P_IP), + .type = cpu_to_be16(ETH_P_IP), .func = ic_bootp_recv, }; diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 182f845de92f..d9521f6f9ed0 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -1292,7 +1292,7 @@ static struct nf_conntrack_helper snmp_helper __read_mostly = { .expect_policy = &snmp_exp_policy, .name = "snmp", .tuple.src.l3num = AF_INET, - .tuple.src.u.udp.port = __constant_htons(SNMP_PORT), + .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT), .tuple.dst.protonum = IPPROTO_UDP, }; @@ -1302,7 +1302,7 @@ static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { .expect_policy = &snmp_exp_policy, .name = "snmp_trap", .tuple.src.l3num = AF_INET, - .tuple.src.u.udp.port = __constant_htons(SNMP_TRAP_PORT), + .tuple.src.u.udp.port = cpu_to_be16(SNMP_TRAP_PORT), .tuple.dst.protonum = IPPROTO_UDP, }; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6a9e204c8024..5caee609be06 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -151,7 +151,7 @@ static void rt_emergency_hash_rebuild(struct net *net); static struct dst_ops ipv4_dst_ops = { .family = AF_INET, - .protocol = __constant_htons(ETH_P_IP), + .protocol = cpu_to_be16(ETH_P_IP), .gc = rt_garbage_collect, .check = ipv4_dst_check, .destroy = ipv4_dst_destroy, @@ -2696,7 +2696,7 @@ static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) static struct dst_ops ipv4_dst_blackhole_ops = { .family = AF_INET, - .protocol = __constant_htons(ETH_P_IP), + .protocol = cpu_to_be16(ETH_P_IP), .destroy = ipv4_dst_destroy, .check = ipv4_dst_check, .update_pmtu = ipv4_rt_blackhole_update_pmtu, diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 2ad24ba31f9d..60d918c96a4f 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -241,7 +241,7 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, static struct dst_ops xfrm4_dst_ops = { .family = AF_INET, - .protocol = __constant_htons(ETH_P_IP), + .protocol = cpu_to_be16(ETH_P_IP), .gc = xfrm4_garbage_collect, .update_pmtu = xfrm4_update_pmtu, .destroy = xfrm4_dst_destroy, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index bd91eadcbe3f..fa2ac7ee662f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -890,7 +890,7 @@ out_unlock: } static struct packet_type ipv6_packet_type = { - .type = __constant_htons(ETH_P_IPV6), + .type = cpu_to_be16(ETH_P_IPV6), .func = ipv6_rcv, .gso_send_check = ipv6_gso_send_check, .gso_segment = ipv6_gso_segment, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9c574235c905..c3d486a3edad 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -98,7 +98,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net, static struct dst_ops ip6_dst_ops_template = { .family = AF_INET6, - .protocol = __constant_htons(ETH_P_IPV6), + .protocol = cpu_to_be16(ETH_P_IPV6), .gc = ip6_dst_gc, .gc_thresh = 1024, .check = ip6_dst_check, @@ -117,7 +117,7 @@ static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) static struct dst_ops ip6_dst_blackhole_ops = { .family = AF_INET6, - .protocol = __constant_htons(ETH_P_IPV6), + .protocol = cpu_to_be16(ETH_P_IPV6), .destroy = ip6_dst_destroy, .check = ip6_dst_check, .update_pmtu = ip6_rt_blackhole_update_pmtu, diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 97ab068e8ccc..b4b16a43f277 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -272,7 +272,7 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, static struct dst_ops xfrm6_dst_ops = { .family = AF_INET6, - .protocol = __constant_htons(ETH_P_IPV6), + .protocol = cpu_to_be16(ETH_P_IPV6), .gc = xfrm6_garbage_collect, .update_pmtu = xfrm6_update_pmtu, .destroy = xfrm6_dst_destroy, diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index b6e70f92e7fb..43d0ffc6d565 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1959,12 +1959,12 @@ static const struct proto_ops SOCKOPS_WRAPPED(ipx_dgram_ops) = { SOCKOPS_WRAP(ipx_dgram, PF_IPX); static struct packet_type ipx_8023_packet_type = { - .type = __constant_htons(ETH_P_802_3), + .type = cpu_to_be16(ETH_P_802_3), .func = ipx_rcv, }; static struct packet_type ipx_dix_packet_type = { - .type = __constant_htons(ETH_P_IPX), + .type = cpu_to_be16(ETH_P_IPX), .func = ipx_rcv, }; diff --git a/net/irda/irmod.c b/net/irda/irmod.c index 4c487a883725..1bb607f2f5c7 100644 --- a/net/irda/irmod.c +++ b/net/irda/irmod.c @@ -56,7 +56,7 @@ EXPORT_SYMBOL(irda_debug); * Tell the kernel how IrDA packets should be handled. */ static struct packet_type irda_packet_type = { - .type = __constant_htons(ETH_P_IRDA), + .type = cpu_to_be16(ETH_P_IRDA), .func = irlap_driver_rcv, /* Packet type handler irlap_frame.c */ }; diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index 50d5b10e23a2..a7fe1adc378d 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -148,12 +148,12 @@ void llc_sap_close(struct llc_sap *sap) } static struct packet_type llc_packet_type = { - .type = __constant_htons(ETH_P_802_2), + .type = cpu_to_be16(ETH_P_802_2), .func = llc_rcv, }; static struct packet_type llc_tr_packet_type = { - .type = __constant_htons(ETH_P_TR_802_2), + .type = cpu_to_be16(ETH_P_TR_802_2), .func = llc_rcv, }; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 6be5d4efa51b..5c48378a852f 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -149,8 +149,8 @@ static struct task_struct *sync_backup_thread; /* multicast addr */ static struct sockaddr_in mcast_addr = { .sin_family = AF_INET, - .sin_port = __constant_htons(IP_VS_SYNC_PORT), - .sin_addr.s_addr = __constant_htonl(IP_VS_SYNC_GROUP), + .sin_port = cpu_to_be16(IP_VS_SYNC_PORT), + .sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP), }; diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index 4f8fcf498545..07d9d8857e5d 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -177,7 +177,7 @@ static struct nf_conntrack_helper amanda_helper[2] __read_mostly = { .me = THIS_MODULE, .help = amanda_help, .tuple.src.l3num = AF_INET, - .tuple.src.u.udp.port = __constant_htons(10080), + .tuple.src.u.udp.port = cpu_to_be16(10080), .tuple.dst.protonum = IPPROTO_UDP, .expect_policy = &amanda_exp_policy, }, @@ -186,7 +186,7 @@ static struct nf_conntrack_helper amanda_helper[2] __read_mostly = { .me = THIS_MODULE, .help = amanda_help, .tuple.src.l3num = AF_INET6, - .tuple.src.u.udp.port = __constant_htons(10080), + .tuple.src.u.udp.port = cpu_to_be16(10080), .tuple.dst.protonum = IPPROTO_UDP, .expect_policy = &amanda_exp_policy, }, diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 687bd633c3d7..66369490230e 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -1167,7 +1167,7 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = { .name = "Q.931", .me = THIS_MODULE, .tuple.src.l3num = AF_INET, - .tuple.src.u.tcp.port = __constant_htons(Q931_PORT), + .tuple.src.u.tcp.port = cpu_to_be16(Q931_PORT), .tuple.dst.protonum = IPPROTO_TCP, .help = q931_help, .expect_policy = &q931_exp_policy, @@ -1176,7 +1176,7 @@ static struct nf_conntrack_helper nf_conntrack_helper_q931[] __read_mostly = { .name = "Q.931", .me = THIS_MODULE, .tuple.src.l3num = AF_INET6, - .tuple.src.u.tcp.port = __constant_htons(Q931_PORT), + .tuple.src.u.tcp.port = cpu_to_be16(Q931_PORT), .tuple.dst.protonum = IPPROTO_TCP, .help = q931_help, .expect_policy = &q931_exp_policy, @@ -1741,7 +1741,7 @@ static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = { .name = "RAS", .me = THIS_MODULE, .tuple.src.l3num = AF_INET, - .tuple.src.u.udp.port = __constant_htons(RAS_PORT), + .tuple.src.u.udp.port = cpu_to_be16(RAS_PORT), .tuple.dst.protonum = IPPROTO_UDP, .help = ras_help, .expect_policy = &ras_exp_policy, @@ -1750,7 +1750,7 @@ static struct nf_conntrack_helper nf_conntrack_helper_ras[] __read_mostly = { .name = "RAS", .me = THIS_MODULE, .tuple.src.l3num = AF_INET6, - .tuple.src.u.udp.port = __constant_htons(RAS_PORT), + .tuple.src.u.udp.port = cpu_to_be16(RAS_PORT), .tuple.dst.protonum = IPPROTO_UDP, .help = ras_help, .expect_policy = &ras_exp_policy, diff --git a/net/netfilter/nf_conntrack_netbios_ns.c b/net/netfilter/nf_conntrack_netbios_ns.c index 5af4273b4668..8a3875e36ec2 100644 --- a/net/netfilter/nf_conntrack_netbios_ns.c +++ b/net/netfilter/nf_conntrack_netbios_ns.c @@ -105,7 +105,7 @@ static struct nf_conntrack_expect_policy exp_policy = { static struct nf_conntrack_helper helper __read_mostly = { .name = "netbios-ns", .tuple.src.l3num = AF_INET, - .tuple.src.u.udp.port = __constant_htons(NMBD_PORT), + .tuple.src.u.udp.port = cpu_to_be16(NMBD_PORT), .tuple.dst.protonum = IPPROTO_UDP, .me = THIS_MODULE, .help = help, diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 9e169ef2e854..72cca638a82d 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -591,7 +591,7 @@ static struct nf_conntrack_helper pptp __read_mostly = { .name = "pptp", .me = THIS_MODULE, .tuple.src.l3num = AF_INET, - .tuple.src.u.tcp.port = __constant_htons(PPTP_CONTROL_PORT), + .tuple.src.u.tcp.port = cpu_to_be16(PPTP_CONTROL_PORT), .tuple.dst.protonum = IPPROTO_TCP, .help = conntrack_pptp_help, .destroy = pptp_destroy_siblings, diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 95bc49ddb8bf..81795ea87794 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -383,7 +383,7 @@ out: } static struct packet_type phonet_packet_type = { - .type = __constant_htons(ETH_P_PHONET), + .type = cpu_to_be16(ETH_P_PHONET), .dev = NULL, .func = phonet_rcv, }; diff --git a/net/sctp/output.c b/net/sctp/output.c index 73639355157e..47bfba6c03ec 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -367,7 +367,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) struct sctp_transport *tp = packet->transport; struct sctp_association *asoc = tp->asoc; struct sctphdr *sh; - __be32 crc32 = __constant_cpu_to_be32(0); + __be32 crc32 = cpu_to_be32(0); struct sk_buff *nskb; struct sctp_chunk *chunk, *tmp; struct sock *sk; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index fd8acb48c3f2..b40e95f9851b 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -100,11 +100,11 @@ int sctp_chunk_iif(const struct sctp_chunk *chunk) */ static const struct sctp_paramhdr ecap_param = { SCTP_PARAM_ECN_CAPABLE, - __constant_htons(sizeof(struct sctp_paramhdr)), + cpu_to_be16(sizeof(struct sctp_paramhdr)), }; static const struct sctp_paramhdr prsctp_param = { SCTP_PARAM_FWD_TSN_SUPPORT, - __constant_htons(sizeof(struct sctp_paramhdr)), + cpu_to_be16(sizeof(struct sctp_paramhdr)), }; /* A helper to initialize to initialize an op error inside a diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 9fc5b023d111..8f76f4009c24 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1609,7 +1609,7 @@ static const struct proto_ops SOCKOPS_WRAPPED(x25_proto_ops) = { SOCKOPS_WRAP(x25_proto, AF_X25); static struct packet_type x25_packet_type = { - .type = __constant_htons(ETH_P_X25), + .type = cpu_to_be16(ETH_P_X25), .func = x25_lapb_receive_frame, }; -- cgit v1.2.3 From eefef1cf7653cd4e0aaf743c00ae8345086cdc01 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 1 Feb 2009 01:04:33 -0800 Subject: net: add ARP notify option for devices This adds another inet device option to enable gratuitous ARP when device is brought up or address change. This is handy for clusters or virtualization. Signed-off-by: Stephen Hemminger Signed-off-by: Jeremy Fitzhardinge Signed-off-by: David S. Miller --- net/ipv4/devinet.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'net') diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 309997edc8a5..d519a6a66726 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1075,6 +1075,14 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, } } ip_mc_up(in_dev); + /* fall through */ + case NETDEV_CHANGEADDR: + if (IN_DEV_ARP_NOTIFY(in_dev)) + arp_send(ARPOP_REQUEST, ETH_P_ARP, + in_dev->ifa_list->ifa_address, + dev, + in_dev->ifa_list->ifa_address, + NULL, dev->dev_addr, NULL); break; case NETDEV_DOWN: ip_mc_down(in_dev); @@ -1439,6 +1447,7 @@ static struct devinet_sysctl_table { DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"), DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"), DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), + DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"), DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), -- cgit v1.2.3 From b00355db3f88d96810a60011a30cfb2c3469409d Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Sun, 1 Feb 2009 01:12:42 -0800 Subject: pkt_sched: sch_hfsc: sch_htb: Add non-work-conserving warning handler. Patrick McHardy suggested: > How about making this flag and the warning message (in a out-of-line > function) globally available? Other qdiscs (f.i. HFSC) can't deal with > inner non-work-conserving qdiscs as well. This patch uses qdisc->flags field of "suspected" child qdisc. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/sch_api.c | 11 +++++++++++ net/sched/sch_hfsc.c | 6 ++---- net/sched/sch_htb.c | 9 +-------- 3 files changed, 14 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 0fc4a18fd96f..32009793307b 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -444,6 +444,17 @@ out: } EXPORT_SYMBOL(qdisc_calculate_pkt_len); +void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc) +{ + if (!(qdisc->flags & TCQ_F_WARN_NONWC)) { + printk(KERN_WARNING + "%s: %s qdisc %X: is non-work-conserving?\n", + txt, qdisc->ops->id, qdisc->handle >> 16); + qdisc->flags |= TCQ_F_WARN_NONWC; + } +} +EXPORT_SYMBOL(qdisc_warn_nonwc); + static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer) { struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog, diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 45c31b1a4e1d..74226b265528 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -887,8 +887,7 @@ qdisc_peek_len(struct Qdisc *sch) skb = sch->ops->peek(sch); if (skb == NULL) { - if (net_ratelimit()) - printk("qdisc_peek_len: non work-conserving qdisc ?\n"); + qdisc_warn_nonwc("qdisc_peek_len", sch); return 0; } len = qdisc_pkt_len(skb); @@ -1642,8 +1641,7 @@ hfsc_dequeue(struct Qdisc *sch) skb = qdisc_dequeue_peeked(cl->qdisc); if (skb == NULL) { - if (net_ratelimit()) - printk("HFSC: Non-work-conserving qdisc ?\n"); + qdisc_warn_nonwc("HFSC", cl->qdisc); return NULL; } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 2f0f0b04d3fb..77ff510ef8ac 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -114,8 +114,6 @@ struct htb_class { struct tcf_proto *filter_list; int filter_cnt; - int warned; /* only one warning about non work conserving .. */ - /* token bucket parameters */ struct qdisc_rate_table *rate; /* rate table of the class itself */ struct qdisc_rate_table *ceil; /* ceiling rate (limits borrows too) */ @@ -809,13 +807,8 @@ next: skb = cl->un.leaf.q->dequeue(cl->un.leaf.q); if (likely(skb != NULL)) break; - if (!cl->warned) { - printk(KERN_WARNING - "htb: class %X isn't work conserving ?!\n", - cl->common.classid); - cl->warned = 1; - } + qdisc_warn_nonwc("htb", cl->un.leaf.q); htb_next_rb_node((level ? cl->parent->un.inner.ptr : q-> ptr[0]) + prio); cl = htb_lookup_leaf(q->row[level] + prio, prio, -- cgit v1.2.3 From e82181de5ef4648074765912d2d82d6bd60115eb Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Sun, 1 Feb 2009 01:13:05 -0800 Subject: pkt_sched: sch_htb: Warn on too many events. Let's get some info on possible config problems. This patch brings back an old warning, but is printed only once now. With feedback from Patrick McHardy Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 77ff510ef8ac..826f92145261 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -153,6 +153,9 @@ struct htb_sched { int direct_qlen; /* max qlen of above */ long direct_pkts; + +#define HTB_WARN_TOOMANYEVENTS 0x1 + unsigned int warned; /* only one warning */ }; /* find class in global hash table using given handle */ @@ -685,6 +688,10 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level, htb_add_to_wait_tree(q, cl, diff); } /* too much load - let's continue on next jiffie (including above) */ + if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) { + printk(KERN_WARNING "htb: too many events!\n"); + q->warned |= HTB_WARN_TOOMANYEVENTS; + } return q->now + 2 * PSCHED_TICKS_PER_SEC / HZ; } -- cgit v1.2.3 From 1224736d97e83367bb66e29c2bee0f570f09db3e Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Sun, 1 Feb 2009 01:13:22 -0800 Subject: pkt_sched: sch_htb: Use workqueue to schedule after too many events. Patrick McHardy suggested using a workqueue instead of hrtimers to trigger netif_schedule() when there is a problem with setting exact time of this event: 'The differnce - yeah, it shouldn't make much, mainly wake up the qdisc earlier (but not too early) after "too many events" occured _and_ no further enqueue events wake up the qdisc anyways.' Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 826f92145261..355974f610c5 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -156,6 +157,7 @@ struct htb_sched { #define HTB_WARN_TOOMANYEVENTS 0x1 unsigned int warned; /* only one warning */ + struct work_struct work; }; /* find class in global hash table using given handle */ @@ -659,7 +661,7 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, * htb_do_events - make mode changes to classes at the level * * Scans event queue for pending events and applies them. Returns time of - * next pending event (0 for no event in pq). + * next pending event (0 for no event in pq, q->now for too many events). * Note: Applied are events whose have cl->pq_key <= q->now. */ static psched_time_t htb_do_events(struct htb_sched *q, int level, @@ -687,12 +689,14 @@ static psched_time_t htb_do_events(struct htb_sched *q, int level, if (cl->cmode != HTB_CAN_SEND) htb_add_to_wait_tree(q, cl, diff); } - /* too much load - let's continue on next jiffie (including above) */ + + /* too much load - let's continue after a break for scheduling */ if (!(q->warned & HTB_WARN_TOOMANYEVENTS)) { printk(KERN_WARNING "htb: too many events!\n"); q->warned |= HTB_WARN_TOOMANYEVENTS; } - return q->now + 2 * PSCHED_TICKS_PER_SEC / HZ; + + return q->now; } /* Returns class->node+prio from id-tree where classe's id is >= id. NULL @@ -892,7 +896,10 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) } } sch->qstats.overlimits++; - qdisc_watchdog_schedule(&q->watchdog, next_event); + if (likely(next_event > q->now)) + qdisc_watchdog_schedule(&q->watchdog, next_event); + else + schedule_work(&q->work); fin: return skb; } @@ -962,6 +969,14 @@ static const struct nla_policy htb_policy[TCA_HTB_MAX + 1] = { [TCA_HTB_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE }, }; +static void htb_work_func(struct work_struct *work) +{ + struct htb_sched *q = container_of(work, struct htb_sched, work); + struct Qdisc *sch = q->watchdog.qdisc; + + __netif_schedule(qdisc_root(sch)); +} + static int htb_init(struct Qdisc *sch, struct nlattr *opt) { struct htb_sched *q = qdisc_priv(sch); @@ -996,6 +1011,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) INIT_LIST_HEAD(q->drops + i); qdisc_watchdog_init(&q->watchdog, sch); + INIT_WORK(&q->work, htb_work_func); skb_queue_head_init(&q->direct_queue); q->direct_qlen = qdisc_dev(sch)->tx_queue_len; @@ -1188,7 +1204,6 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) kfree(cl); } -/* always caled under BH & queue lock */ static void htb_destroy(struct Qdisc *sch) { struct htb_sched *q = qdisc_priv(sch); @@ -1196,6 +1211,7 @@ static void htb_destroy(struct Qdisc *sch) struct htb_class *cl; unsigned int i; + cancel_work_sync(&q->work); qdisc_watchdog_cancel(&q->watchdog); /* This line used to be after htb_destroy_class call below and surprisingly it worked in 2.4. But it must precede it -- cgit v1.2.3 From ad0f9904444de1309dedd2b9e365cae8af77d9b1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 1 Feb 2009 01:24:55 -0800 Subject: gro: Fix handling of imprecisely split packets The commit 89a1b249edcf9be884e71f92df84d48355c576aa (gro: Avoid copying headers of unmerged packets) only worked for packets which are either completely linear, completely non-linear, or packets which exactly split at the boundary between headers and payload. Anything else would cause bits in the header to go missing if the packet is held by GRO. This may have broken drivers such as ixgbe. This patch fixes the places that assumed or only worked with the above cases. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index ec5be1c7f2f1..220f52a1001e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -217,7 +217,7 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) static inline void *skb_gro_mac_header(struct sk_buff *skb) { - return skb_headlen(skb) ? skb_mac_header(skb) : + return skb_mac_header(skb) < skb->data ? skb_mac_header(skb) : page_address(skb_shinfo(skb)->frags[0].page) + skb_shinfo(skb)->frags[0].page_offset; } @@ -2469,11 +2469,19 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) napi->gro_list = skb; ret = GRO_HELD; +pull: + if (unlikely(!pskb_may_pull(skb, skb_gro_offset(skb)))) { + if (napi->gro_list == skb) + napi->gro_list = skb->next; + ret = GRO_DROP; + } + ok: return ret; normal: - return GRO_NORMAL; + ret = GRO_NORMAL; + goto pull; } EXPORT_SYMBOL(dev_gro_receive); @@ -2589,14 +2597,10 @@ EXPORT_SYMBOL(napi_fraginfo_skb); int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret) { int err = NET_RX_SUCCESS; - int may; switch (ret) { case GRO_NORMAL: case GRO_HELD: - may = pskb_may_pull(skb, skb_gro_offset(skb)); - BUG_ON(!may); - skb->protocol = eth_type_trans(skb, napi->dev); if (ret == GRO_NORMAL) -- cgit v1.2.3 From 5add300975cf36b1bd30c461105bb938da260f14 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 1 Feb 2009 01:40:17 -0800 Subject: inet: Fix virt-manager regression due to bind(0) changes. From: Stephen Hemminger Fix regression introduced by a9d8f9110d7e953c2f2b521087a4179677843c2a ("inet: Allowing more than 64k connections and heavily optimize bind(0) time.") Based upon initial patches and feedback from Evegniy Polyakov and Eric Dumazet. From Eric Dumazet: -------------------- Also there might be a problem at line 175 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN && --attempts >= 0) { spin_unlock(&head->lock); goto again; If we entered inet_csk_get_port() with a non null snum, we can "goto again" while it was not expected. -------------------- Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index df8e72f07478..9bc6a187bdce 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -172,7 +172,8 @@ tb_found: } else { ret = 1; if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb)) { - if (sk->sk_reuse && sk->sk_state != TCP_LISTEN && --attempts >= 0) { + if (sk->sk_reuse && sk->sk_state != TCP_LISTEN && + smallest_size != -1 && --attempts >= 0) { spin_unlock(&head->lock); goto again; } -- cgit v1.2.3 From 24dd1fa184595ff095a92de807fdf029b2632673 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 1 Feb 2009 12:31:33 -0800 Subject: net: move bsockets outside of read only beginning of struct inet_hashinfo And switch bsockets to atomic_t since it might be changed in parallel. Signed-off-by: Eric Dumazet Acked-by: Evgeniy Polyakov Signed-off-by: David S. Miller --- net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/inet_hashtables.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 9bc6a187bdce..22cd19ee44e5 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -119,7 +119,7 @@ again: (tb->num_owners < smallest_size || smallest_size == -1)) { smallest_size = tb->num_owners; smallest_rover = rover; - if (hashinfo->bsockets > (high - low) + 1) { + if (atomic_read(&hashinfo->bsockets) > (high - low) + 1) { spin_unlock(&head->lock); snum = smallest_rover; goto have_snum; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index d7b6178bf48b..625cc5f64c94 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -62,7 +62,7 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, { struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - hashinfo->bsockets++; + atomic_inc(&hashinfo->bsockets); inet_sk(sk)->num = snum; sk_add_bind_node(sk, &tb->owners); @@ -81,7 +81,7 @@ static void __inet_put_port(struct sock *sk) struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; struct inet_bind_bucket *tb; - hashinfo->bsockets--; + atomic_dec(&hashinfo->bsockets); spin_lock(&head->lock); tb = inet_csk(sk)->icsk_bind_hash; @@ -532,6 +532,7 @@ void inet_hashinfo_init(struct inet_hashinfo *h) { int i; + atomic_set(&h->bsockets, 0); for (i = 0; i < INET_LHTABLE_SIZE; i++) { spin_lock_init(&h->listening_hash[i].lock); INIT_HLIST_NULLS_HEAD(&h->listening_hash[i].head, -- cgit v1.2.3 From f15fbcd7d857ca2ea20b57ba6dfe63aab89d0b8b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 1 Feb 2009 22:24:43 -0800 Subject: ipv4: Delete redundant sk_family assignment sk_alloc now sets sk_family so this is redundant. In fact it caught my eye because sock_init_data already uses sk_family so this is too late anyway. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 957cd054732c..c79087719df0 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -369,7 +369,6 @@ lookup_protocol: sock_init_data(sock, sk); sk->sk_destruct = inet_sock_destruct; - sk->sk_family = PF_INET; sk->sk_protocol = protocol; sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; -- cgit v1.2.3 From 9a279bcbe347496799711155ed41a89bc40f79c5 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 4 Feb 2009 16:55:27 -0800 Subject: net: Partially allow skb destructors to be used on receive path As it currently stands, skb destructors are forbidden on the receive path because the protocol end-points will overwrite any existing destructor with their own. This is the reason why we have to call skb_orphan in the loopback driver before we reinject the packet back into the stack, thus creating a period during which loopback traffic isn't charged to any socket. With virtualisation, we have a similar problem in that traffic is reinjected into the stack without being associated with any socket entity, thus providing no natural congestion push-back for those poor folks still stuck with UDP. Now had we been consistent in telling them that UDP simply has no congestion feedback, I could just fob them off. Unfortunately, we appear to have gone to some length in catering for this on the standard UDP path, with skb/socket accounting so that has created a very unhealthy dependency. Alas habits are difficult to break out of, so we may just have to allow skb destructors on the receive path. It turns out that making skb destructors useable on the receive path isn't as easy as it seems. For instance, simply adding skb_orphan to skb_set_owner_r isn't enough. This is because we assume all over the IP stack that skb->sk is an IP socket if present. The new transparent proxy code goes one step further and assumes that skb->sk is the receiving socket if present. Now all of this can be dealt with by adding simple checks such as only treating skb->sk as an IP socket if skb->sk->sk_family matches. However, it turns out that for bridging at least we don't need to do all of this work. This is of interest because most virtualisation setups use bridging so we don't actually go through the IP stack on the host (with the exception of our old nemesis the bridge netfilter, but that's easily taken care of). So this patch simply adds skb_orphan to the point just before we enter the IP stack, but after we've gone through the bridge on the receive path. It also adds an skb_orphan to the one place in netfilter that touches skb->sk/skb->destructor, that is, tproxy. One word of caution, because of the internal code structure, anyone wishing to deploy this must use skb_set_owner_w as opposed to skb_set_owner_r since many functions that create a new skb from an existing one will invoke skb_set_owner_w on the new skb. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 2 ++ net/netfilter/nf_tproxy_core.c | 1 + 2 files changed, 3 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 220f52a1001e..3337cf98f231 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2288,6 +2288,8 @@ ncls: if (!skb) goto out; + skb_orphan(skb); + type = skb->protocol; list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c index cdc97f3105a3..5490fc37c92d 100644 --- a/net/netfilter/nf_tproxy_core.c +++ b/net/netfilter/nf_tproxy_core.c @@ -71,6 +71,7 @@ int nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) { if (inet_sk(sk)->transparent) { + skb_orphan(skb); skb->sk = sk; skb->destructor = nf_tproxy_destructor; return 1; -- cgit v1.2.3 From 4cc7f68d65558f683c702d4fe3a5aac4c5227b97 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 4 Feb 2009 16:55:54 -0800 Subject: net: Reexport sock_alloc_send_pskb The function sock_alloc_send_pskb is completely useless if not exported since most of the code in it won't be used as is. In fact, this code has already been duplicated in the tun driver. Now that we need accounting in the tun driver, we can in fact use this function as is. So this patch marks it for export again. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/sock.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index f3a0d08cbb48..c64996f8a27a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1254,10 +1254,9 @@ static long sock_wait_for_wmem(struct sock * sk, long timeo) * Generic send/receive buffer handlers */ -static struct sk_buff *sock_alloc_send_pskb(struct sock *sk, - unsigned long header_len, - unsigned long data_len, - int noblock, int *errcode) +struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, + unsigned long data_len, int noblock, + int *errcode) { struct sk_buff *skb; gfp_t gfp_mask; @@ -1337,6 +1336,7 @@ failure: *errcode = err; return NULL; } +EXPORT_SYMBOL(sock_alloc_send_pskb); struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, int noblock, int *errcode) -- cgit v1.2.3 From 56035022d86fff45299288cb372a42f752ba23fa Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 5 Feb 2009 21:26:52 -0800 Subject: gro: Fix frag_list merging on imprecisely split packets The previous fix ad0f9904444de1309dedd2b9e365cae8af77d9b1 (gro: Fix handling of imprecisely split packets) only fixed the case of frags merging, frag_list merging in the same circumstances were still broken. In particular, the packet headers end up in the data stream. This patch fixes this plus another issue where an imprecisely split packet header may be read incorrectly (this is mostly harmless since it'll simply cause the packet to not match and be rejected for GRO). Thanks to Emil Tantilov and Jeff Kirsher for helping to track this down. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 3 ++- net/core/skbuff.c | 11 +++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 3337cf98f231..247f1614a796 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2391,7 +2391,8 @@ void *skb_gro_header(struct sk_buff *skb, unsigned int hlen) return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL; return page_address(skb_shinfo(skb)->frags[0].page) + - skb_shinfo(skb)->frags[0].page_offset + offset; + skb_shinfo(skb)->frags[0].page_offset + + offset - skb_headlen(skb); } EXPORT_SYMBOL(skb_gro_header); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e55d1ef5690d..67f2a2f85827 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2678,6 +2678,17 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) p = nskb; merge: + if (skb_gro_offset(skb) > skb_headlen(skb)) { + skb_shinfo(skb)->frags[0].page_offset += + skb_gro_offset(skb) - skb_headlen(skb); + skb_shinfo(skb)->frags[0].size -= + skb_gro_offset(skb) - skb_headlen(skb); + skb_gro_reset_offset(skb); + skb_gro_pull(skb, skb_headlen(skb)); + } + + __skb_pull(skb, skb_gro_offset(skb)); + p->prev->next = skb; p->prev = skb; skb_header_release(skb); -- cgit v1.2.3 From ff491a7334acfd74e515c896632e37e401f52676 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Thu, 5 Feb 2009 23:56:36 -0800 Subject: netlink: change return-value logic of netlink_broadcast() Currently, netlink_broadcast() reports errors to the caller if no messages at all were delivered: 1) If, at least, one message has been delivered correctly, returns 0. 2) Otherwise, if no messages at all were delivered due to skb_clone() failure, return -ENOBUFS. 3) Otherwise, if there are no listeners, return -ESRCH. With this patch, the caller knows if the delivery of any of the messages to the listeners have failed: 1) If it fails to deliver any message (for whatever reason), return -ENOBUFS. 2) Otherwise, if all messages were delivered OK, returns 0. 3) Otherwise, if no listeners, return -ESRCH. In the current ctnetlink code and in Netfilter in general, we can add reliable logging and connection tracking event delivery by dropping the packets whose events were not successfully delivered over Netlink. Of course, this option would be settable via /proc as this approach reduces performance (in terms of filtered connections per seconds by a stateful firewall) but providing reliable logging and event delivery (for conntrackd) in return. This patch also changes some clients of netlink_broadcast() that may report ENOBUFS errors via printk. This error handling is not of any help. Instead, the userspace daemons that are listening to those netlink messages should resync themselves with the kernel-side if they hit ENOBUFS. BTW, netlink_broadcast() clients include those that call cn_netlink_send(), nlmsg_multicast() and genlmsg_multicast() since they internally call netlink_broadcast() and return its error value. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 8 ++++++-- net/wimax/op-msg.c | 9 +++------ net/wimax/stack.c | 12 ++++-------- 3 files changed, 13 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 9eb895c7a2a9..6ee69c27f806 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -950,6 +950,7 @@ struct netlink_broadcast_data { u32 pid; u32 group; int failure; + int delivery_failure; int congested; int delivered; gfp_t allocation; @@ -999,6 +1000,7 @@ static inline int do_one_broadcast(struct sock *sk, p->skb2 = NULL; } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) { netlink_overrun(sk); + p->delivery_failure = 1; } else { p->congested |= val; p->delivered = 1; @@ -1025,6 +1027,7 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, info.pid = pid; info.group = group; info.failure = 0; + info.delivery_failure = 0; info.congested = 0; info.delivered = 0; info.allocation = allocation; @@ -1045,13 +1048,14 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, if (info.skb2) kfree_skb(info.skb2); + if (info.delivery_failure || info.failure) + return -ENOBUFS; + if (info.delivered) { if (info.congested && (allocation & __GFP_WAIT)) yield(); return 0; } - if (info.failure) - return -ENOBUFS; return -ESRCH; } EXPORT_SYMBOL(netlink_broadcast); diff --git a/net/wimax/op-msg.c b/net/wimax/op-msg.c index cb3b4ad53683..5d149c1b5f0d 100644 --- a/net/wimax/op-msg.c +++ b/net/wimax/op-msg.c @@ -258,7 +258,6 @@ EXPORT_SYMBOL_GPL(wimax_msg_len); */ int wimax_msg_send(struct wimax_dev *wimax_dev, struct sk_buff *skb) { - int result; struct device *dev = wimax_dev->net_dev->dev.parent; void *msg = skb->data; size_t size = skb->len; @@ -266,11 +265,9 @@ int wimax_msg_send(struct wimax_dev *wimax_dev, struct sk_buff *skb) d_printf(1, dev, "CTX: wimax msg, %zu bytes\n", size); d_dump(2, dev, msg, size); - result = genlmsg_multicast(skb, 0, wimax_gnl_mcg.id, GFP_KERNEL); - d_printf(1, dev, "CTX: genl multicast result %d\n", result); - if (result == -ESRCH) /* Nobody connected, ignore it */ - result = 0; /* btw, the skb is freed already */ - return result; + genlmsg_multicast(skb, 0, wimax_gnl_mcg.id, GFP_KERNEL); + d_printf(1, dev, "CTX: genl multicast done\n"); + return 0; } EXPORT_SYMBOL_GPL(wimax_msg_send); diff --git a/net/wimax/stack.c b/net/wimax/stack.c index 3869c0327882..a0ee76b52510 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -163,16 +163,12 @@ int wimax_gnl_re_state_change_send( struct device *dev = wimax_dev_to_dev(wimax_dev); d_fnstart(3, dev, "(wimax_dev %p report_skb %p)\n", wimax_dev, report_skb); - if (report_skb == NULL) + if (report_skb == NULL) { + result = -ENOMEM; goto out; - genlmsg_end(report_skb, header); - result = genlmsg_multicast(report_skb, 0, wimax_gnl_mcg.id, GFP_KERNEL); - if (result == -ESRCH) /* Nobody connected, ignore it */ - result = 0; /* btw, the skb is freed already */ - if (result < 0) { - dev_err(dev, "RE_STCH: Error sending: %d\n", result); - nlmsg_free(report_skb); } + genlmsg_end(report_skb, header); + genlmsg_multicast(report_skb, 0, wimax_gnl_mcg.id, GFP_KERNEL); out: d_fnend(3, dev, "(wimax_dev %p report_skb %p) = %d\n", wimax_dev, report_skb, result); -- cgit v1.2.3 From 69ebbf58f3dff9fb4e5240e472b5869fa869dae1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 6 Feb 2009 23:46:51 -0800 Subject: ipmr: use goto to common label instead of opencoding MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 21a6dc710f20..13e9dd3012b3 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1243,8 +1243,7 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) vif->dev->stats.tx_bytes += skb->len; vif->dev->stats.tx_packets++; ipmr_cache_report(net, skb, vifi, IGMPMSG_WHOLEPKT); - kfree_skb(skb); - return; + goto out_free; } #endif -- cgit v1.2.3 From 910d30b704542b49f83881a4832d8414c6c3d9c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 6 Feb 2009 23:47:14 -0800 Subject: ax25: more common return path joining MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Acked-by: Ralf Baechle Signed-off-by: David S. Miller --- net/ax25/ax25_iface.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ax25/ax25_iface.c b/net/ax25/ax25_iface.c index 8443af57a374..71338f112108 100644 --- a/net/ax25/ax25_iface.c +++ b/net/ax25/ax25_iface.c @@ -61,27 +61,24 @@ void ax25_protocol_release(unsigned int pid) write_lock_bh(&protocol_list_lock); protocol = protocol_list; - if (protocol == NULL) { - write_unlock_bh(&protocol_list_lock); - return; - } + if (protocol == NULL) + goto out; if (protocol->pid == pid) { protocol_list = protocol->next; - write_unlock_bh(&protocol_list_lock); - return; + goto out; } while (protocol != NULL && protocol->next != NULL) { if (protocol->next->pid == pid) { s = protocol->next; protocol->next = protocol->next->next; - write_unlock_bh(&protocol_list_lock); - return; + goto out; } protocol = protocol->next; } +out: write_unlock_bh(&protocol_list_lock); } -- cgit v1.2.3 From d73f08011bc30c03a2bcb1ccd880e4be84aea269 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 6 Feb 2009 23:47:37 -0800 Subject: ipv6/ndisc: join error paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv6/ndisc.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 3e2970841bd8..3cd83b85e9ef 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1538,13 +1538,10 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, if (rt->rt6i_flags & RTF_GATEWAY) { ND_PRINTK2(KERN_WARNING "ICMPv6 Redirect: destination is not a neighbour.\n"); - dst_release(dst); - return; - } - if (!xrlim_allow(dst, 1*HZ)) { - dst_release(dst); - return; + goto release; } + if (!xrlim_allow(dst, 1*HZ)) + goto release; if (dev->addr_len) { read_lock_bh(&neigh->lock); @@ -1570,8 +1567,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, ND_PRINTK0(KERN_ERR "ICMPv6 Redirect: %s() failed to allocate an skb.\n", __func__); - dst_release(dst); - return; + goto release; } skb_reserve(buff, LL_RESERVED_SPACE(dev)); @@ -1631,6 +1627,10 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, if (likely(idev != NULL)) in6_dev_put(idev); + return; + +release: + dst_release(dst); } static void pndisc_redo(struct sk_buff *skb) -- cgit v1.2.3 From b5f348e5a41b39543c1c5efd661d7fd296dd5281 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 6 Feb 2009 23:48:01 -0800 Subject: ipv6/addrconf: common code located MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit $ codiff net/ipv6/addrconf.o net/ipv6/addrconf.o.new net/ipv6/addrconf.c: addrconf_notify | -267 1 function changed, 267 bytes removed net/ipv6/addrconf.c: add_addr | +86 1 function changed, 86 bytes added net/ipv6/addrconf.o.new: 2 functions changed, 86 bytes added, 267 bytes removed, diff: -181 Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 45 ++++++++++++++++++--------------------------- 1 file changed, 18 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f9afb452249c..03e2a1ad71e9 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2224,10 +2224,24 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg) return err; } +static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr, + int plen, int scope) +{ + struct inet6_ifaddr *ifp; + + ifp = ipv6_add_addr(idev, addr, plen, scope, IFA_F_PERMANENT); + if (!IS_ERR(ifp)) { + spin_lock_bh(&ifp->lock); + ifp->flags &= ~IFA_F_TENTATIVE; + spin_unlock_bh(&ifp->lock); + ipv6_ifa_notify(RTM_NEWADDR, ifp); + in6_ifa_put(ifp); + } +} + #if defined(CONFIG_IPV6_SIT) || defined(CONFIG_IPV6_SIT_MODULE) static void sit_add_v4_addrs(struct inet6_dev *idev) { - struct inet6_ifaddr * ifp; struct in6_addr addr; struct net_device *dev; struct net *net = dev_net(idev->dev); @@ -2246,14 +2260,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) } if (addr.s6_addr32[3]) { - ifp = ipv6_add_addr(idev, &addr, 128, scope, IFA_F_PERMANENT); - if (!IS_ERR(ifp)) { - spin_lock_bh(&ifp->lock); - ifp->flags &= ~IFA_F_TENTATIVE; - spin_unlock_bh(&ifp->lock); - ipv6_ifa_notify(RTM_NEWADDR, ifp); - in6_ifa_put(ifp); - } + add_addr(idev, &addr, 128, scope); return; } @@ -2281,15 +2288,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) else plen = 96; - ifp = ipv6_add_addr(idev, &addr, plen, flag, - IFA_F_PERMANENT); - if (!IS_ERR(ifp)) { - spin_lock_bh(&ifp->lock); - ifp->flags &= ~IFA_F_TENTATIVE; - spin_unlock_bh(&ifp->lock); - ipv6_ifa_notify(RTM_NEWADDR, ifp); - in6_ifa_put(ifp); - } + add_addr(idev, &addr, plen, flag); } } } @@ -2299,7 +2298,6 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) static void init_loopback(struct net_device *dev) { struct inet6_dev *idev; - struct inet6_ifaddr * ifp; /* ::1 */ @@ -2310,14 +2308,7 @@ static void init_loopback(struct net_device *dev) return; } - ifp = ipv6_add_addr(idev, &in6addr_loopback, 128, IFA_HOST, IFA_F_PERMANENT); - if (!IS_ERR(ifp)) { - spin_lock_bh(&ifp->lock); - ifp->flags &= ~IFA_F_TENTATIVE; - spin_unlock_bh(&ifp->lock); - ipv6_ifa_notify(RTM_NEWADDR, ifp); - in6_ifa_put(ifp); - } + add_addr(idev, &in6addr_loopback, 128, IFA_HOST); } static void addrconf_add_linklocal(struct inet6_dev *idev, struct in6_addr *addr) -- cgit v1.2.3 From 1f0fa15432e49547c3fa915644c7e0c0975809e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Fri, 6 Feb 2009 23:48:33 -0800 Subject: net/sunrpc/xprtsock.c: some common code found MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit $ diff-funcs xs_udp_write_space net/sunrpc/xprtsock.c net/sunrpc/xprtsock.c xs_tcp_write_space --- net/sunrpc/xprtsock.c:xs_udp_write_space() +++ net/sunrpc/xprtsock.c:xs_tcp_write_space() @@ -1,4 +1,4 @@ - * xs_udp_write_space - callback invoked when socket buffer space + * xs_tcp_write_space - callback invoked when socket buffer space * becomes available * @sk: socket whose state has changed * @@ -7,12 +7,12 @@ * progress, otherwise we'll waste resources thrashing kernel_sendmsg * with a bunch of small requests. */ -static void xs_udp_write_space(struct sock *sk) +static void xs_tcp_write_space(struct sock *sk) { read_lock(&sk->sk_callback_lock); - /* from net/core/sock.c:sock_def_write_space */ - if (sock_writeable(sk)) { + /* from net/core/stream.c:sk_stream_write_space */ + if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { struct socket *sock; struct rpc_xprt *xprt; $ codiff net/sunrpc/xprtsock.o net/sunrpc/xprtsock.o.new net/sunrpc/xprtsock.c: xs_tcp_write_space | -163 xs_udp_write_space | -163 2 functions changed, 326 bytes removed net/sunrpc/xprtsock.c: xs_write_space | +179 1 function changed, 179 bytes added net/sunrpc/xprtsock.o.new: 3 functions changed, 179 bytes added, 326 bytes removed, diff: -147 Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/sunrpc/xprtsock.c | 53 ++++++++++++++++++++------------------------------- 1 file changed, 21 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 5cbb404c4cdf..b49e434c094f 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1215,6 +1215,23 @@ out: read_unlock(&sk->sk_callback_lock); } +static void xs_write_space(struct sock *sk) +{ + struct socket *sock; + struct rpc_xprt *xprt; + + if (unlikely(!(sock = sk->sk_socket))) + return; + clear_bit(SOCK_NOSPACE, &sock->flags); + + if (unlikely(!(xprt = xprt_from_sock(sk)))) + return; + if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0) + return; + + xprt_write_space(xprt); +} + /** * xs_udp_write_space - callback invoked when socket buffer space * becomes available @@ -1230,23 +1247,9 @@ static void xs_udp_write_space(struct sock *sk) read_lock(&sk->sk_callback_lock); /* from net/core/sock.c:sock_def_write_space */ - if (sock_writeable(sk)) { - struct socket *sock; - struct rpc_xprt *xprt; - - if (unlikely(!(sock = sk->sk_socket))) - goto out; - clear_bit(SOCK_NOSPACE, &sock->flags); - - if (unlikely(!(xprt = xprt_from_sock(sk)))) - goto out; - if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0) - goto out; - - xprt_write_space(xprt); - } + if (sock_writeable(sk)) + xs_write_space(sk); - out: read_unlock(&sk->sk_callback_lock); } @@ -1265,23 +1268,9 @@ static void xs_tcp_write_space(struct sock *sk) read_lock(&sk->sk_callback_lock); /* from net/core/stream.c:sk_stream_write_space */ - if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { - struct socket *sock; - struct rpc_xprt *xprt; - - if (unlikely(!(sock = sk->sk_socket))) - goto out; - clear_bit(SOCK_NOSPACE, &sock->flags); + if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) + xs_write_space(sk); - if (unlikely(!(xprt = xprt_from_sock(sk)))) - goto out; - if (test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags) == 0) - goto out; - - xprt_write_space(xprt); - } - - out: read_unlock(&sk->sk_callback_lock); } -- cgit v1.2.3 From 4ae5544f9a33e4ae306e337f96951eb3ff2df6d9 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 8 Feb 2009 18:00:36 +0000 Subject: gro: Remember number of held packets instead of counting every time This patch prepares for the move of the same_flow checks out of dev_gro_receive. As such we need to remember the number of held packets since doing a loop just to count them every time is silly. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 709a9a922258..ae0b66936abe 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2372,6 +2372,7 @@ void napi_gro_flush(struct napi_struct *napi) napi_gro_complete(skb); } + napi->gro_count = 0; napi->gro_list = NULL; } EXPORT_SYMBOL(napi_gro_flush); @@ -2402,7 +2403,6 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) struct packet_type *ptype; __be16 type = skb->protocol; struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; - int count = 0; int same_flow; int mac_len; int ret; @@ -2430,8 +2430,6 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) NAPI_GRO_CB(skb)->free = 0; for (p = napi->gro_list; p; p = p->next) { - count++; - if (!NAPI_GRO_CB(p)->same_flow) continue; @@ -2457,15 +2455,16 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) *pp = nskb->next; nskb->next = NULL; napi_gro_complete(nskb); - count--; + napi->gro_count--; } if (same_flow) goto ok; - if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) + if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS) goto normal; + napi->gro_count++; NAPI_GRO_CB(skb)->count = 1; skb_shinfo(skb)->gso_size = skb_gro_len(skb); skb->next = napi->gro_list; @@ -2713,6 +2712,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { INIT_LIST_HEAD(&napi->poll_list); + napi->gro_count = 0; napi->gro_list = NULL; napi->skb = NULL; napi->poll = poll; @@ -2741,6 +2741,7 @@ void netif_napi_del(struct napi_struct *napi) } napi->gro_list = NULL; + napi->gro_count = 0; } EXPORT_SYMBOL(netif_napi_del); @@ -5246,6 +5247,7 @@ static int __init net_dev_init(void) queue->backlog.poll = process_backlog; queue->backlog.weight = weight_p; queue->backlog.gro_list = NULL; + queue->backlog.gro_count = 0; } dev_boot_phase = 0; -- cgit v1.2.3 From aa4b9f533ed5a22952e038b9fac2447ccc682124 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 8 Feb 2009 18:00:37 +0000 Subject: gro: Optimise Ethernet header comparison This patch optimises the Ethernet header comparison to use 2-byte and 4-byte xors instead of memcmp. In order to facilitate this, the actual comparison is now carried out by the callers of the shared dev_gro_receive function. This has a significant impact when receiving 1500B packets through 10GbE. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/8021q/vlan_core.c | 4 +++- net/core/dev.c | 23 ++--------------------- 2 files changed, 5 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 378fa69d625a..70435af153f2 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -85,7 +85,9 @@ static int vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp, goto drop; for (p = napi->gro_list; p; p = p->next) { - NAPI_GRO_CB(p)->same_flow = p->dev == skb->dev; + NAPI_GRO_CB(p)->same_flow = + p->dev == skb->dev && !compare_ether_header( + skb_mac_header(p), skb_gro_mac_header(skb)); NAPI_GRO_CB(p)->flush = 0; } diff --git a/net/core/dev.c b/net/core/dev.c index ae0b66936abe..1e27a67df242 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -215,13 +215,6 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)]; } -static inline void *skb_gro_mac_header(struct sk_buff *skb) -{ - return skb_mac_header(skb) < skb->data ? skb_mac_header(skb) : - page_address(skb_shinfo(skb)->frags[0].page) + - skb_shinfo(skb)->frags[0].page_offset; -} - /* Device list insertion */ static int list_netdevice(struct net_device *dev) { @@ -2415,29 +2408,16 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { - struct sk_buff *p; - void *mac; - if (ptype->type != type || ptype->dev || !ptype->gro_receive) continue; skb_set_network_header(skb, skb_gro_offset(skb)); - mac = skb_gro_mac_header(skb); mac_len = skb->network_header - skb->mac_header; skb->mac_len = mac_len; NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; - for (p = napi->gro_list; p; p = p->next) { - if (!NAPI_GRO_CB(p)->same_flow) - continue; - - if (p->mac_len != mac_len || - memcmp(skb_mac_header(p), mac, mac_len)) - NAPI_GRO_CB(p)->same_flow = 0; - } - pp = ptype->gro_receive(&napi->gro_list, skb); break; } @@ -2492,7 +2472,8 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) struct sk_buff *p; for (p = napi->gro_list; p; p = p->next) { - NAPI_GRO_CB(p)->same_flow = 1; + NAPI_GRO_CB(p)->same_flow = !compare_ether_header( + skb_mac_header(p), skb_gro_mac_header(skb)); NAPI_GRO_CB(p)->flush = 0; } -- cgit v1.2.3 From a5ad24be728d4352b71a81fba471aa41eb71f83a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 8 Feb 2009 18:00:39 +0000 Subject: gro: Optimise IPv4 packet reception As this function can be called more than half a million times for 10GbE, it's important to optimise it as much as we can. This patch does some obvious changes to use 2-byte and 4-byte operations instead of byte-oriented ones where possible. Bit ops are also used to replace logical ops to reduce branching. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/af_inet.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c79087719df0..627be4dc7fb0 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1263,7 +1263,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, if (!ops || !ops->gro_receive) goto out_unlock; - if (iph->version != 4 || iph->ihl != 5) + if (*(u8 *)iph != 0x45) goto out_unlock; if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) @@ -1281,17 +1281,18 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, iph2 = ip_hdr(p); - if (iph->protocol != iph2->protocol || - iph->tos != iph2->tos || - memcmp(&iph->saddr, &iph2->saddr, 8)) { + if ((iph->protocol ^ iph2->protocol) | + (iph->tos ^ iph2->tos) | + (iph->saddr ^ iph2->saddr) | + (iph->daddr ^ iph2->daddr)) { NAPI_GRO_CB(p)->same_flow = 0; continue; } /* All fields must match except length and checksum. */ NAPI_GRO_CB(p)->flush |= - memcmp(&iph->frag_off, &iph2->frag_off, 4) || - (u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) != id; + (iph->ttl ^ iph2->ttl) | + ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); NAPI_GRO_CB(p)->flush |= flush; } -- cgit v1.2.3 From aa6320d336971171df1d13c1c284facf10804881 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 8 Feb 2009 18:00:40 +0000 Subject: gro: Optimise TCP packet reception gro: Optimise TCP packet reception As this function can be called more than half a million times for 10GbE, it's important to optimise it as much as we can. This patch uses bit ops to logical ops, as well as open coding memcmp to exploit alignment properties. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 73266b79c19a..90b2f3c192ff 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2478,9 +2478,9 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) struct tcphdr *th2; unsigned int thlen; unsigned int flags; - unsigned int total; unsigned int mss = 1; int flush = 1; + int i; th = skb_gro_header(skb, sizeof(*th)); if (unlikely(!th)) @@ -2504,7 +2504,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb) th2 = tcp_hdr(p); - if (th->source != th2->source || th->dest != th2->dest) { + if ((th->source ^ th2->source) | (th->dest ^ th2->dest)) { NAPI_GRO_CB(p)->same_flow = 0; continue; } @@ -2519,14 +2519,15 @@ found: flush |= flags & TCP_FLAG_CWR; flush |= (flags ^ tcp_flag_word(th2)) & ~(TCP_FLAG_CWR | TCP_FLAG_FIN | TCP_FLAG_PSH); - flush |= th->ack_seq != th2->ack_seq || th->window != th2->window; - flush |= memcmp(th + 1, th2 + 1, thlen - sizeof(*th)); + flush |= (th->ack_seq ^ th2->ack_seq) | (th->window ^ th2->window); + for (i = sizeof(*th); !flush && i < thlen; i += 4) + flush |= *(u32 *)((u8 *)th + i) ^ + *(u32 *)((u8 *)th2 + i); - total = skb_gro_len(p); mss = skb_shinfo(p)->gso_size; - flush |= skb_gro_len(skb) > mss || !skb_gro_len(skb); - flush |= ntohl(th2->seq) + total != ntohl(th->seq); + flush |= (skb_gro_len(skb) > mss) | !skb_gro_len(skb); + flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); if (flush || skb_gro_receive(head, skb)) { mss = 1; -- cgit v1.2.3 From e374055afbf92c8d128d8538aafc7e765838206e Mon Sep 17 00:00:00 2001 From: Sujith Date: Thu, 29 Jan 2009 09:34:22 +0530 Subject: mac80211: Reset assoc_scan_tries after an unsuccessful scan run Trying to associate with a non-existent SSID stops the state machine after the first run. Subsequent association requests fail to start the scan engine. Fix this by resetting assoc_scan_tries to zero after completing a scan run. Signed-off-by: Sujith Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 9d51e278c1e5..a8755df0cf74 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2415,8 +2415,10 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata, ifsta->ssid_len); ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE; set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request); - } else + } else { + ifsta->assoc_scan_tries = 0; ifsta->state = IEEE80211_STA_MLME_DISABLED; + } } return -1; } -- cgit v1.2.3 From c0415b547d37e8065ad4adf289d11db2f3b16dfd Mon Sep 17 00:00:00 2001 From: Alina Friedrichsen Date: Thu, 29 Jan 2009 09:59:43 +0100 Subject: mac80211: Creating new IBSS with fixed BSSID This fixes a bug when creating a new IBSS network with a fixed BSSID. The fixed BSSID situation is now with one of my last patches handled in ieee80211_sta_find_ibss() function. It's more robust to test against (ifsta->flags & IEEE80211_STA_PREV_BSSID_SET), because ifsta->state is not seted right in every situation and so the creating of the new IBSS network sometimes hangs after the first try to scan for a network to merge. Signed-off-by: Alina Friedrichsen Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a8755df0cf74..0ece151659c0 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -2722,9 +2722,8 @@ void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local) if (sdata && sdata->vif.type == NL80211_IFTYPE_ADHOC) { ifsta = &sdata->u.sta; - if (!(ifsta->flags & IEEE80211_STA_BSSID_SET) || - (!(ifsta->state == IEEE80211_STA_MLME_IBSS_JOINED) && - !ieee80211_sta_active_ibss(sdata))) + if ((!(ifsta->flags & IEEE80211_STA_PREV_BSSID_SET)) || + !ieee80211_sta_active_ibss(sdata)) ieee80211_sta_find_ibss(sdata, ifsta); } -- cgit v1.2.3 From c4e3a5844812dd5bf03282e021175d55d608f594 Mon Sep 17 00:00:00 2001 From: Alina Friedrichsen Date: Thu, 29 Jan 2009 13:56:20 +0100 Subject: mac80211: IBSS join rework I hold back this patch for around a week to avoid confusion. This is the second step of "mac80211: Fixed BSSID handling revisited". With it, in the situation of a strange merge to the same BSSID (e.g. caused by a TSF overflow) only reset_tsf() is called. And sta_info_flush_delayed() is only called if you change the network manually, not on an automatic BSSID merge. Signed-off-by: Alina Friedrichsen Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0ece151659c0..73808780f538 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1503,13 +1503,22 @@ static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, struct ieee80211_bss *bss) { struct ieee80211_local *local = sdata->local; - int res, rates, i, j; + int res = 0, rates, i, j; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; u8 *pos; struct ieee80211_supported_band *sband; union iwreq_data wrqu; + if (local->ops->reset_tsf) { + /* Reset own TSF to allow time synchronization work. */ + local->ops->reset_tsf(local_to_hw(local)); + } + + if ((ifsta->flags & IEEE80211_STA_PREV_BSSID_SET) && + memcmp(ifsta->bssid, bss->bssid, ETH_ALEN) == 0) + return res; + skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400 + sdata->u.sta.ie_proberesp_len); if (!skb) { @@ -1520,13 +1529,11 @@ static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - /* Remove possible STA entries from other IBSS networks. */ - sta_info_flush_delayed(sdata); - - if (local->ops->reset_tsf) { - /* Reset own TSF to allow time synchronization work. */ - local->ops->reset_tsf(local_to_hw(local)); + if (!(ifsta->flags & IEEE80211_STA_PREV_BSSID_SET)) { + /* Remove possible STA entries from other IBSS networks. */ + sta_info_flush_delayed(sdata); } + memcpy(ifsta->bssid, bss->bssid, ETH_ALEN); res = ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID); if (res) -- cgit v1.2.3 From c1b4aa3fb619782213af2af6652663c8f9cef373 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Thu, 29 Jan 2009 13:26:44 -0800 Subject: wireless: replace uses of __constant_{endian} The base versions handle constant folding now. Signed-off-by: Harvey Harrison Signed-off-by: John W. Linville --- net/mac80211/rx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 19ffc8ef1d1d..1a59382976e6 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1225,12 +1225,12 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx) switch (hdr->frame_control & cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { - case __constant_cpu_to_le16(IEEE80211_FCTL_TODS): + case cpu_to_le16(IEEE80211_FCTL_TODS): if (unlikely(sdata->vif.type != NL80211_IFTYPE_AP && sdata->vif.type != NL80211_IFTYPE_AP_VLAN)) return -1; break; - case __constant_cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): + case cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): if (unlikely(sdata->vif.type != NL80211_IFTYPE_WDS && sdata->vif.type != NL80211_IFTYPE_MESH_POINT)) return -1; @@ -1244,13 +1244,13 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx) } } break; - case __constant_cpu_to_le16(IEEE80211_FCTL_FROMDS): + case cpu_to_le16(IEEE80211_FCTL_FROMDS): if (sdata->vif.type != NL80211_IFTYPE_STATION || (is_multicast_ether_addr(dst) && !compare_ether_addr(src, dev->dev_addr))) return -1; break; - case __constant_cpu_to_le16(0): + case cpu_to_le16(0): if (sdata->vif.type != NL80211_IFTYPE_ADHOC) return -1; break; -- cgit v1.2.3 From 7fee5372d814c4be9546e5c28ac0058258d8df3e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 30 Jan 2009 11:13:06 +0100 Subject: mac80211: remove HW_SIGNAL_DB Giving the signal in dB isn't much more useful to userspace than giving the signal in unspecified units. This removes some radiotap information for zd1211 (the only driver using this flag), but it helps a lot for getting cfg80211-based scanning which won't support dB, and zd1211 being dB is a little fishy anyway. Signed-off-by: Johannes Berg Cc: Bruno Randolf Signed-off-by: John W. Linville --- net/mac80211/main.c | 1 - net/mac80211/rx.c | 11 +---------- net/mac80211/wext.c | 3 +-- 3 files changed, 2 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index a109c06e8e4e..7247b303e966 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -884,7 +884,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) local->hw.conf.listen_interval = local->hw.max_listen_interval; local->wstats_flags |= local->hw.flags & (IEEE80211_HW_SIGNAL_UNSPEC | - IEEE80211_HW_SIGNAL_DB | IEEE80211_HW_SIGNAL_DBM) ? IW_QUAL_QUAL_UPDATED : IW_QUAL_QUAL_INVALID; local->wstats_flags |= local->hw.flags & IEEE80211_HW_NOISE_DBM ? diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 1a59382976e6..8e8ddbfcd236 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -86,8 +86,7 @@ ieee80211_rx_radiotap_len(struct ieee80211_local *local, if (status->flag & RX_FLAG_TSFT) len += 8; - if (local->hw.flags & IEEE80211_HW_SIGNAL_DB || - local->hw.flags & IEEE80211_HW_SIGNAL_DBM) + if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) len += 1; if (local->hw.flags & IEEE80211_HW_NOISE_DBM) len += 1; @@ -199,14 +198,6 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, *pos = status->antenna; pos++; - /* IEEE80211_RADIOTAP_DB_ANTSIGNAL */ - if (local->hw.flags & IEEE80211_HW_SIGNAL_DB) { - *pos = status->signal; - rthdr->it_present |= - cpu_to_le32(1 << IEEE80211_RADIOTAP_DB_ANTSIGNAL); - pos++; - } - /* IEEE80211_RADIOTAP_DB_ANTNOISE is not used */ /* IEEE80211_RADIOTAP_RX_FLAGS */ diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 5c88b8246bbb..bad1cfbfdf18 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -173,8 +173,7 @@ static int ieee80211_ioctl_giwrange(struct net_device *dev, range->num_encoding_sizes = 2; range->max_encoding_tokens = NUM_DEFAULT_KEYS; - if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC || - local->hw.flags & IEEE80211_HW_SIGNAL_DB) + if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) range->max_qual.level = local->hw.max_signal; else if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) range->max_qual.level = -110; -- cgit v1.2.3 From 587e729ecff959482d25c73278a1fbadbc6a54fe Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 30 Jan 2009 13:35:22 +0100 Subject: mac80211: convert to net_device_ops Convert to new net_device_ops in 2.6.28 and later. Signed-off-by: Stephen Hemminger Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/iface.c | 47 ++++++++++++++++++++++++++++++----------------- 1 file changed, 30 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 00562a8b99cf..915d04323a32 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -591,19 +591,6 @@ static void ieee80211_set_multicast_list(struct net_device *dev) dev_mc_sync(local->mdev, dev); } -static void ieee80211_if_setup(struct net_device *dev) -{ - ether_setup(dev); - dev->hard_start_xmit = ieee80211_subif_start_xmit; - dev->wireless_handlers = &ieee80211_iw_handler_def; - dev->set_multicast_list = ieee80211_set_multicast_list; - dev->change_mtu = ieee80211_change_mtu; - dev->open = ieee80211_open; - dev->stop = ieee80211_stop; - dev->destructor = free_netdev; - /* we will validate the address ourselves in ->open */ - dev->validate_addr = NULL; -} /* * Called when the netdev is removed or, by the code below, before * the interface type changes. @@ -671,6 +658,34 @@ static void ieee80211_teardown_sdata(struct net_device *dev) WARN_ON(flushed); } +static const struct net_device_ops ieee80211_dataif_ops = { + .ndo_open = ieee80211_open, + .ndo_stop = ieee80211_stop, + .ndo_uninit = ieee80211_teardown_sdata, + .ndo_start_xmit = ieee80211_subif_start_xmit, + .ndo_set_multicast_list = ieee80211_set_multicast_list, + .ndo_change_mtu = ieee80211_change_mtu, + .ndo_set_mac_address = eth_mac_addr, +}; + +static const struct net_device_ops ieee80211_monitorif_ops = { + .ndo_open = ieee80211_open, + .ndo_stop = ieee80211_stop, + .ndo_uninit = ieee80211_teardown_sdata, + .ndo_start_xmit = ieee80211_monitor_start_xmit, + .ndo_set_multicast_list = ieee80211_set_multicast_list, + .ndo_change_mtu = ieee80211_change_mtu, + .ndo_set_mac_address = eth_mac_addr, +}; + +static void ieee80211_if_setup(struct net_device *dev) +{ + ether_setup(dev); + dev->netdev_ops = &ieee80211_dataif_ops; + dev->wireless_handlers = &ieee80211_iw_handler_def; + dev->destructor = free_netdev; +} + /* * Helper function to initialise an interface to a specific type. */ @@ -682,7 +697,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, /* and set some type-dependent values */ sdata->vif.type = type; - sdata->dev->hard_start_xmit = ieee80211_subif_start_xmit; + sdata->dev->netdev_ops = &ieee80211_dataif_ops; sdata->wdev.iftype = type; /* only monitor differs */ @@ -703,7 +718,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, break; case NL80211_IFTYPE_MONITOR: sdata->dev->type = ARPHRD_IEEE80211_RADIOTAP; - sdata->dev->hard_start_xmit = ieee80211_monitor_start_xmit; + sdata->dev->netdev_ops = &ieee80211_monitorif_ops; sdata->u.mntr_flags = MONITOR_FLAG_CONTROL | MONITOR_FLAG_OTHER_BSS; break; @@ -809,8 +824,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, if (ret) goto fail; - ndev->uninit = ieee80211_teardown_sdata; - if (ieee80211_vif_is_mesh(&sdata->vif) && params && params->mesh_id_len) ieee80211_sdata_set_mesh_id(sdata, -- cgit v1.2.3 From 7230645e329b4a9c566fefa9327eb8734c7d392c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 30 Jan 2009 13:36:25 +0100 Subject: mac80211: convert master interface to netdev_ops Also call our own ieee80211_master_setup routine instead of overwriting almost all the values from ether_setup; this loses a few assignments that are pointless on the master interface anyway. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/main.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 7247b303e966..caf92424c76d 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -791,6 +791,23 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, } EXPORT_SYMBOL(ieee80211_alloc_hw); +static const struct net_device_ops ieee80211_master_ops = { + .ndo_start_xmit = ieee80211_master_start_xmit, + .ndo_open = ieee80211_master_open, + .ndo_stop = ieee80211_master_stop, + .ndo_set_multicast_list = ieee80211_master_set_multicast_list, + .ndo_select_queue = ieee80211_select_queue, +}; + +static void ieee80211_master_setup(struct net_device *mdev) +{ + mdev->type = ARPHRD_IEEE80211; + mdev->netdev_ops = &ieee80211_master_ops; + mdev->header_ops = &ieee80211_header_ops; + mdev->tx_queue_len = 1000; + mdev->addr_len = ETH_ALEN; +} + int ieee80211_register_hw(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); @@ -840,7 +857,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) hw->ampdu_queues = 0; mdev = alloc_netdev_mq(sizeof(struct ieee80211_master_priv), - "wmaster%d", ether_setup, + "wmaster%d", ieee80211_master_setup, ieee80211_num_queues(hw)); if (!mdev) goto fail_mdev_alloc; @@ -851,13 +868,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) ieee80211_rx_bss_list_init(local); - mdev->hard_start_xmit = ieee80211_master_start_xmit; - mdev->open = ieee80211_master_open; - mdev->stop = ieee80211_master_stop; - mdev->type = ARPHRD_IEEE80211; - mdev->header_ops = &ieee80211_header_ops; - mdev->set_multicast_list = ieee80211_master_set_multicast_list; - local->hw.workqueue = create_singlethread_workqueue(wiphy_name(local->hw.wiphy)); if (!local->hw.workqueue) { @@ -923,8 +933,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) goto fail_wep; } - local->mdev->select_queue = ieee80211_select_queue; - /* add one default STA interface if supported */ if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_STATION)) { result = ieee80211_if_add(local, "wlan%d", NULL, -- cgit v1.2.3 From 47f4d8872ffc57ad92d0fb344e677d12acc34acd Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Fri, 30 Jan 2009 09:08:29 -0800 Subject: mac80211: do not TX injected frames when not allowed Monitor mode is able to TX by using injected frames. We should not allow injected frames to be sent unless allowed by regulatory rules. Since AP mode uses a monitor interfaces to transmit management frames we have to take care to not break AP mode as well while resolving this. We can deal with this by allowing compliant APs solutions to inform mac80211 if their monitor interface is intended to be used for an AP by setting a cfg80211 flag for the monitor interface. hostapd, for example, currently does its own checks to ensure AP mode is not used on channels which require radar detection. Once such solutions are available it can can add this flag for monitor interfaces. Acked-by: Johannes Berg Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/mac80211/tx.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 7b013fb0d27f..f1c726d94f47 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1433,10 +1433,31 @@ int ieee80211_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_channel *chan = local->hw.conf.channel; struct ieee80211_radiotap_header *prthdr = (struct ieee80211_radiotap_header *)skb->data; u16 len_rthdr; + /* + * Frame injection is not allowed if beaconing is not allowed + * or if we need radar detection. Beaconing is usually not allowed when + * the mode or operation (Adhoc, AP, Mesh) does not support DFS. + * Passive scan is also used in world regulatory domains where + * your country is not known and as such it should be treated as + * NO TX unless the channel is explicitly allowed in which case + * your current regulatory domain would not have the passive scan + * flag. + * + * Since AP mode uses monitor interfaces to inject/TX management + * frames we can make AP mode the exception to this rule once it + * supports radar detection as its implementation can deal with + * radar detection by itself. We can do that later by adding a + * monitor flag interfaces used for AP support. + */ + if ((chan->flags & (IEEE80211_CHAN_NO_IBSS | IEEE80211_CHAN_RADAR | + IEEE80211_CHAN_PASSIVE_SCAN))) + goto fail; + /* check for not even having the fixed radiotap header part */ if (unlikely(skb->len < sizeof(struct ieee80211_radiotap_header))) goto fail; /* too short to be possibly valid */ -- cgit v1.2.3 From f130347c2dd8e7ce0757cd3cf80bedbc6ed63c4c Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Fri, 30 Jan 2009 09:26:42 -0800 Subject: cfg80211: add get reg command This lets userspace request to get the currently set regulatory domain. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++ net/wireless/reg.c | 2 +- net/wireless/reg.h | 2 ++ 3 files changed, 84 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e69da8d20474..d452396006ee 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2093,6 +2093,81 @@ static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info) #undef FILL_IN_MESH_PARAM_IF_SET +static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) +{ + struct sk_buff *msg; + void *hdr = NULL; + struct nlattr *nl_reg_rules; + unsigned int i; + int err = -EINVAL; + + mutex_lock(&cfg80211_drv_mutex); + + if (!cfg80211_regdomain) + goto out; + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) { + err = -ENOBUFS; + goto out; + } + + hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + NL80211_CMD_GET_REG); + if (!hdr) + goto nla_put_failure; + + NLA_PUT_STRING(msg, NL80211_ATTR_REG_ALPHA2, + cfg80211_regdomain->alpha2); + + nl_reg_rules = nla_nest_start(msg, NL80211_ATTR_REG_RULES); + if (!nl_reg_rules) + goto nla_put_failure; + + for (i = 0; i < cfg80211_regdomain->n_reg_rules; i++) { + struct nlattr *nl_reg_rule; + const struct ieee80211_reg_rule *reg_rule; + const struct ieee80211_freq_range *freq_range; + const struct ieee80211_power_rule *power_rule; + + reg_rule = &cfg80211_regdomain->reg_rules[i]; + freq_range = ®_rule->freq_range; + power_rule = ®_rule->power_rule; + + nl_reg_rule = nla_nest_start(msg, i); + if (!nl_reg_rule) + goto nla_put_failure; + + NLA_PUT_U32(msg, NL80211_ATTR_REG_RULE_FLAGS, + reg_rule->flags); + NLA_PUT_U32(msg, NL80211_ATTR_FREQ_RANGE_START, + freq_range->start_freq_khz); + NLA_PUT_U32(msg, NL80211_ATTR_FREQ_RANGE_END, + freq_range->end_freq_khz); + NLA_PUT_U32(msg, NL80211_ATTR_FREQ_RANGE_MAX_BW, + freq_range->max_bandwidth_khz); + NLA_PUT_U32(msg, NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN, + power_rule->max_antenna_gain); + NLA_PUT_U32(msg, NL80211_ATTR_POWER_RULE_MAX_EIRP, + power_rule->max_eirp); + + nla_nest_end(msg, nl_reg_rule); + } + + nla_nest_end(msg, nl_reg_rules); + + genlmsg_end(msg, hdr); + err = genlmsg_unicast(msg, info->snd_pid); + goto out; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + err = -EMSGSIZE; +out: + mutex_unlock(&cfg80211_drv_mutex); + return err; +} + static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) { struct nlattr *tb[NL80211_REG_RULE_ATTR_MAX + 1]; @@ -2332,6 +2407,12 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, + { + .cmd = NL80211_CMD_GET_REG, + .doit = nl80211_get_reg, + .policy = nl80211_policy, + /* can be retrieved by unprivileged users */ + }, { .cmd = NL80211_CMD_SET_REG, .doit = nl80211_set_reg, diff --git a/net/wireless/reg.c b/net/wireless/reg.c index f643d3981102..2323644330cd 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -57,7 +57,7 @@ static u32 supported_bandwidths[] = { /* Central wireless core regulatory domains, we only need two, * the current one and a world regulatory domain in case we have no * information to give us an alpha2 */ -static const struct ieee80211_regdomain *cfg80211_regdomain; +const struct ieee80211_regdomain *cfg80211_regdomain; /* We use this as a place for the rd structure built from the * last parsed country IE to rest until CRDA gets back to us with diff --git a/net/wireless/reg.h b/net/wireless/reg.h index eb1dd5bc9b27..fe8c83f34fb7 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -1,6 +1,8 @@ #ifndef __NET_WIRELESS_REG_H #define __NET_WIRELESS_REG_H +extern const struct ieee80211_regdomain *cfg80211_regdomain; + bool is_world_regdom(const char *alpha2); bool reg_is_valid_request(const char *alpha2); -- cgit v1.2.3 From d43e87868f67c5b52defd8d82bc3f54347ed2408 Mon Sep 17 00:00:00 2001 From: Vasanthakumar Thiagarajan Date: Tue, 3 Feb 2009 10:09:49 +0530 Subject: mac80211: Remove bss information of the current AP when it goes out of range There is no point having the bss information of currently associated AP when the AP is detected to be out of range. Signed-off-by: Vasanthakumar Thiagarajan Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 73808780f538..57967d32e5fd 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1042,6 +1042,7 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sta_info *sta; int disassoc; + bool remove_bss = false; /* TODO: start monitoring current AP signal quality and number of * missed beacons. Scan other channels every now and then and search @@ -1067,6 +1068,7 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata, "range\n", sdata->dev->name, ifsta->bssid); disassoc = 1; + remove_bss = true; } else ieee80211_send_probe_req(sdata, ifsta->bssid, ifsta->ssid, @@ -1086,12 +1088,24 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); - if (disassoc) + if (disassoc) { ieee80211_set_disassoc(sdata, ifsta, true, true, WLAN_REASON_PREV_AUTH_NOT_VALID); - else + if (remove_bss) { + struct ieee80211_bss *bss; + + bss = ieee80211_rx_bss_get(local, ifsta->bssid, + local->hw.conf.channel->center_freq, + ifsta->ssid, ifsta->ssid_len); + if (bss) { + atomic_dec(&bss->users); + ieee80211_rx_bss_put(local, bss); + } + } + } else { mod_timer(&ifsta->timer, jiffies + IEEE80211_MONITORING_INTERVAL); + } } -- cgit v1.2.3 From 149490f131ab532a3b9e8806249a0e730994cdf6 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Tue, 10 Feb 2009 00:11:21 -0800 Subject: pkt_sched: sch_multiq: Change errno on non-multiqueue devices use. Current "RTNETLINK answers: Invalid argument" warning, while trying to add multiq qdisc to non-multiqueue device, isn't very helpful and some of these devs can be changed btw., so let's use a better errno. With feedback from Stephen Hemminger Reported-by: Badalian Vyacheslav Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/sch_multiq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 7e151861794b..912731203047 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -202,7 +202,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) int i; if (!netif_is_multiqueue(qdisc_dev(sch))) - return -EINVAL; + return -EOPNOTSUPP; if (nla_len(opt) < sizeof(*qopt)) return -EINVAL; -- cgit v1.2.3 From b4ac530fc3af02a004729043dacf6b6330b46892 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 10 Feb 2009 02:09:24 -0800 Subject: net: Move skbuff symbol exports after each symbol's definition. net/core/skbuff.c is a hodge-podge of symbol export placement. Some of the exports are right after the definition of the symbol being exported, others are clumped together into a big group at the end of the file. Make things consistent. Signed-off-by: David S. Miller --- net/core/skbuff.c | 79 +++++++++++++++++++++++++------------------------------ 1 file changed, 36 insertions(+), 43 deletions(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 67f2a2f85827..7657cec5973d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -123,6 +123,7 @@ void skb_over_panic(struct sk_buff *skb, int sz, void *here) skb->dev ? skb->dev->name : ""); BUG(); } +EXPORT_SYMBOL(skb_over_panic); /** * skb_under_panic - private function @@ -142,6 +143,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) skb->dev ? skb->dev->name : ""); BUG(); } +EXPORT_SYMBOL(skb_under_panic); void skb_truesize_bug(struct sk_buff *skb) { @@ -231,6 +233,7 @@ nodata: skb = NULL; goto out; } +EXPORT_SYMBOL(__alloc_skb); /** * __netdev_alloc_skb - allocate an skbuff for rx on a specific device @@ -258,6 +261,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, } return skb; } +EXPORT_SYMBOL(__netdev_alloc_skb); struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask) { @@ -426,6 +430,7 @@ void __kfree_skb(struct sk_buff *skb) skb_release_all(skb); kfree_skbmem(skb); } +EXPORT_SYMBOL(__kfree_skb); /** * kfree_skb - free an sk_buff @@ -444,6 +449,7 @@ void kfree_skb(struct sk_buff *skb) return; __kfree_skb(skb); } +EXPORT_SYMBOL(kfree_skb); /** * skb_recycle_check - check if skb can be reused for receive @@ -613,6 +619,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) return __skb_clone(n, skb); } +EXPORT_SYMBOL(skb_clone); static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) { @@ -679,7 +686,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) copy_skb_header(n, skb); return n; } - +EXPORT_SYMBOL(skb_copy); /** * pskb_copy - create copy of an sk_buff with private head. @@ -738,6 +745,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) out: return n; } +EXPORT_SYMBOL(pskb_copy); /** * pskb_expand_head - reallocate header of &sk_buff @@ -821,6 +829,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, nodata: return -ENOMEM; } +EXPORT_SYMBOL(pskb_expand_head); /* Make private copy of skb with writable head and some headroom */ @@ -841,7 +850,7 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) } return skb2; } - +EXPORT_SYMBOL(skb_realloc_headroom); /** * skb_copy_expand - copy and expand sk_buff @@ -906,6 +915,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, return n; } +EXPORT_SYMBOL(skb_copy_expand); /** * skb_pad - zero pad the tail of an skb @@ -951,6 +961,7 @@ free_skb: kfree_skb(skb); return err; } +EXPORT_SYMBOL(skb_pad); /** * skb_put - add data to a buffer @@ -1108,6 +1119,7 @@ done: return 0; } +EXPORT_SYMBOL(___pskb_trim); /** * __pskb_pull_tail - advance tail of skb header @@ -1246,6 +1258,7 @@ pull_pages: return skb_tail_pointer(skb); } +EXPORT_SYMBOL(__pskb_pull_tail); /* Copy some data bits from skb to kernel buffer. */ @@ -1323,6 +1336,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) fault: return -EFAULT; } +EXPORT_SYMBOL(skb_copy_bits); /* * Callback from splice_to_pipe(), if we need to release some pages @@ -1623,7 +1637,6 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len) fault: return -EFAULT; } - EXPORT_SYMBOL(skb_store_bits); /* Checksum skb data. */ @@ -1700,6 +1713,7 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, return csum; } +EXPORT_SYMBOL(skb_checksum); /* Both of above in one bottle. */ @@ -1781,6 +1795,7 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, BUG_ON(len); return csum; } +EXPORT_SYMBOL(skb_copy_and_csum_bits); void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) { @@ -1807,6 +1822,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) *((__sum16 *)(to + csstuff)) = csum_fold(csum); } } +EXPORT_SYMBOL(skb_copy_and_csum_dev); /** * skb_dequeue - remove from the head of the queue @@ -1827,6 +1843,7 @@ struct sk_buff *skb_dequeue(struct sk_buff_head *list) spin_unlock_irqrestore(&list->lock, flags); return result; } +EXPORT_SYMBOL(skb_dequeue); /** * skb_dequeue_tail - remove from the tail of the queue @@ -1846,6 +1863,7 @@ struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list) spin_unlock_irqrestore(&list->lock, flags); return result; } +EXPORT_SYMBOL(skb_dequeue_tail); /** * skb_queue_purge - empty a list @@ -1861,6 +1879,7 @@ void skb_queue_purge(struct sk_buff_head *list) while ((skb = skb_dequeue(list)) != NULL) kfree_skb(skb); } +EXPORT_SYMBOL(skb_queue_purge); /** * skb_queue_head - queue a buffer at the list head @@ -1881,6 +1900,7 @@ void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) __skb_queue_head(list, newsk); spin_unlock_irqrestore(&list->lock, flags); } +EXPORT_SYMBOL(skb_queue_head); /** * skb_queue_tail - queue a buffer at the list tail @@ -1901,6 +1921,7 @@ void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) __skb_queue_tail(list, newsk); spin_unlock_irqrestore(&list->lock, flags); } +EXPORT_SYMBOL(skb_queue_tail); /** * skb_unlink - remove a buffer from a list @@ -1920,6 +1941,7 @@ void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) __skb_unlink(skb, list); spin_unlock_irqrestore(&list->lock, flags); } +EXPORT_SYMBOL(skb_unlink); /** * skb_append - append a buffer @@ -1939,7 +1961,7 @@ void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head __skb_queue_after(list, old, newsk); spin_unlock_irqrestore(&list->lock, flags); } - +EXPORT_SYMBOL(skb_append); /** * skb_insert - insert a buffer @@ -1961,6 +1983,7 @@ void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head __skb_insert(newsk, old->prev, old, list); spin_unlock_irqrestore(&list->lock, flags); } +EXPORT_SYMBOL(skb_insert); static inline void skb_split_inside_header(struct sk_buff *skb, struct sk_buff* skb1, @@ -2039,6 +2062,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) else /* Second chunk has no header, nothing to copy. */ skb_split_no_header(skb, skb1, len, pos); } +EXPORT_SYMBOL(skb_split); /* Shifting from/to a cloned skb is a no-go. * @@ -2201,6 +2225,7 @@ void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from, st->frag_idx = st->stepped_offset = 0; st->frag_data = NULL; } +EXPORT_SYMBOL(skb_prepare_seq_read); /** * skb_seq_read - Sequentially read skb data @@ -2288,6 +2313,7 @@ next_skb: return 0; } +EXPORT_SYMBOL(skb_seq_read); /** * skb_abort_seq_read - Abort a sequential read of skb data @@ -2301,6 +2327,7 @@ void skb_abort_seq_read(struct skb_seq_state *st) if (st->frag_data) kunmap_skb_frag(st->frag_data); } +EXPORT_SYMBOL(skb_abort_seq_read); #define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb)) @@ -2343,6 +2370,7 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, ret = textsearch_find(config, state); return (ret <= to - from ? ret : UINT_MAX); } +EXPORT_SYMBOL(skb_find_text); /** * skb_append_datato_frags: - append the user data to a skb @@ -2415,6 +2443,7 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, return 0; } +EXPORT_SYMBOL(skb_append_datato_frags); /** * skb_pull_rcsum - pull skb and update receive checksum @@ -2602,7 +2631,6 @@ err: } return ERR_PTR(err); } - EXPORT_SYMBOL_GPL(skb_segment); int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) @@ -2800,6 +2828,7 @@ int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int le return nsg; } +EXPORT_SYMBOL_GPL(skb_to_sgvec); /** * skb_cow_data - Check that a socket buffer's data buffers are writable @@ -2909,6 +2938,7 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) return elt; } +EXPORT_SYMBOL_GPL(skb_cow_data); /** * skb_partial_csum_set - set up and verify partial csum values for packet @@ -2937,6 +2967,7 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off) skb->csum_offset = off; return true; } +EXPORT_SYMBOL_GPL(skb_partial_csum_set); void __skb_warn_lro_forwarding(const struct sk_buff *skb) { @@ -2944,42 +2975,4 @@ void __skb_warn_lro_forwarding(const struct sk_buff *skb) pr_warning("%s: received packets cannot be forwarded" " while LRO is enabled\n", skb->dev->name); } - -EXPORT_SYMBOL(___pskb_trim); -EXPORT_SYMBOL(__kfree_skb); -EXPORT_SYMBOL(kfree_skb); -EXPORT_SYMBOL(__pskb_pull_tail); -EXPORT_SYMBOL(__alloc_skb); -EXPORT_SYMBOL(__netdev_alloc_skb); -EXPORT_SYMBOL(pskb_copy); -EXPORT_SYMBOL(pskb_expand_head); -EXPORT_SYMBOL(skb_checksum); -EXPORT_SYMBOL(skb_clone); -EXPORT_SYMBOL(skb_copy); -EXPORT_SYMBOL(skb_copy_and_csum_bits); -EXPORT_SYMBOL(skb_copy_and_csum_dev); -EXPORT_SYMBOL(skb_copy_bits); -EXPORT_SYMBOL(skb_copy_expand); -EXPORT_SYMBOL(skb_over_panic); -EXPORT_SYMBOL(skb_pad); -EXPORT_SYMBOL(skb_realloc_headroom); -EXPORT_SYMBOL(skb_under_panic); -EXPORT_SYMBOL(skb_dequeue); -EXPORT_SYMBOL(skb_dequeue_tail); -EXPORT_SYMBOL(skb_insert); -EXPORT_SYMBOL(skb_queue_purge); -EXPORT_SYMBOL(skb_queue_head); -EXPORT_SYMBOL(skb_queue_tail); -EXPORT_SYMBOL(skb_unlink); -EXPORT_SYMBOL(skb_append); -EXPORT_SYMBOL(skb_split); -EXPORT_SYMBOL(skb_prepare_seq_read); -EXPORT_SYMBOL(skb_seq_read); -EXPORT_SYMBOL(skb_abort_seq_read); -EXPORT_SYMBOL(skb_find_text); -EXPORT_SYMBOL(skb_append_datato_frags); EXPORT_SYMBOL(__skb_warn_lro_forwarding); - -EXPORT_SYMBOL_GPL(skb_to_sgvec); -EXPORT_SYMBOL_GPL(skb_cow_data); -EXPORT_SYMBOL_GPL(skb_partial_csum_set); -- cgit v1.2.3 From 7a9470806053f765ecf7f3932acb4c95c204ad4b Mon Sep 17 00:00:00 2001 From: Vasanthakumar Thiagarajan Date: Wed, 4 Feb 2009 18:28:48 +0530 Subject: mac80211: Free current bss information in few places where we don't need it any more Signed-off-by: Vasanthakumar Thiagarajan Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/mlme.c | 38 +++++++++++++++++++++----------------- net/mac80211/scan.c | 13 +++++++++++++ 3 files changed, 36 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index eaf3603862b7..5a1f19ad43c8 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -955,6 +955,8 @@ ieee80211_rx_bss_get(struct ieee80211_local *local, u8 *bssid, int freq, u8 *ssid, u8 ssid_len); void ieee80211_rx_bss_put(struct ieee80211_local *local, struct ieee80211_bss *bss); +void ieee80211_rx_bss_remove(struct ieee80211_sub_if_data *sdata, u8 *bssid, + int freq, u8 *ssid, u8 ssid_len); /* interface handling */ int ieee80211_if_add(struct ieee80211_local *local, const char *name, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 57967d32e5fd..91c9a5a5746d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -840,6 +840,14 @@ static void ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata, sdata->dev->name, ifsta->bssid); ifsta->state = IEEE80211_STA_MLME_DISABLED; ieee80211_sta_send_apinfo(sdata, ifsta); + + /* + * Most likely AP is not in the range so remove the + * bss information associated to the AP + */ + ieee80211_rx_bss_remove(sdata, ifsta->bssid, + sdata->local->hw.conf.channel->center_freq, + ifsta->ssid, ifsta->ssid_len); return; } @@ -871,6 +879,9 @@ static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata, sdata->dev->name, ifsta->bssid); ifsta->state = IEEE80211_STA_MLME_DISABLED; ieee80211_sta_send_apinfo(sdata, ifsta); + ieee80211_rx_bss_remove(sdata, ifsta->bssid, + sdata->local->hw.conf.channel->center_freq, + ifsta->ssid, ifsta->ssid_len); return; } @@ -933,8 +944,12 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_sta_send_apinfo(sdata, ifsta); - if (self_disconnected || reason == WLAN_REASON_DISASSOC_STA_HAS_LEFT) + if (self_disconnected || reason == WLAN_REASON_DISASSOC_STA_HAS_LEFT) { ifsta->state = IEEE80211_STA_MLME_DISABLED; + ieee80211_rx_bss_remove(sdata, ifsta->bssid, + sdata->local->hw.conf.channel->center_freq, + ifsta->ssid, ifsta->ssid_len); + } rcu_read_unlock(); @@ -1017,6 +1032,9 @@ static void ieee80211_associate(struct ieee80211_sub_if_data *sdata, sdata->dev->name, ifsta->bssid); ifsta->state = IEEE80211_STA_MLME_DISABLED; ieee80211_sta_send_apinfo(sdata, ifsta); + ieee80211_rx_bss_remove(sdata, ifsta->bssid, + sdata->local->hw.conf.channel->center_freq, + ifsta->ssid, ifsta->ssid_len); return; } @@ -1042,7 +1060,6 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sta_info *sta; int disassoc; - bool remove_bss = false; /* TODO: start monitoring current AP signal quality and number of * missed beacons. Scan other channels every now and then and search @@ -1068,7 +1085,6 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata, "range\n", sdata->dev->name, ifsta->bssid); disassoc = 1; - remove_bss = true; } else ieee80211_send_probe_req(sdata, ifsta->bssid, ifsta->ssid, @@ -1088,24 +1104,12 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); - if (disassoc) { + if (disassoc) ieee80211_set_disassoc(sdata, ifsta, true, true, WLAN_REASON_PREV_AUTH_NOT_VALID); - if (remove_bss) { - struct ieee80211_bss *bss; - - bss = ieee80211_rx_bss_get(local, ifsta->bssid, - local->hw.conf.channel->center_freq, - ifsta->ssid, ifsta->ssid_len); - if (bss) { - atomic_dec(&bss->users); - ieee80211_rx_bss_put(local, bss); - } - } - } else { + else mod_timer(&ifsta->timer, jiffies + IEEE80211_MONITORING_INTERVAL); - } } diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 282e6a0dec01..50719ea08172 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -327,6 +327,19 @@ ieee80211_bss_info_update(struct ieee80211_local *local, return bss; } +void ieee80211_rx_bss_remove(struct ieee80211_sub_if_data *sdata, u8 *bssid, + int freq, u8 *ssid, u8 ssid_len) +{ + struct ieee80211_bss *bss; + struct ieee80211_local *local = sdata->local; + + bss = ieee80211_rx_bss_get(local, bssid, freq, ssid, ssid_len); + if (bss) { + atomic_dec(&bss->users); + ieee80211_rx_bss_put(local, bss); + } +} + ieee80211_rx_result ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, struct ieee80211_rx_status *rx_status) -- cgit v1.2.3 From ce3dd39595d9d64f4ba6ee8dd24c6269a3b56b6a Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Thu, 12 Feb 2009 16:51:43 -0800 Subject: net: Fix page seeking for skb_splice_bits(). struct page walking should be done with proper accessor functions, not directly. With doubts from David S. Miller and Herbert Xu. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/core/skbuff.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 7657cec5973d..ab7d2e9f02fa 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1412,8 +1412,13 @@ static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page, static inline void __segment_seek(struct page **page, unsigned int *poff, unsigned int *plen, unsigned int off) { + unsigned long n; + *poff += off; - *page += *poff / PAGE_SIZE; + n = *poff / PAGE_SIZE; + if (n) + *page = nth_page(*page, n); + *poff = *poff % PAGE_SIZE; *plen -= off; } -- cgit v1.2.3 From 97d97b80984d0207e5c125c1b7b9467aad365d8d Mon Sep 17 00:00:00 2001 From: Vivek Natarajan Date: Thu, 5 Feb 2009 20:05:15 +0530 Subject: mac80211: Fix the wrong WARN_ON message appearing on enabling power save. This issue happens only when we are associated with a 11n AP and power save is enabled. In the function 'ieee80211_master_start_xmit', ps_disable_work is queued where wake_queues is called. But before this work is executed, we check if the queues are stopped in _ieee80211_tx and return TX_AGAIN to ieee8011_tx which leads to the warning message. This patch fixes this erroneous case. Signed-off-by: Vivek Natarajan Signed-off-by: John W. Linville --- net/mac80211/tx.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index f1c726d94f47..bf73f6d561b7 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -35,6 +35,7 @@ #define IEEE80211_TX_OK 0 #define IEEE80211_TX_AGAIN 1 #define IEEE80211_TX_FRAG_AGAIN 2 +#define IEEE80211_TX_PENDING 3 /* misc utils */ @@ -1085,7 +1086,7 @@ static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb, if (skb) { if (netif_subqueue_stopped(local->mdev, skb)) - return IEEE80211_TX_AGAIN; + return IEEE80211_TX_PENDING; ret = local->ops->tx(local_to_hw(local), skb); if (ret) @@ -1211,8 +1212,9 @@ retry: * queues, there's no reason for a driver to reject * a frame there, warn and drop it. */ - if (WARN_ON(info->flags & IEEE80211_TX_CTL_AMPDU)) - goto drop; + if (ret != IEEE80211_TX_PENDING) + if (WARN_ON(info->flags & IEEE80211_TX_CTL_AMPDU)) + goto drop; store = &local->pending_packet[queue]; -- cgit v1.2.3 From 1fb3606bc5864c64c78ce4e1751e5382a9a5aa84 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Tue, 10 Feb 2009 17:09:24 +0200 Subject: mac80211: remove multicast check from check_tim() Currently mac80211 checks for the multicast tim bit from beacons, disables power save and sends a null frame if the bit is set. This was added to support ath9k. But this is a bit controversial because the AP will send multicast frames immediately after the beacon and the time constraints are really high. Relying mac80211 to be fast enough here might not be reliable in all situations. And there's no need to send a null frame, AP will send the frames immediately after the dtim beacon no matter what. Also if dynamic power save is disabled (iwconfig wlan0 power timeout 0) currently mac80211 disables power save whenever the multicast bit is set but it's never enabled again after receiving the first multicast/broadcast frame. The current implementation is not usable on p54/stlc45xx and the easiest way to fix this is to remove the multicast tim bit check altogether. Handling multicast tim bit in host is rare, most of the designs do this in firmware/hardware, so it's better not to have it in mac80211. It's a lot better to do this in firmware/hardware, or if that's not possible it could be done in the driver. Also renamed the function to ieee80211_check_tim() to follow the style of the file. Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 91c9a5a5746d..05c8d13d39b6 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -611,7 +611,7 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, } } -static bool check_tim(struct ieee802_11_elems *elems, u16 aid, bool *is_mc) +static bool ieee80211_check_tim(struct ieee802_11_elems *elems, u16 aid) { u8 mask; u8 index, indexn1, indexn2; @@ -621,9 +621,6 @@ static bool check_tim(struct ieee802_11_elems *elems, u16 aid, bool *is_mc) index = aid / 8; mask = 1 << (aid & 7); - if (tim->bitmap_ctrl & 0x01) - *is_mc = true; - indexn1 = tim->bitmap_ctrl & 0xfe; indexn2 = elems->tim_len + indexn1 - 4; @@ -1840,7 +1837,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, struct ieee802_11_elems elems; struct ieee80211_local *local = sdata->local; u32 changed = 0; - bool erp_valid, directed_tim, is_mc = false; + bool erp_valid, directed_tim; u8 erp_value = 0; /* Process beacon from the current BSS */ @@ -1868,9 +1865,9 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK && local->hw.conf.flags & IEEE80211_CONF_PS) { - directed_tim = check_tim(&elems, ifsta->aid, &is_mc); + directed_tim = ieee80211_check_tim(&elems, ifsta->aid); - if (directed_tim || is_mc) { + if (directed_tim) { local->hw.conf.flags &= ~IEEE80211_CONF_PS; ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); ieee80211_send_nullfunc(local, sdata, 0); -- cgit v1.2.3 From 572e00122190e3064fa19bd9780b146d2d0f1905 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Tue, 10 Feb 2009 17:09:31 +0200 Subject: mac80211: use ps-poll when dynamic power save mode is disabled When a directed tim bit is set, mac80211 currently disables power save ands sends a null frame to the AP. But if dynamic power save is disabled, mac80211 will not enable power save ever gain. Fix this by adding ps-poll functionality to mac80211. When a directed tim bit is set, mac80211 sends a ps-poll frame to the AP and checks for the more data bit in the returned data frames. Using ps-poll is slower than waking up with null frame, but it's saves more power in cases where the traffic is low. Userspace can control if either ps-poll or null wakeup method is used by enabling and disabling dynamic power save. Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 3 +++ net/mac80211/mlme.c | 54 +++++++++++++++++++++++++++++++++++++++++++--- net/mac80211/rx.c | 34 +++++++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 5a1f19ad43c8..67bd5220cf40 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -728,6 +728,7 @@ struct ieee80211_local { unsigned int wmm_acm; /* bit field of ACM bits (BIT(802.1D tag)) */ bool powersave; + bool pspolling; struct work_struct dynamic_ps_enable_work; struct work_struct dynamic_ps_disable_work; struct timer_list dynamic_ps_timer; @@ -921,6 +922,8 @@ u32 ieee80211_sta_get_rates(struct ieee80211_local *local, enum ieee80211_band band); void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, u8 *ssid, size_t ssid_len); +void ieee80211_send_pspoll(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata); /* scan/BSS handling */ int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 05c8d13d39b6..169f10c51042 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -511,6 +511,39 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_tx_skb(sdata, skb, ifsta->flags & IEEE80211_STA_MFP_ENABLED); } +void ieee80211_send_pspoll(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_sta *ifsta = &sdata->u.sta; + struct ieee80211_pspoll *pspoll; + struct sk_buff *skb; + u16 fc; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*pspoll)); + if (!skb) { + printk(KERN_DEBUG "%s: failed to allocate buffer for " + "pspoll frame\n", sdata->dev->name); + return; + } + skb_reserve(skb, local->hw.extra_tx_headroom); + + pspoll = (struct ieee80211_pspoll *) skb_put(skb, sizeof(*pspoll)); + memset(pspoll, 0, sizeof(*pspoll)); + fc = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_PSPOLL | IEEE80211_FCTL_PM; + pspoll->frame_control = cpu_to_le16(fc); + pspoll->aid = cpu_to_le16(ifsta->aid); + + /* aid in PS-Poll has its two MSBs each set to 1 */ + pspoll->aid |= cpu_to_le16(1 << 15 | 1 << 14); + + memcpy(pspoll->bssid, ifsta->bssid, ETH_ALEN); + memcpy(pspoll->ta, sdata->dev->dev_addr, ETH_ALEN); + + ieee80211_tx_skb(sdata, skb, 0); + + return; +} + /* MLME */ static void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, struct ieee80211_bss *bss) @@ -1868,9 +1901,24 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, directed_tim = ieee80211_check_tim(&elems, ifsta->aid); if (directed_tim) { - local->hw.conf.flags &= ~IEEE80211_CONF_PS; - ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); - ieee80211_send_nullfunc(local, sdata, 0); + if (local->hw.conf.dynamic_ps_timeout > 0) { + local->hw.conf.flags &= ~IEEE80211_CONF_PS; + ieee80211_hw_config(local, + IEEE80211_CONF_CHANGE_PS); + ieee80211_send_nullfunc(local, sdata, 0); + } else { + local->pspolling = true; + + /* + * Here is assumed that the driver will be + * able to send ps-poll frame and receive a + * response even though power save mode is + * enabled, but some drivers might require + * to disable power save here. This needs + * to be investigated. + */ + ieee80211_send_pspoll(local, sdata); + } } } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 8e8ddbfcd236..0e030d3fbdec 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -731,6 +731,39 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) return result; } +static ieee80211_rx_result debug_noinline +ieee80211_rx_h_check_more_data(struct ieee80211_rx_data *rx) +{ + struct ieee80211_local *local; + struct ieee80211_hdr *hdr; + struct sk_buff *skb; + + local = rx->local; + skb = rx->skb; + hdr = (struct ieee80211_hdr *) skb->data; + + if (!local->pspolling) + return RX_CONTINUE; + + if (!ieee80211_has_fromds(hdr->frame_control)) + /* this is not from AP */ + return RX_CONTINUE; + + if (!ieee80211_is_data(hdr->frame_control)) + return RX_CONTINUE; + + if (!ieee80211_has_moredata(hdr->frame_control)) { + /* AP has no more frames buffered for us */ + local->pspolling = false; + return RX_CONTINUE; + } + + /* more data bit is set, let's request a new frame from the AP */ + ieee80211_send_pspoll(local, rx->sdata); + + return RX_CONTINUE; +} + static void ap_sta_ps_start(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; @@ -1987,6 +2020,7 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata, CALL_RXH(ieee80211_rx_h_passive_scan) CALL_RXH(ieee80211_rx_h_check) CALL_RXH(ieee80211_rx_h_decrypt) + CALL_RXH(ieee80211_rx_h_check_more_data) CALL_RXH(ieee80211_rx_h_sta_process) CALL_RXH(ieee80211_rx_h_defragment) CALL_RXH(ieee80211_rx_h_ps_poll) -- cgit v1.2.3 From 5e1333624827e7a91b2d2cc04ce978f050cae15e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Feb 2009 21:25:38 +0100 Subject: mac80211: disable IBSS beacon before join Before we have a probe response frame (which is used as the beacon too) there's no need to ask drivers to beacon, they will not get a beacon anyway. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/main.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index caf92424c76d..956afea4214d 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -210,6 +210,8 @@ int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed) !!rcu_dereference(sdata->u.ap.beacon); break; case NL80211_IFTYPE_ADHOC: + conf.enable_beacon = !!sdata->u.sta.probe_resp; + break; case NL80211_IFTYPE_MESH_POINT: conf.enable_beacon = true; break; -- cgit v1.2.3 From e4e5e2b0b83c816e581ca4671569306bcba77667 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Feb 2009 21:25:40 +0100 Subject: mac80211: properly validate/translate IW_AUTH_MFP values Make sure nobody passes in bogus values, and translate the values (although it isn't necessary). Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/wext.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index bad1cfbfdf18..acd5808b87f4 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -982,9 +982,21 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev, break; } if (sdata->vif.type == NL80211_IFTYPE_STATION || - sdata->vif.type == NL80211_IFTYPE_ADHOC) - sdata->u.sta.mfp = data->value; - else + sdata->vif.type == NL80211_IFTYPE_ADHOC) { + switch (data->value) { + case IW_AUTH_MFP_DISABLED: + sdata->u.sta.mfp = IEEE80211_MFP_DISABLED; + break; + case IW_AUTH_MFP_OPTIONAL: + sdata->u.sta.mfp = IEEE80211_MFP_OPTIONAL; + break; + case IW_AUTH_MFP_REQUIRED: + sdata->u.sta.mfp = IEEE80211_MFP_REQUIRED; + break; + default: + ret = -EINVAL; + } + } else ret = -EOPNOTSUPP; break; default: -- cgit v1.2.3 From 60b22511921fe79b2a94a27c09cadfd32fcef5d5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Feb 2009 21:25:41 +0100 Subject: mac80211: reject extra IEs for probe request when hw_scan We cannot currently hand off extra IEs to hw_scan, so reject configuring extra IEs for probe request frames when hw_scan is set. Signed-off-by: Johannes Berg Cc: Jouni Malinen Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a1a1344c5c4b..42d692fd9bec 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1176,11 +1176,16 @@ static int ieee80211_set_channel(struct wiphy *wiphy, return ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); } -static int set_mgmt_extra_ie_sta(struct ieee80211_if_sta *ifsta, u8 subtype, - u8 *ies, size_t ies_len) +static int set_mgmt_extra_ie_sta(struct ieee80211_sub_if_data *sdata, + u8 subtype, u8 *ies, size_t ies_len) { + struct ieee80211_local *local = sdata->local; + struct ieee80211_if_sta *ifsta = &sdata->u.sta; + switch (subtype) { case IEEE80211_STYPE_PROBE_REQ >> 4: + if (local->ops->hw_scan) + break; kfree(ifsta->ie_probereq); ifsta->ie_probereq = ies; ifsta->ie_probereq_len = ies_len; @@ -1244,7 +1249,7 @@ static int ieee80211_set_mgmt_extra_ie(struct wiphy *wiphy, switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_ADHOC: - ret = set_mgmt_extra_ie_sta(&sdata->u.sta, params->subtype, + ret = set_mgmt_extra_ie_sta(sdata, params->subtype, ies, ies_len); break; default: -- cgit v1.2.3 From 14b80724367dfdc86f4807461dd1f7f2dd630416 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Feb 2009 21:25:42 +0100 Subject: mac80211: fix beacon enable more Hopefully the last required fix ... disable beaconing only on beaconing interfaces, and thus avoid calling ieee80211_if_config for purely virtual interfaces (those driver doesn't know about). Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/scan.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 50719ea08172..eddca4e1e13c 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -500,7 +500,12 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw) } else netif_tx_wake_all_queues(sdata->dev); - ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON_ENABLED); + /* re-enable beaconing */ + if (sdata->vif.type == NL80211_IFTYPE_AP || + sdata->vif.type == NL80211_IFTYPE_ADHOC || + sdata->vif.type == NL80211_IFTYPE_MESH_POINT) + ieee80211_if_config(sdata, + IEEE80211_IFCC_BEACON_ENABLED); } mutex_unlock(&local->iflist_mtx); @@ -656,7 +661,12 @@ int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata, if (!netif_running(sdata->dev)) continue; - ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON_ENABLED); + /* disable beaconing */ + if (sdata->vif.type == NL80211_IFTYPE_AP || + sdata->vif.type == NL80211_IFTYPE_ADHOC || + sdata->vif.type == NL80211_IFTYPE_MESH_POINT) + ieee80211_if_config(sdata, + IEEE80211_IFCC_BEACON_ENABLED); if (sdata->vif.type == NL80211_IFTYPE_STATION) { if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) { -- cgit v1.2.3 From 7ab17c45b566b8a4a87ceac6cd6c6d77857189ab Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Feb 2009 21:25:43 +0100 Subject: mac80211: remove bssid argument from prepare_for_handlers It's a little confusing to get the BSSID outside the function and pass it in, when it's only needed for this function, so change that. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/rx.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 0e030d3fbdec..5a733c52f889 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2064,9 +2064,10 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata, /* main receive path */ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, - u8 *bssid, struct ieee80211_rx_data *rx, + struct ieee80211_rx_data *rx, struct ieee80211_hdr *hdr) { + u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len, sdata->vif.type); int multicast = is_multicast_ether_addr(hdr->addr1); switch (sdata->vif.type) { @@ -2169,7 +2170,6 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, int prepares; struct ieee80211_sub_if_data *prev = NULL; struct sk_buff *skb_new; - u8 *bssid; hdr = (struct ieee80211_hdr *)skb->data; memset(&rx, 0, sizeof(rx)); @@ -2208,9 +2208,8 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, if (sdata->vif.type == NL80211_IFTYPE_MONITOR) continue; - bssid = ieee80211_get_bssid(hdr, skb->len, sdata->vif.type); rx.flags |= IEEE80211_RX_RA_MATCH; - prepares = prepare_for_handlers(sdata, bssid, &rx, hdr); + prepares = prepare_for_handlers(sdata, &rx, hdr); if (!prepares) continue; -- cgit v1.2.3 From 8b1c814d65ae3219ee19d39ad6097f3d5249c23d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Feb 2009 21:25:44 +0100 Subject: mac80211: remove stray aggregation debugfs definition Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/sta_info.h | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index d13a44b935e2..d75c870fae65 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -65,7 +65,6 @@ enum ieee80211_sta_info_flags { #define HT_AGG_STATE_OPERATIONAL (HT_ADDBA_REQUESTED_MSK | \ HT_ADDBA_DRV_READY_MSK | \ HT_ADDBA_RECEIVED_MSK) -#define HT_AGG_STATE_DEBUGFS_CTL BIT(7) /** * struct tid_ampdu_tx - TID aggregation information (Tx). -- cgit v1.2.3 From 20ad19d0ac7389b04b566ebf3e0e497974f63ffa Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Feb 2009 21:25:45 +0100 Subject: mac80211: fix RX aggregation timeouts The values are in TUs (1.024ms), not ms. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/mesh_hwmp.c | 1 - net/mac80211/rx.c | 16 ++++++---------- net/mac80211/sta_info.h | 2 +- 4 files changed, 9 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 67bd5220cf40..5b230015f938 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -57,6 +57,8 @@ struct ieee80211_local; */ #define IEEE80211_SCAN_RESULT_EXPIRE (10 * HZ) +#define TU_TO_EXP_TIME(x) (jiffies + usecs_to_jiffies((x) * 1024)) + struct ieee80211_fragment_entry { unsigned long first_frag_time; unsigned int seq; diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 4f862b2a0041..60b35accda91 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -58,7 +58,6 @@ static inline u32 u32_field_get(u8 *preq_elem, int offset, bool ae) #define PERR_IE_DST_ADDR(x) (x + 2) #define PERR_IE_DST_DSN(x) u32_field_get(x, 8, 0); -#define TU_TO_EXP_TIME(x) (jiffies + msecs_to_jiffies(x * 1024 / 1000)) #define MSEC_TO_TU(x) (x*1000/1024) #define DSN_GT(x, y) ((long) (y) - (long) (x) < 0) #define DSN_LT(x, y) ((long) (x) - (long) (y) < 0) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 5a733c52f889..f34cc66d3f4b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1673,11 +1673,9 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx) start_seq_num = le16_to_cpu(bar->start_seq_num) >> 4; /* reset session timer */ - if (tid_agg_rx->timeout) { - unsigned long expires = - jiffies + (tid_agg_rx->timeout / 1000) * HZ; - mod_timer(&tid_agg_rx->session_timer, expires); - } + if (tid_agg_rx->timeout) + mod_timer(&tid_agg_rx->session_timer, + TU_TO_EXP_TIME(tid_agg_rx->timeout)); /* manage reordering buffer according to requested */ /* sequence number */ @@ -2414,11 +2412,9 @@ static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local, /* new un-ordered ampdu frame - process it */ /* reset session timer */ - if (tid_agg_rx->timeout) { - unsigned long expires = - jiffies + (tid_agg_rx->timeout / 1000) * HZ; - mod_timer(&tid_agg_rx->session_timer, expires); - } + if (tid_agg_rx->timeout) + mod_timer(&tid_agg_rx->session_timer, + TU_TO_EXP_TIME(tid_agg_rx->timeout)); /* if this mpdu is fragmented - terminate rx aggregation session */ sc = le16_to_cpu(hdr->seq_ctrl); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index d75c870fae65..a070bd929e00 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -88,7 +88,7 @@ struct tid_ampdu_tx { * @stored_mpdu_num: number of MPDUs in reordering buffer * @ssn: Starting Sequence Number expected to be aggregated. * @buf_size: buffer size for incoming A-MPDUs - * @timeout: reset timer value. + * @timeout: reset timer value (in TUs). * @dialog_token: dialog token for aggregation session */ struct tid_ampdu_rx { -- cgit v1.2.3 From b8695a8fe6d89140f8d17668e99ebd39358d7c0b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Feb 2009 21:25:46 +0100 Subject: mac80211: restructure HT code Create two new files, agg-tx.c and agg-rx.c to make it clearer which code is common (ht.c) and which is specific (agg-*.c). Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/Makefile | 2 +- net/mac80211/agg-rx.c | 287 +++++++++++++++ net/mac80211/agg-tx.c | 593 +++++++++++++++++++++++++++++++ net/mac80211/ht.c | 867 +-------------------------------------------- net/mac80211/ieee80211_i.h | 3 + 5 files changed, 895 insertions(+), 857 deletions(-) create mode 100644 net/mac80211/agg-rx.c create mode 100644 net/mac80211/agg-tx.c (limited to 'net') diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 58c94bb38e87..3503a3d21318 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -8,7 +8,7 @@ mac80211-y := \ wep.o \ wpa.o \ scan.o \ - ht.o \ + ht.o agg-tx.o agg-rx.o \ mlme.o \ iface.o \ rate.o \ diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c new file mode 100644 index 000000000000..62b9feb3c804 --- /dev/null +++ b/net/mac80211/agg-rx.c @@ -0,0 +1,287 @@ +/* + * HT handling + * + * Copyright 2003, Jouni Malinen + * Copyright 2002-2005, Instant802 Networks, Inc. + * Copyright 2005-2006, Devicescape Software, Inc. + * Copyright 2006-2007 Jiri Benc + * Copyright 2007, Michael Wu + * Copyright 2007-2008, Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include "ieee80211_i.h" + +void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, + u16 initiator, u16 reason) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_hw *hw = &local->hw; + struct sta_info *sta; + int ret, i; + + rcu_read_lock(); + + sta = sta_info_get(local, ra); + if (!sta) { + rcu_read_unlock(); + return; + } + + /* check if TID is in operational state */ + spin_lock_bh(&sta->lock); + if (sta->ampdu_mlme.tid_state_rx[tid] + != HT_AGG_STATE_OPERATIONAL) { + spin_unlock_bh(&sta->lock); + rcu_read_unlock(); + return; + } + sta->ampdu_mlme.tid_state_rx[tid] = + HT_AGG_STATE_REQ_STOP_BA_MSK | + (initiator << HT_AGG_STATE_INITIATOR_SHIFT); + spin_unlock_bh(&sta->lock); + + /* stop HW Rx aggregation. ampdu_action existence + * already verified in session init so we add the BUG_ON */ + BUG_ON(!local->ops->ampdu_action); + +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Rx BA session stop requested for %pM tid %u\n", + ra, tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + + ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_STOP, + &sta->sta, tid, NULL); + if (ret) + printk(KERN_DEBUG "HW problem - can not stop rx " + "aggregation for tid %d\n", tid); + + /* shutdown timer has not expired */ + if (initiator != WLAN_BACK_TIMER) + del_timer_sync(&sta->ampdu_mlme.tid_rx[tid]->session_timer); + + /* check if this is a self generated aggregation halt */ + if (initiator == WLAN_BACK_RECIPIENT || initiator == WLAN_BACK_TIMER) + ieee80211_send_delba(sdata, ra, tid, 0, reason); + + /* free the reordering buffer */ + for (i = 0; i < sta->ampdu_mlme.tid_rx[tid]->buf_size; i++) { + if (sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]) { + /* release the reordered frames */ + dev_kfree_skb(sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]); + sta->ampdu_mlme.tid_rx[tid]->stored_mpdu_num--; + sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i] = NULL; + } + } + /* free resources */ + kfree(sta->ampdu_mlme.tid_rx[tid]->reorder_buf); + kfree(sta->ampdu_mlme.tid_rx[tid]); + sta->ampdu_mlme.tid_rx[tid] = NULL; + sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_IDLE; + + rcu_read_unlock(); +} + +/* + * After accepting the AddBA Request we activated a timer, + * resetting it after each frame that arrives from the originator. + * if this timer expires ieee80211_sta_stop_rx_ba_session will be executed. + */ +static void sta_rx_agg_session_timer_expired(unsigned long data) +{ + /* not an elegant detour, but there is no choice as the timer passes + * only one argument, and various sta_info are needed here, so init + * flow in sta_info_create gives the TID as data, while the timer_to_id + * array gives the sta through container_of */ + u8 *ptid = (u8 *)data; + u8 *timer_to_id = ptid - *ptid; + struct sta_info *sta = container_of(timer_to_id, struct sta_info, + timer_to_tid[0]); + +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid); +#endif + ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->sta.addr, + (u16)*ptid, WLAN_BACK_TIMER, + WLAN_REASON_QSTA_TIMEOUT); +} + +static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid, + u8 dialog_token, u16 status, u16 policy, + u16 buf_size, u16 timeout) +{ + struct ieee80211_if_sta *ifsta = &sdata->u.sta; + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb; + struct ieee80211_mgmt *mgmt; + u16 capab; + + skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); + + if (!skb) { + printk(KERN_DEBUG "%s: failed to allocate buffer " + "for addba resp frame\n", sdata->dev->name); + return; + } + + skb_reserve(skb, local->hw.extra_tx_headroom); + mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); + memset(mgmt, 0, 24); + memcpy(mgmt->da, da, ETH_ALEN); + memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); + if (sdata->vif.type == NL80211_IFTYPE_AP) + memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); + else + memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); + + skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_resp)); + mgmt->u.action.category = WLAN_CATEGORY_BACK; + mgmt->u.action.u.addba_resp.action_code = WLAN_ACTION_ADDBA_RESP; + mgmt->u.action.u.addba_resp.dialog_token = dialog_token; + + capab = (u16)(policy << 1); /* bit 1 aggregation policy */ + capab |= (u16)(tid << 2); /* bit 5:2 TID number */ + capab |= (u16)(buf_size << 6); /* bit 15:6 max size of aggregation */ + + mgmt->u.action.u.addba_resp.capab = cpu_to_le16(capab); + mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout); + mgmt->u.action.u.addba_resp.status = cpu_to_le16(status); + + ieee80211_tx_skb(sdata, skb, 1); +} + +void ieee80211_process_addba_request(struct ieee80211_local *local, + struct sta_info *sta, + struct ieee80211_mgmt *mgmt, + size_t len) +{ + struct ieee80211_hw *hw = &local->hw; + struct ieee80211_conf *conf = &hw->conf; + struct tid_ampdu_rx *tid_agg_rx; + u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num, status; + u8 dialog_token; + int ret = -EOPNOTSUPP; + + /* extract session parameters from addba request frame */ + dialog_token = mgmt->u.action.u.addba_req.dialog_token; + timeout = le16_to_cpu(mgmt->u.action.u.addba_req.timeout); + start_seq_num = + le16_to_cpu(mgmt->u.action.u.addba_req.start_seq_num) >> 4; + + capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab); + ba_policy = (capab & IEEE80211_ADDBA_PARAM_POLICY_MASK) >> 1; + tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; + buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6; + + status = WLAN_STATUS_REQUEST_DECLINED; + + /* sanity check for incoming parameters: + * check if configuration can support the BA policy + * and if buffer size does not exceeds max value */ + /* XXX: check own ht delayed BA capability?? */ + if (((ba_policy != 1) + && (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA))) + || (buf_size > IEEE80211_MAX_AMPDU_BUF)) { + status = WLAN_STATUS_INVALID_QOS_PARAM; +#ifdef CONFIG_MAC80211_HT_DEBUG + if (net_ratelimit()) + printk(KERN_DEBUG "AddBA Req with bad params from " + "%pM on tid %u. policy %d, buffer size %d\n", + mgmt->sa, tid, ba_policy, + buf_size); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + goto end_no_lock; + } + /* determine default buffer size */ + if (buf_size == 0) { + struct ieee80211_supported_band *sband; + + sband = local->hw.wiphy->bands[conf->channel->band]; + buf_size = IEEE80211_MIN_AMPDU_BUF; + buf_size = buf_size << sband->ht_cap.ampdu_factor; + } + + + /* examine state machine */ + spin_lock_bh(&sta->lock); + + if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_IDLE) { +#ifdef CONFIG_MAC80211_HT_DEBUG + if (net_ratelimit()) + printk(KERN_DEBUG "unexpected AddBA Req from " + "%pM on tid %u\n", + mgmt->sa, tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + goto end; + } + + /* prepare A-MPDU MLME for Rx aggregation */ + sta->ampdu_mlme.tid_rx[tid] = + kmalloc(sizeof(struct tid_ampdu_rx), GFP_ATOMIC); + if (!sta->ampdu_mlme.tid_rx[tid]) { +#ifdef CONFIG_MAC80211_HT_DEBUG + if (net_ratelimit()) + printk(KERN_ERR "allocate rx mlme to tid %d failed\n", + tid); +#endif + goto end; + } + /* rx timer */ + sta->ampdu_mlme.tid_rx[tid]->session_timer.function = + sta_rx_agg_session_timer_expired; + sta->ampdu_mlme.tid_rx[tid]->session_timer.data = + (unsigned long)&sta->timer_to_tid[tid]; + init_timer(&sta->ampdu_mlme.tid_rx[tid]->session_timer); + + tid_agg_rx = sta->ampdu_mlme.tid_rx[tid]; + + /* prepare reordering buffer */ + tid_agg_rx->reorder_buf = + kcalloc(buf_size, sizeof(struct sk_buff *), GFP_ATOMIC); + if (!tid_agg_rx->reorder_buf) { +#ifdef CONFIG_MAC80211_HT_DEBUG + if (net_ratelimit()) + printk(KERN_ERR "can not allocate reordering buffer " + "to tid %d\n", tid); +#endif + kfree(sta->ampdu_mlme.tid_rx[tid]); + goto end; + } + + if (local->ops->ampdu_action) + ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_START, + &sta->sta, tid, &start_seq_num); +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Rx A-MPDU request on tid %d result %d\n", tid, ret); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + + if (ret) { + kfree(tid_agg_rx->reorder_buf); + kfree(tid_agg_rx); + sta->ampdu_mlme.tid_rx[tid] = NULL; + goto end; + } + + /* change state and send addba resp */ + sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_OPERATIONAL; + tid_agg_rx->dialog_token = dialog_token; + tid_agg_rx->ssn = start_seq_num; + tid_agg_rx->head_seq_num = start_seq_num; + tid_agg_rx->buf_size = buf_size; + tid_agg_rx->timeout = timeout; + tid_agg_rx->stored_mpdu_num = 0; + status = WLAN_STATUS_SUCCESS; +end: + spin_unlock_bh(&sta->lock); + +end_no_lock: + ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, tid, + dialog_token, status, 1, buf_size, timeout); +} diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c new file mode 100644 index 000000000000..6ab731fecc20 --- /dev/null +++ b/net/mac80211/agg-tx.c @@ -0,0 +1,593 @@ +/* + * HT handling + * + * Copyright 2003, Jouni Malinen + * Copyright 2002-2005, Instant802 Networks, Inc. + * Copyright 2005-2006, Devicescape Software, Inc. + * Copyright 2006-2007 Jiri Benc + * Copyright 2007, Michael Wu + * Copyright 2007-2009, Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include "ieee80211_i.h" +#include "wme.h" + +static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, + const u8 *da, u16 tid, + u8 dialog_token, u16 start_seq_num, + u16 agg_size, u16 timeout) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_if_sta *ifsta = &sdata->u.sta; + struct sk_buff *skb; + struct ieee80211_mgmt *mgmt; + u16 capab; + + skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); + + if (!skb) { + printk(KERN_ERR "%s: failed to allocate buffer " + "for addba request frame\n", sdata->dev->name); + return; + } + skb_reserve(skb, local->hw.extra_tx_headroom); + mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); + memset(mgmt, 0, 24); + memcpy(mgmt->da, da, ETH_ALEN); + memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); + if (sdata->vif.type == NL80211_IFTYPE_AP) + memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); + else + memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); + + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); + + skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_req)); + + mgmt->u.action.category = WLAN_CATEGORY_BACK; + mgmt->u.action.u.addba_req.action_code = WLAN_ACTION_ADDBA_REQ; + + mgmt->u.action.u.addba_req.dialog_token = dialog_token; + capab = (u16)(1 << 1); /* bit 1 aggregation policy */ + capab |= (u16)(tid << 2); /* bit 5:2 TID number */ + capab |= (u16)(agg_size << 6); /* bit 15:6 max size of aggergation */ + + mgmt->u.action.u.addba_req.capab = cpu_to_le16(capab); + + mgmt->u.action.u.addba_req.timeout = cpu_to_le16(timeout); + mgmt->u.action.u.addba_req.start_seq_num = + cpu_to_le16(start_seq_num << 4); + + ieee80211_tx_skb(sdata, skb, 1); +} + +void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn) +{ + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb; + struct ieee80211_bar *bar; + u16 bar_control = 0; + + skb = dev_alloc_skb(sizeof(*bar) + local->hw.extra_tx_headroom); + if (!skb) { + printk(KERN_ERR "%s: failed to allocate buffer for " + "bar frame\n", sdata->dev->name); + return; + } + skb_reserve(skb, local->hw.extra_tx_headroom); + bar = (struct ieee80211_bar *)skb_put(skb, sizeof(*bar)); + memset(bar, 0, sizeof(*bar)); + bar->frame_control = cpu_to_le16(IEEE80211_FTYPE_CTL | + IEEE80211_STYPE_BACK_REQ); + memcpy(bar->ra, ra, ETH_ALEN); + memcpy(bar->ta, sdata->dev->dev_addr, ETH_ALEN); + bar_control |= (u16)IEEE80211_BAR_CTRL_ACK_POLICY_NORMAL; + bar_control |= (u16)IEEE80211_BAR_CTRL_CBMTID_COMPRESSED_BA; + bar_control |= (u16)(tid << 12); + bar->control = cpu_to_le16(bar_control); + bar->start_seq_num = cpu_to_le16(ssn); + + ieee80211_tx_skb(sdata, skb, 0); +} + +/* + * After sending add Block Ack request we activated a timer until + * add Block Ack response will arrive from the recipient. + * If this timer expires sta_addba_resp_timer_expired will be executed. + */ +static void sta_addba_resp_timer_expired(unsigned long data) +{ + /* not an elegant detour, but there is no choice as the timer passes + * only one argument, and both sta_info and TID are needed, so init + * flow in sta_info_create gives the TID as data, while the timer_to_id + * array gives the sta through container_of */ + u16 tid = *(u8 *)data; + struct sta_info *temp_sta = container_of((void *)data, + struct sta_info, timer_to_tid[tid]); + + struct ieee80211_local *local = temp_sta->local; + struct ieee80211_hw *hw = &local->hw; + struct sta_info *sta; + u8 *state; + + rcu_read_lock(); + + sta = sta_info_get(local, temp_sta->sta.addr); + if (!sta) { + rcu_read_unlock(); + return; + } + + state = &sta->ampdu_mlme.tid_state_tx[tid]; + /* check if the TID waits for addBA response */ + spin_lock_bh(&sta->lock); + if (!(*state & HT_ADDBA_REQUESTED_MSK)) { + spin_unlock_bh(&sta->lock); + *state = HT_AGG_STATE_IDLE; +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "timer expired on tid %d but we are not " + "expecting addBA response there", tid); +#endif + goto timer_expired_exit; + } + +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "addBA response timer expired on tid %d\n", tid); +#endif + + /* go through the state check in stop_BA_session */ + *state = HT_AGG_STATE_OPERATIONAL; + spin_unlock_bh(&sta->lock); + ieee80211_stop_tx_ba_session(hw, temp_sta->sta.addr, tid, + WLAN_BACK_INITIATOR); + +timer_expired_exit: + rcu_read_unlock(); +} + +int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct sta_info *sta; + struct ieee80211_sub_if_data *sdata; + u16 start_seq_num; + u8 *state; + int ret = 0; + + if ((tid >= STA_TID_NUM) || !(hw->flags & IEEE80211_HW_AMPDU_AGGREGATION)) + return -EINVAL; + +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Open BA session requested for %pM tid %u\n", + ra, tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + + rcu_read_lock(); + + sta = sta_info_get(local, ra); + if (!sta) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Could not find the station\n"); +#endif + ret = -ENOENT; + goto exit; + } + + spin_lock_bh(&sta->lock); + + /* we have tried too many times, receiver does not want A-MPDU */ + if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_MAX_RETRIES) { + ret = -EBUSY; + goto err_unlock_sta; + } + + state = &sta->ampdu_mlme.tid_state_tx[tid]; + /* check if the TID is not in aggregation flow already */ + if (*state != HT_AGG_STATE_IDLE) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "BA request denied - session is not " + "idle on tid %u\n", tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + ret = -EAGAIN; + goto err_unlock_sta; + } + + /* prepare A-MPDU MLME for Tx aggregation */ + sta->ampdu_mlme.tid_tx[tid] = + kmalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC); + if (!sta->ampdu_mlme.tid_tx[tid]) { +#ifdef CONFIG_MAC80211_HT_DEBUG + if (net_ratelimit()) + printk(KERN_ERR "allocate tx mlme to tid %d failed\n", + tid); +#endif + ret = -ENOMEM; + goto err_unlock_sta; + } + /* Tx timer */ + sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.function = + sta_addba_resp_timer_expired; + sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.data = + (unsigned long)&sta->timer_to_tid[tid]; + init_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); + + if (hw->ampdu_queues) { + /* create a new queue for this aggregation */ + ret = ieee80211_ht_agg_queue_add(local, sta, tid); + + /* case no queue is available to aggregation + * don't switch to aggregation */ + if (ret) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "BA request denied - " + "queue unavailable for tid %d\n", tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + goto err_unlock_queue; + } + } + sdata = sta->sdata; + + /* Ok, the Addba frame hasn't been sent yet, but if the driver calls the + * call back right away, it must see that the flow has begun */ + *state |= HT_ADDBA_REQUESTED_MSK; + + /* This is slightly racy because the queue isn't stopped */ + start_seq_num = sta->tid_seq[tid]; + + if (local->ops->ampdu_action) + ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_START, + &sta->sta, tid, &start_seq_num); + + if (ret) { + /* No need to requeue the packets in the agg queue, since we + * held the tx lock: no packet could be enqueued to the newly + * allocated queue */ + if (hw->ampdu_queues) + ieee80211_ht_agg_queue_remove(local, sta, tid, 0); +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "BA request denied - HW unavailable for" + " tid %d\n", tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + *state = HT_AGG_STATE_IDLE; + goto err_unlock_queue; + } + + /* Will put all the packets in the new SW queue */ + if (hw->ampdu_queues) + ieee80211_requeue(local, ieee802_1d_to_ac[tid]); + spin_unlock_bh(&sta->lock); + + /* send an addBA request */ + sta->ampdu_mlme.dialog_token_allocator++; + sta->ampdu_mlme.tid_tx[tid]->dialog_token = + sta->ampdu_mlme.dialog_token_allocator; + sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num; + + + ieee80211_send_addba_request(sta->sdata, ra, tid, + sta->ampdu_mlme.tid_tx[tid]->dialog_token, + sta->ampdu_mlme.tid_tx[tid]->ssn, + 0x40, 5000); + /* activate the timer for the recipient's addBA response */ + sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.expires = + jiffies + ADDBA_RESP_INTERVAL; + add_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid); +#endif + goto exit; + +err_unlock_queue: + kfree(sta->ampdu_mlme.tid_tx[tid]); + sta->ampdu_mlme.tid_tx[tid] = NULL; + ret = -EBUSY; +err_unlock_sta: + spin_unlock_bh(&sta->lock); +exit: + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL(ieee80211_start_tx_ba_session); + +void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct sta_info *sta; + u8 *state; + + if (tid >= STA_TID_NUM) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n", + tid, STA_TID_NUM); +#endif + return; + } + + rcu_read_lock(); + sta = sta_info_get(local, ra); + if (!sta) { + rcu_read_unlock(); +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Could not find station: %pM\n", ra); +#endif + return; + } + + state = &sta->ampdu_mlme.tid_state_tx[tid]; + spin_lock_bh(&sta->lock); + + if (!(*state & HT_ADDBA_REQUESTED_MSK)) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "addBA was not requested yet, state is %d\n", + *state); +#endif + spin_unlock_bh(&sta->lock); + rcu_read_unlock(); + return; + } + + WARN_ON_ONCE(*state & HT_ADDBA_DRV_READY_MSK); + + *state |= HT_ADDBA_DRV_READY_MSK; + + if (*state == HT_AGG_STATE_OPERATIONAL) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Aggregation is on for tid %d \n", tid); +#endif + if (hw->ampdu_queues) + ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); + } + spin_unlock_bh(&sta->lock); + rcu_read_unlock(); +} +EXPORT_SYMBOL(ieee80211_start_tx_ba_cb); + + +int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw, + u8 *ra, u16 tid, + enum ieee80211_back_parties initiator) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct sta_info *sta; + u8 *state; + int ret = 0; + + if (tid >= STA_TID_NUM) + return -EINVAL; + + rcu_read_lock(); + sta = sta_info_get(local, ra); + if (!sta) { + rcu_read_unlock(); + return -ENOENT; + } + + /* check if the TID is in aggregation */ + state = &sta->ampdu_mlme.tid_state_tx[tid]; + spin_lock_bh(&sta->lock); + + if (*state != HT_AGG_STATE_OPERATIONAL) { + ret = -ENOENT; + goto stop_BA_exit; + } + +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Tx BA session stop requested for %pM tid %u\n", + ra, tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + + if (hw->ampdu_queues) + ieee80211_stop_queue(hw, sta->tid_to_tx_q[tid]); + + *state = HT_AGG_STATE_REQ_STOP_BA_MSK | + (initiator << HT_AGG_STATE_INITIATOR_SHIFT); + + if (local->ops->ampdu_action) + ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_STOP, + &sta->sta, tid, NULL); + + /* case HW denied going back to legacy */ + if (ret) { + WARN_ON(ret != -EBUSY); + *state = HT_AGG_STATE_OPERATIONAL; + if (hw->ampdu_queues) + ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); + goto stop_BA_exit; + } + +stop_BA_exit: + spin_unlock_bh(&sta->lock); + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL(ieee80211_stop_tx_ba_session); + +void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct sta_info *sta; + u8 *state; + int agg_queue; + + if (tid >= STA_TID_NUM) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n", + tid, STA_TID_NUM); +#endif + return; + } + +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Stopping Tx BA session for %pM tid %d\n", + ra, tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + + rcu_read_lock(); + sta = sta_info_get(local, ra); + if (!sta) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Could not find station: %pM\n", ra); +#endif + rcu_read_unlock(); + return; + } + state = &sta->ampdu_mlme.tid_state_tx[tid]; + + /* NOTE: no need to use sta->lock in this state check, as + * ieee80211_stop_tx_ba_session will let only one stop call to + * pass through per sta/tid + */ + if ((*state & HT_AGG_STATE_REQ_STOP_BA_MSK) == 0) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "unexpected callback to A-MPDU stop\n"); +#endif + rcu_read_unlock(); + return; + } + + if (*state & HT_AGG_STATE_INITIATOR_MSK) + ieee80211_send_delba(sta->sdata, ra, tid, + WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); + + if (hw->ampdu_queues) { + agg_queue = sta->tid_to_tx_q[tid]; + ieee80211_ht_agg_queue_remove(local, sta, tid, 1); + + /* We just requeued the all the frames that were in the + * removed queue, and since we might miss a softirq we do + * netif_schedule_queue. ieee80211_wake_queue is not used + * here as this queue is not necessarily stopped + */ + netif_schedule_queue(netdev_get_tx_queue(local->mdev, + agg_queue)); + } + spin_lock_bh(&sta->lock); + *state = HT_AGG_STATE_IDLE; + sta->ampdu_mlme.addba_req_num[tid] = 0; + kfree(sta->ampdu_mlme.tid_tx[tid]); + sta->ampdu_mlme.tid_tx[tid] = NULL; + spin_unlock_bh(&sta->lock); + + rcu_read_unlock(); +} +EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb); + +void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, + const u8 *ra, u16 tid) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_ra_tid *ra_tid; + struct sk_buff *skb = dev_alloc_skb(0); + + if (unlikely(!skb)) { +#ifdef CONFIG_MAC80211_HT_DEBUG + if (net_ratelimit()) + printk(KERN_WARNING "%s: Not enough memory, " + "dropping start BA session", skb->dev->name); +#endif + return; + } + ra_tid = (struct ieee80211_ra_tid *) &skb->cb; + memcpy(&ra_tid->ra, ra, ETH_ALEN); + ra_tid->tid = tid; + + skb->pkt_type = IEEE80211_ADDBA_MSG; + skb_queue_tail(&local->skb_queue, skb); + tasklet_schedule(&local->tasklet); +} +EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe); + +void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, + const u8 *ra, u16 tid) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_ra_tid *ra_tid; + struct sk_buff *skb = dev_alloc_skb(0); + + if (unlikely(!skb)) { +#ifdef CONFIG_MAC80211_HT_DEBUG + if (net_ratelimit()) + printk(KERN_WARNING "%s: Not enough memory, " + "dropping stop BA session", skb->dev->name); +#endif + return; + } + ra_tid = (struct ieee80211_ra_tid *) &skb->cb; + memcpy(&ra_tid->ra, ra, ETH_ALEN); + ra_tid->tid = tid; + + skb->pkt_type = IEEE80211_DELBA_MSG; + skb_queue_tail(&local->skb_queue, skb); + tasklet_schedule(&local->tasklet); +} +EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb_irqsafe); + +void ieee80211_process_addba_resp(struct ieee80211_local *local, + struct sta_info *sta, + struct ieee80211_mgmt *mgmt, + size_t len) +{ + struct ieee80211_hw *hw = &local->hw; + u16 capab; + u16 tid, start_seq_num; + u8 *state; + + capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab); + tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; + + state = &sta->ampdu_mlme.tid_state_tx[tid]; + + spin_lock_bh(&sta->lock); + + if (!(*state & HT_ADDBA_REQUESTED_MSK)) { + spin_unlock_bh(&sta->lock); + return; + } + + if (mgmt->u.action.u.addba_resp.dialog_token != + sta->ampdu_mlme.tid_tx[tid]->dialog_token) { + spin_unlock_bh(&sta->lock); +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "wrong addBA response token, tid %d\n", tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + return; + } + + del_timer_sync(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "switched off addBA timer for tid %d \n", tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + if (le16_to_cpu(mgmt->u.action.u.addba_resp.status) + == WLAN_STATUS_SUCCESS) { + *state |= HT_ADDBA_RECEIVED_MSK; + sta->ampdu_mlme.addba_req_num[tid] = 0; + + if (*state == HT_AGG_STATE_OPERATIONAL && + local->hw.ampdu_queues) + ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); + + if (local->ops->ampdu_action) { + (void)local->ops->ampdu_action(hw, + IEEE80211_AMPDU_TX_RESUME, + &sta->sta, tid, &start_seq_num); + } +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Resuming TX aggregation for tid %d\n", tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + spin_unlock_bh(&sta->lock); + } else { + sta->ampdu_mlme.addba_req_num[tid]++; + /* this will allow the state check in stop_BA_session */ + *state = HT_AGG_STATE_OPERATIONAL; + spin_unlock_bh(&sta->lock); + ieee80211_stop_tx_ba_session(hw, sta->sta.addr, tid, + WLAN_BACK_INITIATOR); + } +} diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 7a38d2e76ca9..869ea5fd3f51 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -17,8 +17,6 @@ #include #include #include "ieee80211_i.h" -#include "sta_info.h" -#include "wme.h" void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband, struct ieee80211_ht_cap *ht_cap_ie, @@ -155,105 +153,23 @@ u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, return changed; } -static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, - const u8 *da, u16 tid, - u8 dialog_token, u16 start_seq_num, - u16 agg_size, u16 timeout) -{ - struct ieee80211_local *local = sdata->local; - struct ieee80211_if_sta *ifsta = &sdata->u.sta; - struct sk_buff *skb; - struct ieee80211_mgmt *mgmt; - u16 capab; - - skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); - - if (!skb) { - printk(KERN_ERR "%s: failed to allocate buffer " - "for addba request frame\n", sdata->dev->name); - return; - } - skb_reserve(skb, local->hw.extra_tx_headroom); - mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); - memset(mgmt, 0, 24); - memcpy(mgmt->da, da, ETH_ALEN); - memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); - if (sdata->vif.type == NL80211_IFTYPE_AP) - memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); - else - memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); - - mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | - IEEE80211_STYPE_ACTION); - - skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_req)); - - mgmt->u.action.category = WLAN_CATEGORY_BACK; - mgmt->u.action.u.addba_req.action_code = WLAN_ACTION_ADDBA_REQ; - - mgmt->u.action.u.addba_req.dialog_token = dialog_token; - capab = (u16)(1 << 1); /* bit 1 aggregation policy */ - capab |= (u16)(tid << 2); /* bit 5:2 TID number */ - capab |= (u16)(agg_size << 6); /* bit 15:6 max size of aggergation */ - - mgmt->u.action.u.addba_req.capab = cpu_to_le16(capab); - - mgmt->u.action.u.addba_req.timeout = cpu_to_le16(timeout); - mgmt->u.action.u.addba_req.start_seq_num = - cpu_to_le16(start_seq_num << 4); - - ieee80211_tx_skb(sdata, skb, 1); -} - -static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid, - u8 dialog_token, u16 status, u16 policy, - u16 buf_size, u16 timeout) +void ieee80211_sta_tear_down_BA_sessions(struct ieee80211_sub_if_data *sdata, u8 *addr) { - struct ieee80211_if_sta *ifsta = &sdata->u.sta; struct ieee80211_local *local = sdata->local; - struct sk_buff *skb; - struct ieee80211_mgmt *mgmt; - u16 capab; - - skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); + int i; - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer " - "for addba resp frame\n", sdata->dev->name); - return; + for (i = 0; i < STA_TID_NUM; i++) { + ieee80211_stop_tx_ba_session(&local->hw, addr, i, + WLAN_BACK_INITIATOR); + ieee80211_sta_stop_rx_ba_session(sdata, addr, i, + WLAN_BACK_RECIPIENT, + WLAN_REASON_QSTA_LEAVE_QBSS); } - - skb_reserve(skb, local->hw.extra_tx_headroom); - mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); - memset(mgmt, 0, 24); - memcpy(mgmt->da, da, ETH_ALEN); - memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); - if (sdata->vif.type == NL80211_IFTYPE_AP) - memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); - else - memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); - mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | - IEEE80211_STYPE_ACTION); - - skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_resp)); - mgmt->u.action.category = WLAN_CATEGORY_BACK; - mgmt->u.action.u.addba_resp.action_code = WLAN_ACTION_ADDBA_RESP; - mgmt->u.action.u.addba_resp.dialog_token = dialog_token; - - capab = (u16)(policy << 1); /* bit 1 aggregation policy */ - capab |= (u16)(tid << 2); /* bit 5:2 TID number */ - capab |= (u16)(buf_size << 6); /* bit 15:6 max size of aggregation */ - - mgmt->u.action.u.addba_resp.capab = cpu_to_le16(capab); - mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout); - mgmt->u.action.u.addba_resp.status = cpu_to_le16(status); - - ieee80211_tx_skb(sdata, skb, 1); } -static void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, - const u8 *da, u16 tid, - u16 initiator, u16 reason_code) +void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, + const u8 *da, u16 tid, + u16 initiator, u16 reason_code) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_sta *ifsta = &sdata->u.sta; @@ -294,767 +210,6 @@ static void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, ieee80211_tx_skb(sdata, skb, 1); } -void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn) -{ - struct ieee80211_local *local = sdata->local; - struct sk_buff *skb; - struct ieee80211_bar *bar; - u16 bar_control = 0; - - skb = dev_alloc_skb(sizeof(*bar) + local->hw.extra_tx_headroom); - if (!skb) { - printk(KERN_ERR "%s: failed to allocate buffer for " - "bar frame\n", sdata->dev->name); - return; - } - skb_reserve(skb, local->hw.extra_tx_headroom); - bar = (struct ieee80211_bar *)skb_put(skb, sizeof(*bar)); - memset(bar, 0, sizeof(*bar)); - bar->frame_control = cpu_to_le16(IEEE80211_FTYPE_CTL | - IEEE80211_STYPE_BACK_REQ); - memcpy(bar->ra, ra, ETH_ALEN); - memcpy(bar->ta, sdata->dev->dev_addr, ETH_ALEN); - bar_control |= (u16)IEEE80211_BAR_CTRL_ACK_POLICY_NORMAL; - bar_control |= (u16)IEEE80211_BAR_CTRL_CBMTID_COMPRESSED_BA; - bar_control |= (u16)(tid << 12); - bar->control = cpu_to_le16(bar_control); - bar->start_seq_num = cpu_to_le16(ssn); - - ieee80211_tx_skb(sdata, skb, 0); -} - -void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, - u16 initiator, u16 reason) -{ - struct ieee80211_local *local = sdata->local; - struct ieee80211_hw *hw = &local->hw; - struct sta_info *sta; - int ret, i; - - rcu_read_lock(); - - sta = sta_info_get(local, ra); - if (!sta) { - rcu_read_unlock(); - return; - } - - /* check if TID is in operational state */ - spin_lock_bh(&sta->lock); - if (sta->ampdu_mlme.tid_state_rx[tid] - != HT_AGG_STATE_OPERATIONAL) { - spin_unlock_bh(&sta->lock); - rcu_read_unlock(); - return; - } - sta->ampdu_mlme.tid_state_rx[tid] = - HT_AGG_STATE_REQ_STOP_BA_MSK | - (initiator << HT_AGG_STATE_INITIATOR_SHIFT); - spin_unlock_bh(&sta->lock); - - /* stop HW Rx aggregation. ampdu_action existence - * already verified in session init so we add the BUG_ON */ - BUG_ON(!local->ops->ampdu_action); - -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Rx BA session stop requested for %pM tid %u\n", - ra, tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - - ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_STOP, - &sta->sta, tid, NULL); - if (ret) - printk(KERN_DEBUG "HW problem - can not stop rx " - "aggregation for tid %d\n", tid); - - /* shutdown timer has not expired */ - if (initiator != WLAN_BACK_TIMER) - del_timer_sync(&sta->ampdu_mlme.tid_rx[tid]->session_timer); - - /* check if this is a self generated aggregation halt */ - if (initiator == WLAN_BACK_RECIPIENT || initiator == WLAN_BACK_TIMER) - ieee80211_send_delba(sdata, ra, tid, 0, reason); - - /* free the reordering buffer */ - for (i = 0; i < sta->ampdu_mlme.tid_rx[tid]->buf_size; i++) { - if (sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]) { - /* release the reordered frames */ - dev_kfree_skb(sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]); - sta->ampdu_mlme.tid_rx[tid]->stored_mpdu_num--; - sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i] = NULL; - } - } - /* free resources */ - kfree(sta->ampdu_mlme.tid_rx[tid]->reorder_buf); - kfree(sta->ampdu_mlme.tid_rx[tid]); - sta->ampdu_mlme.tid_rx[tid] = NULL; - sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_IDLE; - - rcu_read_unlock(); -} - - -/* - * After sending add Block Ack request we activated a timer until - * add Block Ack response will arrive from the recipient. - * If this timer expires sta_addba_resp_timer_expired will be executed. - */ -static void sta_addba_resp_timer_expired(unsigned long data) -{ - /* not an elegant detour, but there is no choice as the timer passes - * only one argument, and both sta_info and TID are needed, so init - * flow in sta_info_create gives the TID as data, while the timer_to_id - * array gives the sta through container_of */ - u16 tid = *(u8 *)data; - struct sta_info *temp_sta = container_of((void *)data, - struct sta_info, timer_to_tid[tid]); - - struct ieee80211_local *local = temp_sta->local; - struct ieee80211_hw *hw = &local->hw; - struct sta_info *sta; - u8 *state; - - rcu_read_lock(); - - sta = sta_info_get(local, temp_sta->sta.addr); - if (!sta) { - rcu_read_unlock(); - return; - } - - state = &sta->ampdu_mlme.tid_state_tx[tid]; - /* check if the TID waits for addBA response */ - spin_lock_bh(&sta->lock); - if (!(*state & HT_ADDBA_REQUESTED_MSK)) { - spin_unlock_bh(&sta->lock); - *state = HT_AGG_STATE_IDLE; -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "timer expired on tid %d but we are not " - "expecting addBA response there", tid); -#endif - goto timer_expired_exit; - } - -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "addBA response timer expired on tid %d\n", tid); -#endif - - /* go through the state check in stop_BA_session */ - *state = HT_AGG_STATE_OPERATIONAL; - spin_unlock_bh(&sta->lock); - ieee80211_stop_tx_ba_session(hw, temp_sta->sta.addr, tid, - WLAN_BACK_INITIATOR); - -timer_expired_exit: - rcu_read_unlock(); -} - -void ieee80211_sta_tear_down_BA_sessions(struct ieee80211_sub_if_data *sdata, u8 *addr) -{ - struct ieee80211_local *local = sdata->local; - int i; - - for (i = 0; i < STA_TID_NUM; i++) { - ieee80211_stop_tx_ba_session(&local->hw, addr, i, - WLAN_BACK_INITIATOR); - ieee80211_sta_stop_rx_ba_session(sdata, addr, i, - WLAN_BACK_RECIPIENT, - WLAN_REASON_QSTA_LEAVE_QBSS); - } -} - -int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct sta_info *sta; - struct ieee80211_sub_if_data *sdata; - u16 start_seq_num; - u8 *state; - int ret = 0; - - if ((tid >= STA_TID_NUM) || !(hw->flags & IEEE80211_HW_AMPDU_AGGREGATION)) - return -EINVAL; - -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Open BA session requested for %pM tid %u\n", - ra, tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - - rcu_read_lock(); - - sta = sta_info_get(local, ra); - if (!sta) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Could not find the station\n"); -#endif - ret = -ENOENT; - goto exit; - } - - spin_lock_bh(&sta->lock); - - /* we have tried too many times, receiver does not want A-MPDU */ - if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_MAX_RETRIES) { - ret = -EBUSY; - goto err_unlock_sta; - } - - state = &sta->ampdu_mlme.tid_state_tx[tid]; - /* check if the TID is not in aggregation flow already */ - if (*state != HT_AGG_STATE_IDLE) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "BA request denied - session is not " - "idle on tid %u\n", tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - ret = -EAGAIN; - goto err_unlock_sta; - } - - /* prepare A-MPDU MLME for Tx aggregation */ - sta->ampdu_mlme.tid_tx[tid] = - kmalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC); - if (!sta->ampdu_mlme.tid_tx[tid]) { -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_ERR "allocate tx mlme to tid %d failed\n", - tid); -#endif - ret = -ENOMEM; - goto err_unlock_sta; - } - /* Tx timer */ - sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.function = - sta_addba_resp_timer_expired; - sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.data = - (unsigned long)&sta->timer_to_tid[tid]; - init_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); - - if (hw->ampdu_queues) { - /* create a new queue for this aggregation */ - ret = ieee80211_ht_agg_queue_add(local, sta, tid); - - /* case no queue is available to aggregation - * don't switch to aggregation */ - if (ret) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "BA request denied - " - "queue unavailable for tid %d\n", tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - goto err_unlock_queue; - } - } - sdata = sta->sdata; - - /* Ok, the Addba frame hasn't been sent yet, but if the driver calls the - * call back right away, it must see that the flow has begun */ - *state |= HT_ADDBA_REQUESTED_MSK; - - /* This is slightly racy because the queue isn't stopped */ - start_seq_num = sta->tid_seq[tid]; - - if (local->ops->ampdu_action) - ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_START, - &sta->sta, tid, &start_seq_num); - - if (ret) { - /* No need to requeue the packets in the agg queue, since we - * held the tx lock: no packet could be enqueued to the newly - * allocated queue */ - if (hw->ampdu_queues) - ieee80211_ht_agg_queue_remove(local, sta, tid, 0); -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "BA request denied - HW unavailable for" - " tid %d\n", tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - *state = HT_AGG_STATE_IDLE; - goto err_unlock_queue; - } - - /* Will put all the packets in the new SW queue */ - if (hw->ampdu_queues) - ieee80211_requeue(local, ieee802_1d_to_ac[tid]); - spin_unlock_bh(&sta->lock); - - /* send an addBA request */ - sta->ampdu_mlme.dialog_token_allocator++; - sta->ampdu_mlme.tid_tx[tid]->dialog_token = - sta->ampdu_mlme.dialog_token_allocator; - sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num; - - - ieee80211_send_addba_request(sta->sdata, ra, tid, - sta->ampdu_mlme.tid_tx[tid]->dialog_token, - sta->ampdu_mlme.tid_tx[tid]->ssn, - 0x40, 5000); - /* activate the timer for the recipient's addBA response */ - sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.expires = - jiffies + ADDBA_RESP_INTERVAL; - add_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid); -#endif - goto exit; - -err_unlock_queue: - kfree(sta->ampdu_mlme.tid_tx[tid]); - sta->ampdu_mlme.tid_tx[tid] = NULL; - ret = -EBUSY; -err_unlock_sta: - spin_unlock_bh(&sta->lock); -exit: - rcu_read_unlock(); - return ret; -} -EXPORT_SYMBOL(ieee80211_start_tx_ba_session); - -int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw, - u8 *ra, u16 tid, - enum ieee80211_back_parties initiator) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct sta_info *sta; - u8 *state; - int ret = 0; - - if (tid >= STA_TID_NUM) - return -EINVAL; - - rcu_read_lock(); - sta = sta_info_get(local, ra); - if (!sta) { - rcu_read_unlock(); - return -ENOENT; - } - - /* check if the TID is in aggregation */ - state = &sta->ampdu_mlme.tid_state_tx[tid]; - spin_lock_bh(&sta->lock); - - if (*state != HT_AGG_STATE_OPERATIONAL) { - ret = -ENOENT; - goto stop_BA_exit; - } - -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Tx BA session stop requested for %pM tid %u\n", - ra, tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - - if (hw->ampdu_queues) - ieee80211_stop_queue(hw, sta->tid_to_tx_q[tid]); - - *state = HT_AGG_STATE_REQ_STOP_BA_MSK | - (initiator << HT_AGG_STATE_INITIATOR_SHIFT); - - if (local->ops->ampdu_action) - ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_STOP, - &sta->sta, tid, NULL); - - /* case HW denied going back to legacy */ - if (ret) { - WARN_ON(ret != -EBUSY); - *state = HT_AGG_STATE_OPERATIONAL; - if (hw->ampdu_queues) - ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); - goto stop_BA_exit; - } - -stop_BA_exit: - spin_unlock_bh(&sta->lock); - rcu_read_unlock(); - return ret; -} -EXPORT_SYMBOL(ieee80211_stop_tx_ba_session); - -void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct sta_info *sta; - u8 *state; - - if (tid >= STA_TID_NUM) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n", - tid, STA_TID_NUM); -#endif - return; - } - - rcu_read_lock(); - sta = sta_info_get(local, ra); - if (!sta) { - rcu_read_unlock(); -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Could not find station: %pM\n", ra); -#endif - return; - } - - state = &sta->ampdu_mlme.tid_state_tx[tid]; - spin_lock_bh(&sta->lock); - - if (!(*state & HT_ADDBA_REQUESTED_MSK)) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "addBA was not requested yet, state is %d\n", - *state); -#endif - spin_unlock_bh(&sta->lock); - rcu_read_unlock(); - return; - } - - WARN_ON_ONCE(*state & HT_ADDBA_DRV_READY_MSK); - - *state |= HT_ADDBA_DRV_READY_MSK; - - if (*state == HT_AGG_STATE_OPERATIONAL) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Aggregation is on for tid %d \n", tid); -#endif - if (hw->ampdu_queues) - ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); - } - spin_unlock_bh(&sta->lock); - rcu_read_unlock(); -} -EXPORT_SYMBOL(ieee80211_start_tx_ba_cb); - -void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct sta_info *sta; - u8 *state; - int agg_queue; - - if (tid >= STA_TID_NUM) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n", - tid, STA_TID_NUM); -#endif - return; - } - -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Stopping Tx BA session for %pM tid %d\n", - ra, tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - - rcu_read_lock(); - sta = sta_info_get(local, ra); - if (!sta) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Could not find station: %pM\n", ra); -#endif - rcu_read_unlock(); - return; - } - state = &sta->ampdu_mlme.tid_state_tx[tid]; - - /* NOTE: no need to use sta->lock in this state check, as - * ieee80211_stop_tx_ba_session will let only one stop call to - * pass through per sta/tid - */ - if ((*state & HT_AGG_STATE_REQ_STOP_BA_MSK) == 0) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "unexpected callback to A-MPDU stop\n"); -#endif - rcu_read_unlock(); - return; - } - - if (*state & HT_AGG_STATE_INITIATOR_MSK) - ieee80211_send_delba(sta->sdata, ra, tid, - WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); - - if (hw->ampdu_queues) { - agg_queue = sta->tid_to_tx_q[tid]; - ieee80211_ht_agg_queue_remove(local, sta, tid, 1); - - /* We just requeued the all the frames that were in the - * removed queue, and since we might miss a softirq we do - * netif_schedule_queue. ieee80211_wake_queue is not used - * here as this queue is not necessarily stopped - */ - netif_schedule_queue(netdev_get_tx_queue(local->mdev, - agg_queue)); - } - spin_lock_bh(&sta->lock); - *state = HT_AGG_STATE_IDLE; - sta->ampdu_mlme.addba_req_num[tid] = 0; - kfree(sta->ampdu_mlme.tid_tx[tid]); - sta->ampdu_mlme.tid_tx[tid] = NULL; - spin_unlock_bh(&sta->lock); - - rcu_read_unlock(); -} -EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb); - -void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, - const u8 *ra, u16 tid) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_ra_tid *ra_tid; - struct sk_buff *skb = dev_alloc_skb(0); - - if (unlikely(!skb)) { -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_WARNING "%s: Not enough memory, " - "dropping start BA session", skb->dev->name); -#endif - return; - } - ra_tid = (struct ieee80211_ra_tid *) &skb->cb; - memcpy(&ra_tid->ra, ra, ETH_ALEN); - ra_tid->tid = tid; - - skb->pkt_type = IEEE80211_ADDBA_MSG; - skb_queue_tail(&local->skb_queue, skb); - tasklet_schedule(&local->tasklet); -} -EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe); - -void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, - const u8 *ra, u16 tid) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_ra_tid *ra_tid; - struct sk_buff *skb = dev_alloc_skb(0); - - if (unlikely(!skb)) { -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_WARNING "%s: Not enough memory, " - "dropping stop BA session", skb->dev->name); -#endif - return; - } - ra_tid = (struct ieee80211_ra_tid *) &skb->cb; - memcpy(&ra_tid->ra, ra, ETH_ALEN); - ra_tid->tid = tid; - - skb->pkt_type = IEEE80211_DELBA_MSG; - skb_queue_tail(&local->skb_queue, skb); - tasklet_schedule(&local->tasklet); -} -EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb_irqsafe); - -/* - * After accepting the AddBA Request we activated a timer, - * resetting it after each frame that arrives from the originator. - * if this timer expires ieee80211_sta_stop_rx_ba_session will be executed. - */ -static void sta_rx_agg_session_timer_expired(unsigned long data) -{ - /* not an elegant detour, but there is no choice as the timer passes - * only one argument, and various sta_info are needed here, so init - * flow in sta_info_create gives the TID as data, while the timer_to_id - * array gives the sta through container_of */ - u8 *ptid = (u8 *)data; - u8 *timer_to_id = ptid - *ptid; - struct sta_info *sta = container_of(timer_to_id, struct sta_info, - timer_to_tid[0]); - -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid); -#endif - ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->sta.addr, - (u16)*ptid, WLAN_BACK_TIMER, - WLAN_REASON_QSTA_TIMEOUT); -} - -void ieee80211_process_addba_request(struct ieee80211_local *local, - struct sta_info *sta, - struct ieee80211_mgmt *mgmt, - size_t len) -{ - struct ieee80211_hw *hw = &local->hw; - struct ieee80211_conf *conf = &hw->conf; - struct tid_ampdu_rx *tid_agg_rx; - u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num, status; - u8 dialog_token; - int ret = -EOPNOTSUPP; - - /* extract session parameters from addba request frame */ - dialog_token = mgmt->u.action.u.addba_req.dialog_token; - timeout = le16_to_cpu(mgmt->u.action.u.addba_req.timeout); - start_seq_num = - le16_to_cpu(mgmt->u.action.u.addba_req.start_seq_num) >> 4; - - capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab); - ba_policy = (capab & IEEE80211_ADDBA_PARAM_POLICY_MASK) >> 1; - tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; - buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6; - - status = WLAN_STATUS_REQUEST_DECLINED; - - /* sanity check for incoming parameters: - * check if configuration can support the BA policy - * and if buffer size does not exceeds max value */ - /* XXX: check own ht delayed BA capability?? */ - if (((ba_policy != 1) - && (!(sta->sta.ht_cap.cap & IEEE80211_HT_CAP_DELAY_BA))) - || (buf_size > IEEE80211_MAX_AMPDU_BUF)) { - status = WLAN_STATUS_INVALID_QOS_PARAM; -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_DEBUG "AddBA Req with bad params from " - "%pM on tid %u. policy %d, buffer size %d\n", - mgmt->sa, tid, ba_policy, - buf_size); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - goto end_no_lock; - } - /* determine default buffer size */ - if (buf_size == 0) { - struct ieee80211_supported_band *sband; - - sband = local->hw.wiphy->bands[conf->channel->band]; - buf_size = IEEE80211_MIN_AMPDU_BUF; - buf_size = buf_size << sband->ht_cap.ampdu_factor; - } - - - /* examine state machine */ - spin_lock_bh(&sta->lock); - - if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_IDLE) { -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_DEBUG "unexpected AddBA Req from " - "%pM on tid %u\n", - mgmt->sa, tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - goto end; - } - - /* prepare A-MPDU MLME for Rx aggregation */ - sta->ampdu_mlme.tid_rx[tid] = - kmalloc(sizeof(struct tid_ampdu_rx), GFP_ATOMIC); - if (!sta->ampdu_mlme.tid_rx[tid]) { -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_ERR "allocate rx mlme to tid %d failed\n", - tid); -#endif - goto end; - } - /* rx timer */ - sta->ampdu_mlme.tid_rx[tid]->session_timer.function = - sta_rx_agg_session_timer_expired; - sta->ampdu_mlme.tid_rx[tid]->session_timer.data = - (unsigned long)&sta->timer_to_tid[tid]; - init_timer(&sta->ampdu_mlme.tid_rx[tid]->session_timer); - - tid_agg_rx = sta->ampdu_mlme.tid_rx[tid]; - - /* prepare reordering buffer */ - tid_agg_rx->reorder_buf = - kcalloc(buf_size, sizeof(struct sk_buff *), GFP_ATOMIC); - if (!tid_agg_rx->reorder_buf) { -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_ERR "can not allocate reordering buffer " - "to tid %d\n", tid); -#endif - kfree(sta->ampdu_mlme.tid_rx[tid]); - goto end; - } - - if (local->ops->ampdu_action) - ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_START, - &sta->sta, tid, &start_seq_num); -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Rx A-MPDU request on tid %d result %d\n", tid, ret); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - - if (ret) { - kfree(tid_agg_rx->reorder_buf); - kfree(tid_agg_rx); - sta->ampdu_mlme.tid_rx[tid] = NULL; - goto end; - } - - /* change state and send addba resp */ - sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_OPERATIONAL; - tid_agg_rx->dialog_token = dialog_token; - tid_agg_rx->ssn = start_seq_num; - tid_agg_rx->head_seq_num = start_seq_num; - tid_agg_rx->buf_size = buf_size; - tid_agg_rx->timeout = timeout; - tid_agg_rx->stored_mpdu_num = 0; - status = WLAN_STATUS_SUCCESS; -end: - spin_unlock_bh(&sta->lock); - -end_no_lock: - ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, tid, - dialog_token, status, 1, buf_size, timeout); -} - -void ieee80211_process_addba_resp(struct ieee80211_local *local, - struct sta_info *sta, - struct ieee80211_mgmt *mgmt, - size_t len) -{ - struct ieee80211_hw *hw = &local->hw; - u16 capab; - u16 tid, start_seq_num; - u8 *state; - - capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab); - tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; - - state = &sta->ampdu_mlme.tid_state_tx[tid]; - - spin_lock_bh(&sta->lock); - - if (!(*state & HT_ADDBA_REQUESTED_MSK)) { - spin_unlock_bh(&sta->lock); - return; - } - - if (mgmt->u.action.u.addba_resp.dialog_token != - sta->ampdu_mlme.tid_tx[tid]->dialog_token) { - spin_unlock_bh(&sta->lock); -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "wrong addBA response token, tid %d\n", tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - return; - } - - del_timer_sync(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "switched off addBA timer for tid %d \n", tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - if (le16_to_cpu(mgmt->u.action.u.addba_resp.status) - == WLAN_STATUS_SUCCESS) { - *state |= HT_ADDBA_RECEIVED_MSK; - sta->ampdu_mlme.addba_req_num[tid] = 0; - - if (*state == HT_AGG_STATE_OPERATIONAL && - local->hw.ampdu_queues) - ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); - - if (local->ops->ampdu_action) { - (void)local->ops->ampdu_action(hw, - IEEE80211_AMPDU_TX_RESUME, - &sta->sta, tid, &start_seq_num); - } -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Resuming TX aggregation for tid %d\n", tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - spin_unlock_bh(&sta->lock); - } else { - sta->ampdu_mlme.addba_req_num[tid]++; - /* this will allow the state check in stop_BA_session */ - *state = HT_AGG_STATE_OPERATIONAL; - spin_unlock_bh(&sta->lock); - ieee80211_stop_tx_ba_session(hw, sta->sta.addr, tid, - WLAN_BACK_INITIATOR); - } -} - void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct ieee80211_mgmt *mgmt, size_t len) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 5b230015f938..6987dfa41c7f 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -987,6 +987,9 @@ u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, struct ieee80211_ht_info *hti, u16 ap_ht_cap_flags); void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn); +void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, + const u8 *da, u16 tid, + u16 initiator, u16 reason_code); void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid, u16 initiator, u16 reason); -- cgit v1.2.3 From 8abd3f9bc476b5b7f6de1b6fb576b87ba338f7fd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Feb 2009 21:25:47 +0100 Subject: mac80211: restrict aggregation to supported interface modes We can only support aggregation on AP/STA right now. HT isn't defined for IBSS, WDS or MESH. In the WDS/MESH cases it's not clear what to put into the IBSS field, and we don't handle that in the code at all. Also fix the code to handle VLAN correctly. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-rx.c | 3 ++- net/mac80211/agg-tx.c | 16 +++++++++++++++- net/mac80211/ht.c | 3 ++- net/mac80211/rx.c | 11 +++++++++++ 4 files changed, 30 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 62b9feb3c804..d7afd0956970 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -134,7 +134,8 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d memset(mgmt, 0, 24); memcpy(mgmt->da, da, ETH_ALEN); memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); - if (sdata->vif.type == NL80211_IFTYPE_AP) + if (sdata->vif.type == NL80211_IFTYPE_AP || + sdata->vif.type == NL80211_IFTYPE_AP_VLAN) memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); else memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 6ab731fecc20..c91b32a3f0e7 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -41,7 +41,8 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, memset(mgmt, 0, 24); memcpy(mgmt->da, da, ETH_ALEN); memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); - if (sdata->vif.type == NL80211_IFTYPE_AP) + if (sdata->vif.type == NL80211_IFTYPE_AP || + sdata->vif.type == NL80211_IFTYPE_AP_VLAN) memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); else memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); @@ -180,6 +181,19 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) goto exit; } + /* + * The aggregation code is not prepared to handle + * anything but STA/AP due to the BSSID handling. + * IBSS could work in the code but isn't supported + * by drivers or the standard. + */ + if (sta->sdata->vif.type != NL80211_IFTYPE_STATION && + sta->sdata->vif.type != NL80211_IFTYPE_AP_VLAN && + sta->sdata->vif.type != NL80211_IFTYPE_AP) { + ret = -EINVAL; + goto exit; + } + spin_lock_bh(&sta->lock); /* we have tried too many times, receiver does not want A-MPDU */ diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 869ea5fd3f51..a49a8a5828bf 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -190,7 +190,8 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, memset(mgmt, 0, 24); memcpy(mgmt->da, da, ETH_ALEN); memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); - if (sdata->vif.type == NL80211_IFTYPE_AP) + if (sdata->vif.type == NL80211_IFTYPE_AP || + sdata->vif.type == NL80211_IFTYPE_AP_VLAN) memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); else memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index f34cc66d3f4b..1327d424bf31 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1768,6 +1768,17 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) switch (mgmt->u.action.category) { case WLAN_CATEGORY_BACK: + /* + * The aggregation code is not prepared to handle + * anything but STA/AP due to the BSSID handling; + * IBSS could work in the code but isn't supported + * by drivers or the standard. + */ + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_AP_VLAN && + sdata->vif.type != NL80211_IFTYPE_AP) + return RX_DROP_MONITOR; + switch (mgmt->u.action.u.addba_req.action_code) { case WLAN_ACTION_ADDBA_REQ: if (len < (IEEE80211_MIN_ACTION_SIZE + -- cgit v1.2.3 From 955d3fe3e8b38de19761e4ac7afdb9d7a33b9566 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Feb 2009 21:25:48 +0100 Subject: mac80211: hardware should not deny going back to legacy Doing so would be an MLME protocol violation when the peer disabled the aggregation session. Quick driver review indicates that there are error codes passed all over the drivers but cannot ever be nonzero except in error conditions that would indicate mac80211 bugs. No real changes here, since no drivers currently can return -EBUSY. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-tx.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index c91b32a3f0e7..73abff956548 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -407,9 +407,8 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw, ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_STOP, &sta->sta, tid, NULL); - /* case HW denied going back to legacy */ - if (ret) { - WARN_ON(ret != -EBUSY); + /* HW shall not deny going back to legacy */ + if (WARN_ON(ret)) { *state = HT_AGG_STATE_OPERATIONAL; if (hw->ampdu_queues) ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); -- cgit v1.2.3 From 86ab6c5a6c5204f6c25281b9039330b8f5e9b692 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Feb 2009 21:25:49 +0100 Subject: mac80211: document TX aggregation (and small cleanup) Add documentation and move ieee80211_start_tx_ba_cb_irqsafe to right after ieee80211_start_tx_ba_cb. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-tx.c | 76 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 51 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 73abff956548..61bb7db04808 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -18,6 +18,31 @@ #include "ieee80211_i.h" #include "wme.h" +/** + * DOC: TX aggregation + * + * Aggregation on the TX side requires setting the hardware flag + * %IEEE80211_HW_AMPDU_AGGREGATION as well as, if present, the @ampdu_queues + * hardware parameter to the number of hardware AMPDU queues. If there are no + * hardware queues then the driver will (currently) have to do all frame + * buffering. + * + * When TX aggregation is started by some subsystem (usually the rate control + * algorithm would be appropriate) by calling the + * ieee80211_start_tx_ba_session() function, the driver will be notified via + * its @ampdu_action function, with the %IEEE80211_AMPDU_TX_START action. + * + * In response to that, the driver is later required to call the + * ieee80211_start_tx_ba_cb() (or ieee80211_start_tx_ba_cb_irqsafe()) + * function, which will start the aggregation session. + * + * Similarly, when the aggregation session is stopped by + * ieee80211_stop_tx_ba_session(), the driver's @ampdu_action function will + * be called with the action %IEEE80211_AMPDU_TX_STOP. In this case, the + * call must not fail, and the driver must later call ieee80211_stop_tx_ba_cb() + * (or ieee80211_stop_tx_ba_cb_irqsafe()). + */ + static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, const u8 *da, u16 tid, u8 dialog_token, u16 start_seq_num, @@ -363,6 +388,31 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid) } EXPORT_SYMBOL(ieee80211_start_tx_ba_cb); +void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, + const u8 *ra, u16 tid) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_ra_tid *ra_tid; + struct sk_buff *skb = dev_alloc_skb(0); + + if (unlikely(!skb)) { +#ifdef CONFIG_MAC80211_HT_DEBUG + if (net_ratelimit()) + printk(KERN_WARNING "%s: Not enough memory, " + "dropping start BA session", skb->dev->name); +#endif + return; + } + ra_tid = (struct ieee80211_ra_tid *) &skb->cb; + memcpy(&ra_tid->ra, ra, ETH_ALEN); + ra_tid->tid = tid; + + skb->pkt_type = IEEE80211_ADDBA_MSG; + skb_queue_tail(&local->skb_queue, skb); + tasklet_schedule(&local->tasklet); +} +EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe); + int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid, @@ -492,31 +542,6 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid) } EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb); -void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, - const u8 *ra, u16 tid) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_ra_tid *ra_tid; - struct sk_buff *skb = dev_alloc_skb(0); - - if (unlikely(!skb)) { -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_WARNING "%s: Not enough memory, " - "dropping start BA session", skb->dev->name); -#endif - return; - } - ra_tid = (struct ieee80211_ra_tid *) &skb->cb; - memcpy(&ra_tid->ra, ra, ETH_ALEN); - ra_tid->tid = tid; - - skb->pkt_type = IEEE80211_ADDBA_MSG; - skb_queue_tail(&local->skb_queue, skb); - tasklet_schedule(&local->tasklet); -} -EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe); - void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, const u8 *ra, u16 tid) { @@ -542,6 +567,7 @@ void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb_irqsafe); + void ieee80211_process_addba_resp(struct ieee80211_local *local, struct sta_info *sta, struct ieee80211_mgmt *mgmt, -- cgit v1.2.3 From 23e6a7ea5cb1a902d37ab0c783709c178fa834df Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Feb 2009 21:25:50 +0100 Subject: mac80211: fix race in TX aggregation When disabling TX aggregation because it was rejected or from the timer (it was not accepted), there is a window where we first set the state to operation, unlock, and then undo the whole thing. Avoid that by splitting up the stop function. Also get rid of the pointless sta_info indirection in the timer. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-tx.c | 95 ++++++++++++++++++++++++++------------------------- 1 file changed, 48 insertions(+), 47 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 61bb7db04808..a49b76f61da3 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -123,6 +123,34 @@ void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u1 ieee80211_tx_skb(sdata, skb, 0); } +static int __ieee80211_stop_tx_ba_session(struct ieee80211_local *local, + struct sta_info *sta, u16 tid, + enum ieee80211_back_parties initiator) +{ + int ret; + u8 *state; + + state = &sta->ampdu_mlme.tid_state_tx[tid]; + + if (local->hw.ampdu_queues) + ieee80211_stop_queue(&local->hw, sta->tid_to_tx_q[tid]); + + *state = HT_AGG_STATE_REQ_STOP_BA_MSK | + (initiator << HT_AGG_STATE_INITIATOR_SHIFT); + + ret = local->ops->ampdu_action(&local->hw, IEEE80211_AMPDU_TX_STOP, + &sta->sta, tid, NULL); + + /* HW shall not deny going back to legacy */ + if (WARN_ON(ret)) { + *state = HT_AGG_STATE_OPERATIONAL; + if (local->hw.ampdu_queues) + ieee80211_wake_queue(&local->hw, sta->tid_to_tx_q[tid]); + } + + return ret; +} + /* * After sending add Block Ack request we activated a timer until * add Block Ack response will arrive from the recipient. @@ -135,23 +163,13 @@ static void sta_addba_resp_timer_expired(unsigned long data) * flow in sta_info_create gives the TID as data, while the timer_to_id * array gives the sta through container_of */ u16 tid = *(u8 *)data; - struct sta_info *temp_sta = container_of((void *)data, + struct sta_info *sta = container_of((void *)data, struct sta_info, timer_to_tid[tid]); - - struct ieee80211_local *local = temp_sta->local; - struct ieee80211_hw *hw = &local->hw; - struct sta_info *sta; + struct ieee80211_local *local = sta->local; u8 *state; - rcu_read_lock(); - - sta = sta_info_get(local, temp_sta->sta.addr); - if (!sta) { - rcu_read_unlock(); - return; - } - state = &sta->ampdu_mlme.tid_state_tx[tid]; + /* check if the TID waits for addBA response */ spin_lock_bh(&sta->lock); if (!(*state & HT_ADDBA_REQUESTED_MSK)) { @@ -161,21 +179,15 @@ static void sta_addba_resp_timer_expired(unsigned long data) printk(KERN_DEBUG "timer expired on tid %d but we are not " "expecting addBA response there", tid); #endif - goto timer_expired_exit; + return; } #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "addBA response timer expired on tid %d\n", tid); #endif - /* go through the state check in stop_BA_session */ - *state = HT_AGG_STATE_OPERATIONAL; + __ieee80211_stop_tx_ba_session(local, sta, tid, WLAN_BACK_INITIATOR); spin_unlock_bh(&sta->lock); - ieee80211_stop_tx_ba_session(hw, temp_sta->sta.addr, tid, - WLAN_BACK_INITIATOR); - -timer_expired_exit: - rcu_read_unlock(); } int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) @@ -187,6 +199,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) u8 *state; int ret = 0; + if (WARN_ON(!local->ops->ampdu_action)) + return -EINVAL; + if ((tid >= STA_TID_NUM) || !(hw->flags & IEEE80211_HW_AMPDU_AGGREGATION)) return -EINVAL; @@ -280,9 +295,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) /* This is slightly racy because the queue isn't stopped */ start_seq_num = sta->tid_seq[tid]; - if (local->ops->ampdu_action) - ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_START, - &sta->sta, tid, &start_seq_num); + ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_START, + &sta->sta, tid, &start_seq_num); if (ret) { /* No need to requeue the packets in the agg queue, since we @@ -423,6 +437,9 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw, u8 *state; int ret = 0; + if (WARN_ON(!local->ops->ampdu_action)) + return -EINVAL; + if (tid >= STA_TID_NUM) return -EINVAL; @@ -439,7 +456,7 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw, if (*state != HT_AGG_STATE_OPERATIONAL) { ret = -ENOENT; - goto stop_BA_exit; + goto unlock; } #ifdef CONFIG_MAC80211_HT_DEBUG @@ -447,27 +464,13 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw, ra, tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ - if (hw->ampdu_queues) - ieee80211_stop_queue(hw, sta->tid_to_tx_q[tid]); - - *state = HT_AGG_STATE_REQ_STOP_BA_MSK | - (initiator << HT_AGG_STATE_INITIATOR_SHIFT); + ret = __ieee80211_stop_tx_ba_session(local, sta, tid, initiator); - if (local->ops->ampdu_action) - ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_STOP, - &sta->sta, tid, NULL); - - /* HW shall not deny going back to legacy */ - if (WARN_ON(ret)) { - *state = HT_AGG_STATE_OPERATIONAL; - if (hw->ampdu_queues) - ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); - goto stop_BA_exit; - } - -stop_BA_exit: + unlock: spin_unlock_bh(&sta->lock); + rcu_read_unlock(); + return ret; } EXPORT_SYMBOL(ieee80211_stop_tx_ba_session); @@ -623,10 +626,8 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, spin_unlock_bh(&sta->lock); } else { sta->ampdu_mlme.addba_req_num[tid]++; - /* this will allow the state check in stop_BA_session */ - *state = HT_AGG_STATE_OPERATIONAL; + __ieee80211_stop_tx_ba_session(local, sta, tid, + WLAN_BACK_INITIATOR); spin_unlock_bh(&sta->lock); - ieee80211_stop_tx_ba_session(hw, sta->sta.addr, tid, - WLAN_BACK_INITIATOR); } } -- cgit v1.2.3 From 55687e380a3965ac448e03281e027553a6ae6dac Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Feb 2009 21:25:51 +0100 Subject: mac80211: fix aggregation timer lockups As far as I can tell, there are possible lockups because both the RX session_timer and TX addba_resp_timer are del_timer_sync'ed under the sta spinlock which both timer functions take. Additionally, the TX agg code seems to leak memory when TX aggregation is not disabled before the sta_info is freed. Fix this by making the free code a little smarter in the RX agg case, and actually make the sta_info_destroy code free the TX agg info in the TX agg case. We won't notify the peer, but it'll notice something is wrong anyway, and normally this only happens after we've told it in some other way we will no longer talk to it. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-rx.c | 11 +++++++++-- net/mac80211/sta_info.c | 37 +++++++++++++++++++++++++++++++++---- net/mac80211/sta_info.h | 1 + 3 files changed, 43 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index d7afd0956970..4b571b211625 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -78,11 +78,18 @@ void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *r sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i] = NULL; } } + + spin_lock_bh(&sta->lock); /* free resources */ kfree(sta->ampdu_mlme.tid_rx[tid]->reorder_buf); - kfree(sta->ampdu_mlme.tid_rx[tid]); - sta->ampdu_mlme.tid_rx[tid] = NULL; + + if (!sta->ampdu_mlme.tid_rx[tid]->shutdown) { + kfree(sta->ampdu_mlme.tid_rx[tid]); + sta->ampdu_mlme.tid_rx[tid] = NULL; + } + sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_IDLE; + spin_unlock_bh(&sta->lock); rcu_read_unlock(); } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 10c5539c20ab..634f65c0130e 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -194,12 +194,41 @@ void sta_info_destroy(struct sta_info *sta) dev_kfree_skb_any(skb); for (i = 0; i < STA_TID_NUM; i++) { + struct tid_ampdu_rx *tid_rx; + struct tid_ampdu_tx *tid_tx; + spin_lock_bh(&sta->lock); - if (sta->ampdu_mlme.tid_rx[i]) - del_timer_sync(&sta->ampdu_mlme.tid_rx[i]->session_timer); - if (sta->ampdu_mlme.tid_tx[i]) - del_timer_sync(&sta->ampdu_mlme.tid_tx[i]->addba_resp_timer); + tid_rx = sta->ampdu_mlme.tid_rx[i]; + /* Make sure timer won't free the tid_rx struct, see below */ + if (tid_rx) + tid_rx->shutdown = true; spin_unlock_bh(&sta->lock); + + /* + * Outside spinlock - shutdown is true now so that the timer + * won't free tid_rx, we have to do that now. Can't let the + * timer do it because we have to sync the timer outside the + * lock that it takes itself. + */ + if (tid_rx) { + del_timer_sync(&tid_rx->session_timer); + kfree(tid_rx); + } + + /* + * No need to do such complications for TX agg sessions, the + * path leading to freeing the tid_tx struct goes via a call + * from the driver, and thus needs to look up the sta struct + * again, which cannot be found when we get here. Hence, we + * just need to delete the timer and free the aggregation + * info; we won't be telling the peer about it then but that + * doesn't matter if we're not talking to it again anyway. + */ + tid_tx = sta->ampdu_mlme.tid_tx[i]; + if (tid_tx) { + del_timer_sync(&tid_tx->addba_resp_timer); + kfree(tid_tx); + } } __sta_info_free(local, sta); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index a070bd929e00..d9653231992f 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -100,6 +100,7 @@ struct tid_ampdu_rx { u16 buf_size; u16 timeout; u8 dialog_token; + bool shutdown; }; /** -- cgit v1.2.3 From 2dace10efb8b761ccbd18d524f3b14d823edf8c0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Feb 2009 21:25:52 +0100 Subject: mac80211: clean up BA session teardown The sta_info pointer can very well be passed to ieee80211_sta_tear_down_BA_sessions, this will later allow us to pass it through even further. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ht.c | 8 ++++---- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/iface.c | 3 +-- net/mac80211/mlme.c | 2 +- 4 files changed, 7 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index a49a8a5828bf..1b503f3cc54c 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -153,15 +153,15 @@ u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, return changed; } -void ieee80211_sta_tear_down_BA_sessions(struct ieee80211_sub_if_data *sdata, u8 *addr) +void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta) { - struct ieee80211_local *local = sdata->local; + struct ieee80211_local *local = sta->local; int i; for (i = 0; i < STA_TID_NUM; i++) { - ieee80211_stop_tx_ba_session(&local->hw, addr, i, + ieee80211_stop_tx_ba_session(&local->hw, sta->sta.addr, i, WLAN_BACK_INITIATOR); - ieee80211_sta_stop_rx_ba_session(sdata, addr, i, + ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->sta.addr, i, WLAN_BACK_RECIPIENT, WLAN_REASON_QSTA_LEAVE_QBSS); } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 6987dfa41c7f..c6858ecde312 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -993,7 +993,7 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid, u16 initiator, u16 reason); -void ieee80211_sta_tear_down_BA_sessions(struct ieee80211_sub_if_data *sdata, u8 *addr); +void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta); void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct ieee80211_mgmt *mgmt, size_t len); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 915d04323a32..1c17fb8e4058 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -362,8 +362,7 @@ static int ieee80211_stop(struct net_device *dev) list_for_each_entry_rcu(sta, &local->sta_list, list) { if (sta->sdata == sdata) - ieee80211_sta_tear_down_BA_sessions(sdata, - sta->sta.addr); + ieee80211_sta_tear_down_BA_sessions(sta); } rcu_read_unlock(); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 169f10c51042..bfc47b330687 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -954,7 +954,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, netif_tx_stop_all_queues(sdata->dev); netif_carrier_off(sdata->dev); - ieee80211_sta_tear_down_BA_sessions(sdata, sta->sta.addr); + ieee80211_sta_tear_down_BA_sessions(sta); if (self_disconnected) { if (deauth) -- cgit v1.2.3 From d75636ef9c1af224f1097941879d5a8db7cd04e5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Feb 2009 21:25:53 +0100 Subject: mac80211: RX aggregation: clean up stop session Clean up the locking by splitting it into two functions, this will also enable further cleanups of stopping all sessions. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-rx.c | 57 +++++++++++++++++++++++++++++---------------------- 1 file changed, 32 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 4b571b211625..bb1f8740cbd5 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -17,47 +17,32 @@ #include #include "ieee80211_i.h" -void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, - u16 initiator, u16 reason) +static void __ieee80211_sta_stop_rx_ba_session(struct sta_info *sta, u16 tid, + u16 initiator, u16 reason) { - struct ieee80211_local *local = sdata->local; + struct ieee80211_local *local = sta->local; struct ieee80211_hw *hw = &local->hw; - struct sta_info *sta; - int ret, i; - - rcu_read_lock(); - - sta = sta_info_get(local, ra); - if (!sta) { - rcu_read_unlock(); - return; - } + int i; /* check if TID is in operational state */ spin_lock_bh(&sta->lock); - if (sta->ampdu_mlme.tid_state_rx[tid] - != HT_AGG_STATE_OPERATIONAL) { + if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_OPERATIONAL) { spin_unlock_bh(&sta->lock); - rcu_read_unlock(); return; } + sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_REQ_STOP_BA_MSK | (initiator << HT_AGG_STATE_INITIATOR_SHIFT); spin_unlock_bh(&sta->lock); - /* stop HW Rx aggregation. ampdu_action existence - * already verified in session init so we add the BUG_ON */ - BUG_ON(!local->ops->ampdu_action); - #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Rx BA session stop requested for %pM tid %u\n", - ra, tid); + sta->sta.addr, tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ - ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_STOP, - &sta->sta, tid, NULL); - if (ret) + if (local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_STOP, + &sta->sta, tid, NULL)) printk(KERN_DEBUG "HW problem - can not stop rx " "aggregation for tid %d\n", tid); @@ -67,7 +52,8 @@ void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *r /* check if this is a self generated aggregation halt */ if (initiator == WLAN_BACK_RECIPIENT || initiator == WLAN_BACK_TIMER) - ieee80211_send_delba(sdata, ra, tid, 0, reason); + ieee80211_send_delba(sta->sdata, sta->sta.addr, + tid, 0, reason); /* free the reordering buffer */ for (i = 0; i < sta->ampdu_mlme.tid_rx[tid]->buf_size; i++) { @@ -90,6 +76,27 @@ void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *r sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_IDLE; spin_unlock_bh(&sta->lock); +} + +void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, + u16 initiator, u16 reason) +{ + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + + /* stop HW Rx aggregation. ampdu_action existence + * already verified in session init so we add the BUG_ON */ + BUG_ON(!local->ops->ampdu_action); + + rcu_read_lock(); + + sta = sta_info_get(local, ra); + if (!sta) { + rcu_read_unlock(); + return; + } + + __ieee80211_sta_stop_rx_ba_session(sta, tid, initiator, reason); rcu_read_unlock(); } -- cgit v1.2.3 From 849b7967818995a32c3017542e33eb3155944368 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Feb 2009 21:25:54 +0100 Subject: mac80211: further cleanups to stopping BA sessions Essentially consisting of passing the sta_info pointer around, instead of repeatedly doing hash lookups. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-rx.c | 6 ++--- net/mac80211/agg-tx.c | 63 ++++++++++++++++++++++++---------------------- net/mac80211/ht.c | 9 +++---- net/mac80211/ieee80211_i.h | 5 ++++ 4 files changed, 44 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index bb1f8740cbd5..3112bfd441b6 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -17,8 +17,8 @@ #include #include "ieee80211_i.h" -static void __ieee80211_sta_stop_rx_ba_session(struct sta_info *sta, u16 tid, - u16 initiator, u16 reason) +void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, + u16 initiator, u16 reason) { struct ieee80211_local *local = sta->local; struct ieee80211_hw *hw = &local->hw; @@ -96,7 +96,7 @@ void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *r return; } - __ieee80211_sta_stop_rx_ba_session(sta, tid, initiator, reason); + __ieee80211_stop_rx_ba_session(sta, tid, initiator, reason); rcu_read_unlock(); } diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index a49b76f61da3..1232d9f01ca9 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -123,10 +123,10 @@ void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u1 ieee80211_tx_skb(sdata, skb, 0); } -static int __ieee80211_stop_tx_ba_session(struct ieee80211_local *local, - struct sta_info *sta, u16 tid, - enum ieee80211_back_parties initiator) +static int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, + enum ieee80211_back_parties initiator) { + struct ieee80211_local *local = sta->local; int ret; u8 *state; @@ -165,7 +165,6 @@ static void sta_addba_resp_timer_expired(unsigned long data) u16 tid = *(u8 *)data; struct sta_info *sta = container_of((void *)data, struct sta_info, timer_to_tid[tid]); - struct ieee80211_local *local = sta->local; u8 *state; state = &sta->ampdu_mlme.tid_state_tx[tid]; @@ -186,7 +185,7 @@ static void sta_addba_resp_timer_expired(unsigned long data) printk(KERN_DEBUG "addBA response timer expired on tid %d\n", tid); #endif - __ieee80211_stop_tx_ba_session(local, sta, tid, WLAN_BACK_INITIATOR); + ___ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR); spin_unlock_bh(&sta->lock); } @@ -427,6 +426,32 @@ void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe); +int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, + enum ieee80211_back_parties initiator) +{ + u8 *state; + int ret; + + /* check if the TID is in aggregation */ + state = &sta->ampdu_mlme.tid_state_tx[tid]; + spin_lock_bh(&sta->lock); + + if (*state != HT_AGG_STATE_OPERATIONAL) { + ret = -ENOENT; + goto unlock; + } + +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Tx BA session stop requested for %pM tid %u\n", + sta->sta.addr, tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + + ret = ___ieee80211_stop_tx_ba_session(sta, tid, initiator); + + unlock: + spin_unlock_bh(&sta->lock); + return ret; +} int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid, @@ -434,7 +459,6 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw, { struct ieee80211_local *local = hw_to_local(hw); struct sta_info *sta; - u8 *state; int ret = 0; if (WARN_ON(!local->ops->ampdu_action)) @@ -450,27 +474,8 @@ int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw, return -ENOENT; } - /* check if the TID is in aggregation */ - state = &sta->ampdu_mlme.tid_state_tx[tid]; - spin_lock_bh(&sta->lock); - - if (*state != HT_AGG_STATE_OPERATIONAL) { - ret = -ENOENT; - goto unlock; - } - -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Tx BA session stop requested for %pM tid %u\n", - ra, tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - - ret = __ieee80211_stop_tx_ba_session(local, sta, tid, initiator); - - unlock: - spin_unlock_bh(&sta->lock); - + ret = __ieee80211_stop_tx_ba_session(sta, tid, initiator); rcu_read_unlock(); - return ret; } EXPORT_SYMBOL(ieee80211_stop_tx_ba_session); @@ -623,11 +628,9 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Resuming TX aggregation for tid %d\n", tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ - spin_unlock_bh(&sta->lock); } else { sta->ampdu_mlme.addba_req_num[tid]++; - __ieee80211_stop_tx_ba_session(local, sta, tid, - WLAN_BACK_INITIATOR); - spin_unlock_bh(&sta->lock); + ___ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR); } + spin_unlock_bh(&sta->lock); } diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 1b503f3cc54c..82ea0b63a386 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -155,15 +155,12 @@ u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta) { - struct ieee80211_local *local = sta->local; int i; for (i = 0; i < STA_TID_NUM; i++) { - ieee80211_stop_tx_ba_session(&local->hw, sta->sta.addr, i, - WLAN_BACK_INITIATOR); - ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->sta.addr, i, - WLAN_BACK_RECIPIENT, - WLAN_REASON_QSTA_LEAVE_QBSS); + __ieee80211_stop_tx_ba_session(sta, i, WLAN_BACK_INITIATOR); + __ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, + WLAN_REASON_QSTA_LEAVE_QBSS); } } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index c6858ecde312..9122416fd6af 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -993,6 +993,8 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid, u16 initiator, u16 reason); +void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, + u16 initiator, u16 reason); void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta); void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, @@ -1006,6 +1008,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, struct ieee80211_mgmt *mgmt, size_t len); +int __ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, + enum ieee80211_back_parties initiator); + /* Spectrum management */ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, -- cgit v1.2.3 From 2a5193119269062608582418deba7af82844159a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Feb 2009 21:25:55 +0100 Subject: cfg80211/nl80211: scanning (and mac80211 update to use it) This patch adds basic scan capability to cfg80211/nl80211 and changes mac80211 to use it. The BSS list that cfg80211 maintains is made driver-accessible with a private area in each BSS struct, but mac80211 doesn't yet use it. That's another large project. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 20 ++ net/mac80211/ieee80211_i.h | 18 +- net/mac80211/iface.c | 2 +- net/mac80211/main.c | 32 +- net/mac80211/mlme.c | 37 ++- net/mac80211/scan.c | 356 ++++---------------- net/mac80211/wext.c | 59 +--- net/wireless/Makefile | 2 +- net/wireless/core.c | 8 + net/wireless/core.h | 20 ++ net/wireless/nl80211.c | 323 ++++++++++++++++++ net/wireless/nl80211.h | 8 + net/wireless/scan.c | 807 +++++++++++++++++++++++++++++++++++++++++++++ 13 files changed, 1318 insertions(+), 374 deletions(-) create mode 100644 net/wireless/scan.c (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 42d692fd9bec..c8d969be440b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1277,6 +1277,25 @@ static int ieee80211_resume(struct wiphy *wiphy) #define ieee80211_resume NULL #endif +static int ieee80211_scan(struct wiphy *wiphy, + struct net_device *dev, + struct cfg80211_scan_request *req) +{ + struct ieee80211_sub_if_data *sdata; + + if (!netif_running(dev)) + return -ENETDOWN; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_ADHOC && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT) + return -EOPNOTSUPP; + + return ieee80211_request_scan(sdata, req); +} + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -1309,4 +1328,5 @@ struct cfg80211_ops mac80211_config_ops = { .set_mgmt_extra_ie = ieee80211_set_mgmt_extra_ie, .suspend = ieee80211_suspend, .resume = ieee80211_resume, + .scan = ieee80211_scan, }; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 9122416fd6af..cbc0b7d647f9 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -294,8 +294,6 @@ struct ieee80211_if_sta { u8 ssid[IEEE80211_MAX_SSID_LEN]; enum ieee80211_sta_mlme_state state; size_t ssid_len; - u8 scan_ssid[IEEE80211_MAX_SSID_LEN]; - size_t scan_ssid_len; u16 aid; u16 ap_capab, capab; u8 *extra_ie; /* to be added to the end of AssocReq */ @@ -658,17 +656,18 @@ struct ieee80211_local { /* Scanning and BSS list */ bool sw_scanning, hw_scanning; + struct cfg80211_ssid scan_ssid; + struct cfg80211_scan_request int_scan_req; + struct cfg80211_scan_request *scan_req; + struct ieee80211_channel *scan_channel; int scan_channel_idx; - enum ieee80211_band scan_band; enum { SCAN_SET_CHANNEL, SCAN_SEND_PROBE } scan_state; unsigned long last_scan_completed; struct delayed_work scan_work; struct ieee80211_sub_if_data *scan_sdata; - struct ieee80211_channel *oper_channel, *scan_channel, *csa_channel; enum nl80211_channel_type oper_channel_type; - u8 scan_ssid[IEEE80211_MAX_SSID_LEN]; - size_t scan_ssid_len; + struct ieee80211_channel *oper_channel, *csa_channel; struct list_head bss_list; struct ieee80211_bss *bss_hash[STA_HASH_SIZE]; spinlock_t bss_lock; @@ -929,7 +928,7 @@ void ieee80211_send_pspoll(struct ieee80211_local *local, /* scan/BSS handling */ int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, - u8 *ssid, size_t ssid_len); + struct cfg80211_scan_request *req); int ieee80211_scan_results(struct ieee80211_local *local, struct iw_request_info *info, char *buf, size_t len); @@ -944,14 +943,15 @@ int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local); int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata, - u8 *ssid, size_t ssid_len); + struct cfg80211_scan_request *req); struct ieee80211_bss * ieee80211_bss_info_update(struct ieee80211_local *local, struct ieee80211_rx_status *rx_status, struct ieee80211_mgmt *mgmt, size_t len, struct ieee802_11_elems *elems, - int freq, bool beacon); + struct ieee80211_channel *channel, + bool beacon); struct ieee80211_bss * ieee80211_rx_bss_add(struct ieee80211_local *local, u8 *bssid, int freq, u8 *ssid, u8 ssid_len); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 1c17fb8e4058..df94b9365264 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -522,7 +522,7 @@ static int ieee80211_stop(struct net_device *dev) * scan event to userspace -- the scan is incomplete. */ if (local->sw_scanning) - ieee80211_scan_completed(&local->hw); + ieee80211_scan_completed(&local->hw, true); } conf.vif = &sdata->vif; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 956afea4214d..954edfbb6b6f 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -733,6 +733,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, return NULL; wiphy->privid = mac80211_wiphy_privid; + wiphy->max_scan_ssids = 4; local = wiphy_priv(wiphy); local->hw.wiphy = wiphy; @@ -817,25 +818,33 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) enum ieee80211_band band; struct net_device *mdev; struct ieee80211_master_priv *mpriv; + int channels, i, j; /* * generic code guarantees at least one band, * set this very early because much code assumes * that hw.conf.channel is assigned */ + channels = 0; for (band = 0; band < IEEE80211_NUM_BANDS; band++) { struct ieee80211_supported_band *sband; sband = local->hw.wiphy->bands[band]; - if (sband) { + if (sband && !local->oper_channel) { /* init channel we're on */ local->hw.conf.channel = local->oper_channel = local->scan_channel = &sband->channels[0]; - break; } + if (sband) + channels += sband->n_channels; } + local->int_scan_req.n_channels = channels; + local->int_scan_req.channels = kzalloc(sizeof(void *) * channels, GFP_KERNEL); + if (!local->int_scan_req.channels) + return -ENOMEM; + /* if low-level driver supports AP, we also support VLAN */ if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_AP)) local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_AP_VLAN); @@ -845,7 +854,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) result = wiphy_register(local->hw.wiphy); if (result < 0) - return result; + goto fail_wiphy_register; /* * We use the number of queues for feature tests (QoS, HT) internally @@ -948,6 +957,20 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) ieee80211_led_init(local); + /* alloc internal scan request */ + i = 0; + local->int_scan_req.ssids = &local->scan_ssid; + local->int_scan_req.n_ssids = 1; + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + if (!hw->wiphy->bands[band]) + continue; + for (j = 0; j < hw->wiphy->bands[band]->n_channels; j++) { + local->int_scan_req.channels[i] = + &hw->wiphy->bands[band]->channels[j]; + i++; + } + } + return 0; fail_wep: @@ -966,6 +989,8 @@ fail_workqueue: free_netdev(local->mdev); fail_mdev_alloc: wiphy_unregister(local->hw.wiphy); +fail_wiphy_register: + kfree(local->int_scan_req.channels); return result; } EXPORT_SYMBOL(ieee80211_register_hw); @@ -1011,6 +1036,7 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) ieee80211_wep_free(local); ieee80211_led_exit(local); free_netdev(local->mdev); + kfree(local->int_scan_req.channels); } EXPORT_SYMBOL(ieee80211_unregister_hw); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index bfc47b330687..46b4817cdea9 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1743,7 +1743,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, } bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems, - freq, beacon); + channel, beacon); if (!bss) return; @@ -2162,7 +2162,15 @@ static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata, printk(KERN_DEBUG "%s: No active IBSS STAs - trying to scan for other " "IBSS networks with same SSID (merge)\n", sdata->dev->name); - ieee80211_request_scan(sdata, ifsta->ssid, ifsta->ssid_len); + + /* XXX maybe racy? */ + if (sdata->local->scan_req) + return; + + memcpy(sdata->local->int_scan_req.ssids[0].ssid, + ifsta->ssid, IEEE80211_MAX_SSID_LEN); + sdata->local->int_scan_req.ssids[0].ssid_len = ifsta->ssid_len; + ieee80211_request_scan(sdata, &sdata->local->int_scan_req); } @@ -2378,8 +2386,15 @@ dont_join: IEEE80211_SCAN_INTERVAL)) { printk(KERN_DEBUG "%s: Trigger new scan to find an IBSS to " "join\n", sdata->dev->name); - return ieee80211_request_scan(sdata, ifsta->ssid, - ifsta->ssid_len); + + /* XXX maybe racy? */ + if (local->scan_req) + return -EBUSY; + + memcpy(local->int_scan_req.ssids[0].ssid, + ifsta->ssid, IEEE80211_MAX_SSID_LEN); + local->int_scan_req.ssids[0].ssid_len = ifsta->ssid_len; + return ieee80211_request_scan(sdata, &local->int_scan_req); } else if (ifsta->state != IEEE80211_STA_MLME_IBSS_JOINED) { int interval = IEEE80211_SCAN_INTERVAL; @@ -2478,11 +2493,16 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata, } else { if (ifsta->assoc_scan_tries < IEEE80211_ASSOC_SCANS_MAX_TRIES) { ifsta->assoc_scan_tries++; + /* XXX maybe racy? */ + if (local->scan_req) + return -1; + memcpy(local->int_scan_req.ssids[0].ssid, + ifsta->ssid, IEEE80211_MAX_SSID_LEN); if (ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL) - ieee80211_start_scan(sdata, NULL, 0); + local->int_scan_req.ssids[0].ssid_len = 0; else - ieee80211_start_scan(sdata, ifsta->ssid, - ifsta->ssid_len); + local->int_scan_req.ssids[0].ssid_len = ifsta->ssid_len; + ieee80211_start_scan(sdata, &local->int_scan_req); ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE; set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request); } else { @@ -2520,8 +2540,7 @@ static void ieee80211_sta_work(struct work_struct *work) ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE && ifsta->state != IEEE80211_STA_MLME_ASSOCIATE && test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request)) { - ieee80211_start_scan(sdata, ifsta->scan_ssid, - ifsta->scan_ssid_len); + ieee80211_start_scan(sdata, local->scan_req); return; } diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index eddca4e1e13c..c6b275b10cf9 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -13,6 +13,9 @@ */ /* TODO: + * figure out how to avoid that the "current BSS" expires + * clean up IBSS code (in MLME), see why it adds a BSS to the list + * use cfg80211's BSS handling (depends on IBSS TODO above) * order BSS list by RSSI(?) ("quality of AP") * scan result table filtering (by capability (privacy, IBSS/BSS, WPA/RSN IE, * SSID) @@ -225,10 +228,26 @@ ieee80211_bss_info_update(struct ieee80211_local *local, struct ieee80211_mgmt *mgmt, size_t len, struct ieee802_11_elems *elems, - int freq, bool beacon) + struct ieee80211_channel *channel, + bool beacon) { struct ieee80211_bss *bss; - int clen; + int clen, freq = channel->center_freq; + enum cfg80211_signal_type sigtype = CFG80211_SIGNAL_TYPE_NONE; + s32 signal = 0; + + if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) { + sigtype = CFG80211_SIGNAL_TYPE_MBM; + signal = rx_status->signal * 100; + } else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) { + sigtype = CFG80211_SIGNAL_TYPE_UNSPEC; + signal = (rx_status->signal * 100) / local->hw.max_signal; + } + + cfg80211_put_bss( + cfg80211_inform_bss_frame(local->hw.wiphy, channel, + mgmt, len, signal, sigtype, + GFP_ATOMIC)); #ifdef CONFIG_MAC80211_MESH if (elems->mesh_config) @@ -401,7 +420,7 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, bss = ieee80211_bss_info_update(sdata->local, rx_status, mgmt, skb->len, &elems, - freq, beacon); + channel, beacon); if (bss) ieee80211_rx_bss_put(sdata->local, bss); @@ -439,26 +458,22 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, ieee80211_tx_skb(sdata, skb, 0); } -void ieee80211_scan_completed(struct ieee80211_hw *hw) +void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_sub_if_data *sdata; - union iwreq_data wrqu; if (WARN_ON(!local->hw_scanning && !local->sw_scanning)) return; - local->last_scan_completed = jiffies; - memset(&wrqu, 0, sizeof(wrqu)); + if (WARN_ON(!local->scan_req)) + return; - /* - * local->scan_sdata could have been NULLed by the interface - * down code in case we were scanning on an interface that is - * being taken down. - */ - sdata = local->scan_sdata; - if (sdata) - wireless_send_event(sdata->dev, SIOCGIWSCAN, &wrqu, NULL); + if (local->scan_req != &local->int_scan_req) + cfg80211_scan_done(local->scan_req, aborted); + local->scan_req = NULL; + + local->last_scan_completed = jiffies; if (local->hw_scanning) { local->hw_scanning = false; @@ -520,9 +535,8 @@ void ieee80211_scan_work(struct work_struct *work) struct ieee80211_local *local = container_of(work, struct ieee80211_local, scan_work.work); struct ieee80211_sub_if_data *sdata = local->scan_sdata; - struct ieee80211_supported_band *sband; struct ieee80211_channel *chan; - int skip; + int skip, i; unsigned long next_delay = 0; /* @@ -533,33 +547,13 @@ void ieee80211_scan_work(struct work_struct *work) switch (local->scan_state) { case SCAN_SET_CHANNEL: - /* - * Get current scan band. scan_band may be IEEE80211_NUM_BANDS - * after we successfully scanned the last channel of the last - * band (and the last band is supported by the hw) - */ - if (local->scan_band < IEEE80211_NUM_BANDS) - sband = local->hw.wiphy->bands[local->scan_band]; - else - sband = NULL; - - /* - * If we are at an unsupported band and have more bands - * left to scan, advance to the next supported one. - */ - while (!sband && local->scan_band < IEEE80211_NUM_BANDS - 1) { - local->scan_band++; - sband = local->hw.wiphy->bands[local->scan_band]; - local->scan_channel_idx = 0; - } - /* if no more bands/channels left, complete scan */ - if (!sband || local->scan_channel_idx >= sband->n_channels) { - ieee80211_scan_completed(local_to_hw(local)); + if (local->scan_channel_idx >= local->scan_req->n_channels) { + ieee80211_scan_completed(local_to_hw(local), false); return; } skip = 0; - chan = &sband->channels[local->scan_channel_idx]; + chan = local->scan_req->channels[local->scan_channel_idx]; if (chan->flags & IEEE80211_CHAN_DISABLED || (sdata->vif.type == NL80211_IFTYPE_ADHOC && @@ -575,15 +569,6 @@ void ieee80211_scan_work(struct work_struct *work) /* advance state machine to next channel/band */ local->scan_channel_idx++; - if (local->scan_channel_idx >= sband->n_channels) { - /* - * scan_band may end up == IEEE80211_NUM_BANDS, but - * we'll catch that case above and complete the scan - * if that is the case. - */ - local->scan_band++; - local->scan_channel_idx = 0; - } if (skip) break; @@ -596,10 +581,14 @@ void ieee80211_scan_work(struct work_struct *work) next_delay = IEEE80211_PASSIVE_CHANNEL_TIME; local->scan_state = SCAN_SET_CHANNEL; - if (local->scan_channel->flags & IEEE80211_CHAN_PASSIVE_SCAN) + if (local->scan_channel->flags & IEEE80211_CHAN_PASSIVE_SCAN || + !local->scan_req->n_ssids) break; - ieee80211_send_probe_req(sdata, NULL, local->scan_ssid, - local->scan_ssid_len); + for (i = 0; i < local->scan_req->n_ssids; i++) + ieee80211_send_probe_req( + sdata, NULL, + local->scan_req->ssids[i].ssid, + local->scan_req->ssids[i].ssid_len); next_delay = IEEE80211_CHANNEL_TIME; break; } @@ -610,14 +599,19 @@ void ieee80211_scan_work(struct work_struct *work) int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata, - u8 *ssid, size_t ssid_len) + struct cfg80211_scan_request *req) { struct ieee80211_local *local = scan_sdata->local; struct ieee80211_sub_if_data *sdata; - if (ssid_len > IEEE80211_MAX_SSID_LEN) + if (!req) return -EINVAL; + if (local->scan_req && local->scan_req != req) + return -EBUSY; + + local->scan_req = req; + /* MLME-SCAN.request (page 118) page 144 (11.1.3.1) * BSSType: INFRASTRUCTURE, INDEPENDENT, ANY_BSS * BSSID: MACAddress @@ -645,7 +639,7 @@ int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata, int rc; local->hw_scanning = true; - rc = local->ops->hw_scan(local_to_hw(local), ssid, ssid_len); + rc = local->ops->hw_scan(local_to_hw(local), req); if (rc) { local->hw_scanning = false; return rc; @@ -678,15 +672,10 @@ int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata, } mutex_unlock(&local->iflist_mtx); - if (ssid) { - local->scan_ssid_len = ssid_len; - memcpy(local->scan_ssid, ssid, ssid_len); - } else - local->scan_ssid_len = 0; local->scan_state = SCAN_SET_CHANNEL; local->scan_channel_idx = 0; - local->scan_band = IEEE80211_BAND_2GHZ; local->scan_sdata = scan_sdata; + local->scan_req = req; netif_addr_lock_bh(local->mdev); local->filter_flags |= FIF_BCN_PRBRESP_PROMISC; @@ -706,13 +695,21 @@ int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata, int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, - u8 *ssid, size_t ssid_len) + struct cfg80211_scan_request *req) { struct ieee80211_local *local = sdata->local; struct ieee80211_if_sta *ifsta; + if (!req) + return -EINVAL; + + if (local->scan_req && local->scan_req != req) + return -EBUSY; + + local->scan_req = req; + if (sdata->vif.type != NL80211_IFTYPE_STATION) - return ieee80211_start_scan(sdata, ssid, ssid_len); + return ieee80211_start_scan(sdata, req); /* * STA has a state machine that might need to defer scanning @@ -727,241 +724,8 @@ int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, } ifsta = &sdata->u.sta; - - ifsta->scan_ssid_len = ssid_len; - if (ssid_len) - memcpy(ifsta->scan_ssid, ssid, ssid_len); set_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request); queue_work(local->hw.workqueue, &ifsta->work); return 0; } - - -static void ieee80211_scan_add_ies(struct iw_request_info *info, - struct ieee80211_bss *bss, - char **current_ev, char *end_buf) -{ - u8 *pos, *end, *next; - struct iw_event iwe; - - if (bss == NULL || bss->ies == NULL) - return; - - /* - * If needed, fragment the IEs buffer (at IE boundaries) into short - * enough fragments to fit into IW_GENERIC_IE_MAX octet messages. - */ - pos = bss->ies; - end = pos + bss->ies_len; - - while (end - pos > IW_GENERIC_IE_MAX) { - next = pos + 2 + pos[1]; - while (next + 2 + next[1] - pos < IW_GENERIC_IE_MAX) - next = next + 2 + next[1]; - - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = IWEVGENIE; - iwe.u.data.length = next - pos; - *current_ev = iwe_stream_add_point(info, *current_ev, - end_buf, &iwe, pos); - - pos = next; - } - - if (end > pos) { - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = IWEVGENIE; - iwe.u.data.length = end - pos; - *current_ev = iwe_stream_add_point(info, *current_ev, - end_buf, &iwe, pos); - } -} - - -static char * -ieee80211_scan_result(struct ieee80211_local *local, - struct iw_request_info *info, - struct ieee80211_bss *bss, - char *current_ev, char *end_buf) -{ - struct iw_event iwe; - char *buf; - - if (time_after(jiffies, - bss->last_update + IEEE80211_SCAN_RESULT_EXPIRE)) - return current_ev; - - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = SIOCGIWAP; - iwe.u.ap_addr.sa_family = ARPHRD_ETHER; - memcpy(iwe.u.ap_addr.sa_data, bss->bssid, ETH_ALEN); - current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe, - IW_EV_ADDR_LEN); - - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = SIOCGIWESSID; - if (bss_mesh_cfg(bss)) { - iwe.u.data.length = bss_mesh_id_len(bss); - iwe.u.data.flags = 1; - current_ev = iwe_stream_add_point(info, current_ev, end_buf, - &iwe, bss_mesh_id(bss)); - } else { - iwe.u.data.length = bss->ssid_len; - iwe.u.data.flags = 1; - current_ev = iwe_stream_add_point(info, current_ev, end_buf, - &iwe, bss->ssid); - } - - if (bss->capability & (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS) - || bss_mesh_cfg(bss)) { - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = SIOCGIWMODE; - if (bss_mesh_cfg(bss)) - iwe.u.mode = IW_MODE_MESH; - else if (bss->capability & WLAN_CAPABILITY_ESS) - iwe.u.mode = IW_MODE_MASTER; - else - iwe.u.mode = IW_MODE_ADHOC; - current_ev = iwe_stream_add_event(info, current_ev, end_buf, - &iwe, IW_EV_UINT_LEN); - } - - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = SIOCGIWFREQ; - iwe.u.freq.m = ieee80211_frequency_to_channel(bss->freq); - iwe.u.freq.e = 0; - current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe, - IW_EV_FREQ_LEN); - - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = SIOCGIWFREQ; - iwe.u.freq.m = bss->freq; - iwe.u.freq.e = 6; - current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe, - IW_EV_FREQ_LEN); - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = IWEVQUAL; - iwe.u.qual.qual = bss->qual; - iwe.u.qual.level = bss->signal; - iwe.u.qual.noise = bss->noise; - iwe.u.qual.updated = local->wstats_flags; - current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe, - IW_EV_QUAL_LEN); - - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = SIOCGIWENCODE; - if (bss->capability & WLAN_CAPABILITY_PRIVACY) - iwe.u.data.flags = IW_ENCODE_ENABLED | IW_ENCODE_NOKEY; - else - iwe.u.data.flags = IW_ENCODE_DISABLED; - iwe.u.data.length = 0; - current_ev = iwe_stream_add_point(info, current_ev, end_buf, - &iwe, ""); - - ieee80211_scan_add_ies(info, bss, ¤t_ev, end_buf); - - if (bss->supp_rates_len > 0) { - /* display all supported rates in readable format */ - char *p = current_ev + iwe_stream_lcp_len(info); - int i; - - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = SIOCGIWRATE; - /* Those two flags are ignored... */ - iwe.u.bitrate.fixed = iwe.u.bitrate.disabled = 0; - - for (i = 0; i < bss->supp_rates_len; i++) { - iwe.u.bitrate.value = ((bss->supp_rates[i] & - 0x7f) * 500000); - p = iwe_stream_add_value(info, current_ev, p, - end_buf, &iwe, IW_EV_PARAM_LEN); - } - current_ev = p; - } - - buf = kmalloc(30, GFP_ATOMIC); - if (buf) { - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = IWEVCUSTOM; - sprintf(buf, "tsf=%016llx", (unsigned long long)(bss->timestamp)); - iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(info, current_ev, end_buf, - &iwe, buf); - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = IWEVCUSTOM; - sprintf(buf, " Last beacon: %dms ago", - jiffies_to_msecs(jiffies - bss->last_update)); - iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(info, current_ev, - end_buf, &iwe, buf); - kfree(buf); - } - - if (bss_mesh_cfg(bss)) { - u8 *cfg = bss_mesh_cfg(bss); - buf = kmalloc(50, GFP_ATOMIC); - if (buf) { - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = IWEVCUSTOM; - sprintf(buf, "Mesh network (version %d)", cfg[0]); - iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(info, current_ev, - end_buf, - &iwe, buf); - sprintf(buf, "Path Selection Protocol ID: " - "0x%02X%02X%02X%02X", cfg[1], cfg[2], cfg[3], - cfg[4]); - iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(info, current_ev, - end_buf, - &iwe, buf); - sprintf(buf, "Path Selection Metric ID: " - "0x%02X%02X%02X%02X", cfg[5], cfg[6], cfg[7], - cfg[8]); - iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(info, current_ev, - end_buf, - &iwe, buf); - sprintf(buf, "Congestion Control Mode ID: " - "0x%02X%02X%02X%02X", cfg[9], cfg[10], - cfg[11], cfg[12]); - iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(info, current_ev, - end_buf, - &iwe, buf); - sprintf(buf, "Channel Precedence: " - "0x%02X%02X%02X%02X", cfg[13], cfg[14], - cfg[15], cfg[16]); - iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(info, current_ev, - end_buf, - &iwe, buf); - kfree(buf); - } - } - - return current_ev; -} - - -int ieee80211_scan_results(struct ieee80211_local *local, - struct iw_request_info *info, - char *buf, size_t len) -{ - char *current_ev = buf; - char *end_buf = buf + len; - struct ieee80211_bss *bss; - - spin_lock_bh(&local->bss_lock); - list_for_each_entry(bss, &local->bss_list, list) { - if (buf + len - current_ev <= IW_EV_ADDR_LEN) { - spin_unlock_bh(&local->bss_lock); - return -E2BIG; - } - current_ev = ieee80211_scan_result(local, info, bss, - current_ev, end_buf); - } - spin_unlock_bh(&local->bss_lock); - return current_ev - buf; -} diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index acd5808b87f4..b337d7d5edb3 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -173,8 +173,9 @@ static int ieee80211_ioctl_giwrange(struct net_device *dev, range->num_encoding_sizes = 2; range->max_encoding_tokens = NUM_DEFAULT_KEYS; + /* cfg80211 requires this, and enforces 0..100 */ if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) - range->max_qual.level = local->hw.max_signal; + range->max_qual.level = 100; else if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) range->max_qual.level = -110; else @@ -415,58 +416,6 @@ static int ieee80211_ioctl_giwap(struct net_device *dev, } -static int ieee80211_ioctl_siwscan(struct net_device *dev, - struct iw_request_info *info, - union iwreq_data *wrqu, char *extra) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct iw_scan_req *req = NULL; - u8 *ssid = NULL; - size_t ssid_len = 0; - - if (!netif_running(dev)) - return -ENETDOWN; - - if (sdata->vif.type != NL80211_IFTYPE_STATION && - sdata->vif.type != NL80211_IFTYPE_ADHOC && - sdata->vif.type != NL80211_IFTYPE_MESH_POINT) - return -EOPNOTSUPP; - - /* if SSID was specified explicitly then use that */ - if (wrqu->data.length == sizeof(struct iw_scan_req) && - wrqu->data.flags & IW_SCAN_THIS_ESSID) { - req = (struct iw_scan_req *)extra; - ssid = req->essid; - ssid_len = req->essid_len; - } - - return ieee80211_request_scan(sdata, ssid, ssid_len); -} - - -static int ieee80211_ioctl_giwscan(struct net_device *dev, - struct iw_request_info *info, - struct iw_point *data, char *extra) -{ - int res; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - if (local->sw_scanning || local->hw_scanning) - return -EAGAIN; - - res = ieee80211_scan_results(local, info, extra, data->length); - if (res >= 0) { - data->length = res; - return 0; - } - data->length = 0; - return res; -} - - static int ieee80211_ioctl_siwrate(struct net_device *dev, struct iw_request_info *info, struct iw_param *rate, char *extra) @@ -1165,8 +1114,8 @@ static const iw_handler ieee80211_handler[] = (iw_handler) ieee80211_ioctl_giwap, /* SIOCGIWAP */ (iw_handler) ieee80211_ioctl_siwmlme, /* SIOCSIWMLME */ (iw_handler) NULL, /* SIOCGIWAPLIST */ - (iw_handler) ieee80211_ioctl_siwscan, /* SIOCSIWSCAN */ - (iw_handler) ieee80211_ioctl_giwscan, /* SIOCGIWSCAN */ + (iw_handler) cfg80211_wext_siwscan, /* SIOCSIWSCAN */ + (iw_handler) cfg80211_wext_giwscan, /* SIOCGIWSCAN */ (iw_handler) ieee80211_ioctl_siwessid, /* SIOCSIWESSID */ (iw_handler) ieee80211_ioctl_giwessid, /* SIOCGIWESSID */ (iw_handler) NULL, /* SIOCSIWNICKN */ diff --git a/net/wireless/Makefile b/net/wireless/Makefile index 938a334c8dbc..dad43c24f695 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_LIB80211_CRYPT_WEP) += lib80211_crypt_wep.o obj-$(CONFIG_LIB80211_CRYPT_CCMP) += lib80211_crypt_ccmp.o obj-$(CONFIG_LIB80211_CRYPT_TKIP) += lib80211_crypt_tkip.o -cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o +cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o cfg80211-$(CONFIG_WIRELESS_EXT) += wext-compat.o cfg80211-$(CONFIG_NL80211) += nl80211.o diff --git a/net/wireless/core.c b/net/wireless/core.c index 125226476089..3cccd1390cea 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -240,6 +240,8 @@ struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv) mutex_init(&drv->mtx); mutex_init(&drv->devlist_mtx); INIT_LIST_HEAD(&drv->netdev_list); + spin_lock_init(&drv->bss_lock); + INIT_LIST_HEAD(&drv->bss_list); device_initialize(&drv->wiphy.dev); drv->wiphy.dev.class = &ieee80211_class; @@ -259,6 +261,9 @@ int wiphy_register(struct wiphy *wiphy) int i; u16 ifmodes = wiphy->interface_modes; + if (WARN_ON(wiphy->max_scan_ssids < 1)) + return -EINVAL; + /* sanity check ifmodes */ WARN_ON(!ifmodes); ifmodes &= ((1 << __NL80211_IFTYPE_AFTER_LAST) - 1) & ~1; @@ -367,8 +372,11 @@ EXPORT_SYMBOL(wiphy_unregister); void cfg80211_dev_free(struct cfg80211_registered_device *drv) { + struct cfg80211_internal_bss *scan, *tmp; mutex_destroy(&drv->mtx); mutex_destroy(&drv->devlist_mtx); + list_for_each_entry_safe(scan, tmp, &drv->bss_list, list) + kfree(scan); kfree(drv); } diff --git a/net/wireless/core.h b/net/wireless/core.h index f7fb9f413028..e29ad4cd464f 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -8,6 +8,8 @@ #include #include #include +#include +#include #include #include #include @@ -41,6 +43,13 @@ struct cfg80211_registered_device { struct mutex devlist_mtx; struct list_head netdev_list; + /* BSSes/scanning */ + spinlock_t bss_lock; + struct list_head bss_list; + struct rb_root bss_tree; + u32 bss_generation; + struct cfg80211_scan_request *scan_req; /* protected by RTNL */ + /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ struct wiphy wiphy __attribute__((__aligned__(NETDEV_ALIGN))); @@ -56,6 +65,15 @@ struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy) extern struct mutex cfg80211_drv_mutex; extern struct list_head cfg80211_drv_list; +struct cfg80211_internal_bss { + struct list_head list; + struct rb_node rbn; + unsigned long ts; + struct kref ref; + /* must be last because of priv member */ + struct cfg80211_bss pub; +}; + /* * This function returns a pointer to the driver * that the genl_info item that is passed refers to. @@ -94,4 +112,6 @@ extern int cfg80211_dev_rename(struct cfg80211_registered_device *drv, void ieee80211_set_bitrate_flags(struct wiphy *wiphy); void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby); +void cfg80211_bss_expire(struct cfg80211_registered_device *dev); + #endif /* __NET_WIRELESS_CORE_H */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d452396006ee..298a4de59948 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include "core.h" @@ -109,6 +110,8 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_MGMT_SUBTYPE] = { .type = NLA_U8 }, [NL80211_ATTR_IE] = { .type = NLA_BINARY, .len = IEEE80211_MAX_DATA_LEN }, + [NL80211_ATTR_SCAN_FREQUENCIES] = { .type = NLA_NESTED }, + [NL80211_ATTR_SCAN_SSIDS] = { .type = NLA_NESTED }, }; /* message building helper */ @@ -141,6 +144,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, dev->idx); NLA_PUT_STRING(msg, NL80211_ATTR_WIPHY_NAME, wiphy_name(&dev->wiphy)); + NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS, + dev->wiphy.max_scan_ssids); nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES); if (!nl_modes) @@ -2270,6 +2275,246 @@ static int nl80211_set_mgmt_extra_ie(struct sk_buff *skb, return err; } +static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *drv; + struct net_device *dev; + struct cfg80211_scan_request *request; + struct cfg80211_ssid *ssid; + struct ieee80211_channel *channel; + struct nlattr *attr; + struct wiphy *wiphy; + int err, tmp, n_ssids = 0, n_channels = 0, i; + enum ieee80211_band band; + + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); + if (err) + return err; + + wiphy = &drv->wiphy; + + if (!drv->ops->scan) { + err = -EOPNOTSUPP; + goto out; + } + + rtnl_lock(); + + if (drv->scan_req) { + err = -EBUSY; + goto out_unlock; + } + + if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) { + nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_FREQUENCIES], tmp) + n_channels++; + if (!n_channels) { + err = -EINVAL; + goto out_unlock; + } + } else { + for (band = 0; band < IEEE80211_NUM_BANDS; band++) + if (wiphy->bands[band]) + n_channels += wiphy->bands[band]->n_channels; + } + + if (info->attrs[NL80211_ATTR_SCAN_SSIDS]) + nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) + n_ssids++; + + if (n_ssids > wiphy->max_scan_ssids) { + err = -EINVAL; + goto out_unlock; + } + + request = kzalloc(sizeof(*request) + + sizeof(*ssid) * n_ssids + + sizeof(channel) * n_channels, GFP_KERNEL); + if (!request) { + err = -ENOMEM; + goto out_unlock; + } + + request->channels = (void *)((char *)request + sizeof(*request)); + request->n_channels = n_channels; + if (n_ssids) + request->ssids = (void *)(request->channels + n_channels); + request->n_ssids = n_ssids; + + if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) { + /* user specified, bail out if channel not found */ + request->n_channels = n_channels; + i = 0; + nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_FREQUENCIES], tmp) { + request->channels[i] = ieee80211_get_channel(wiphy, nla_get_u32(attr)); + if (!request->channels[i]) { + err = -EINVAL; + goto out_free; + } + i++; + } + } else { + /* all channels */ + i = 0; + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + int j; + if (!wiphy->bands[band]) + continue; + for (j = 0; j < wiphy->bands[band]->n_channels; j++) { + request->channels[i] = &wiphy->bands[band]->channels[j]; + i++; + } + } + } + + i = 0; + if (info->attrs[NL80211_ATTR_SCAN_SSIDS]) { + nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SSIDS], tmp) { + if (request->ssids[i].ssid_len > IEEE80211_MAX_SSID_LEN) { + err = -EINVAL; + goto out_free; + } + memcpy(request->ssids[i].ssid, nla_data(attr), nla_len(attr)); + request->ssids[i].ssid_len = nla_len(attr); + i++; + } + } + + request->ifidx = dev->ifindex; + request->wiphy = &drv->wiphy; + + drv->scan_req = request; + err = drv->ops->scan(&drv->wiphy, dev, request); + + out_free: + if (err) { + drv->scan_req = NULL; + kfree(request); + } + out_unlock: + rtnl_unlock(); + out: + cfg80211_put_dev(drv); + dev_put(dev); + return err; +} + +static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags, + struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct cfg80211_bss *res) +{ + void *hdr; + struct nlattr *bss; + + hdr = nl80211hdr_put(msg, pid, seq, flags, + NL80211_CMD_NEW_SCAN_RESULTS); + if (!hdr) + return -1; + + NLA_PUT_U32(msg, NL80211_ATTR_SCAN_GENERATION, + rdev->bss_generation); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex); + + bss = nla_nest_start(msg, NL80211_ATTR_BSS); + if (!bss) + goto nla_put_failure; + if (!is_zero_ether_addr(res->bssid)) + NLA_PUT(msg, NL80211_BSS_BSSID, ETH_ALEN, res->bssid); + if (res->information_elements && res->len_information_elements) + NLA_PUT(msg, NL80211_BSS_INFORMATION_ELEMENTS, + res->len_information_elements, + res->information_elements); + if (res->tsf) + NLA_PUT_U64(msg, NL80211_BSS_TSF, res->tsf); + if (res->beacon_interval) + NLA_PUT_U16(msg, NL80211_BSS_BEACON_INTERVAL, res->beacon_interval); + NLA_PUT_U16(msg, NL80211_BSS_CAPABILITY, res->capability); + NLA_PUT_U32(msg, NL80211_BSS_FREQUENCY, res->channel->center_freq); + + switch (res->signal_type) { + case CFG80211_SIGNAL_TYPE_MBM: + NLA_PUT_U32(msg, NL80211_BSS_SIGNAL_MBM, res->signal); + break; + case CFG80211_SIGNAL_TYPE_UNSPEC: + NLA_PUT_U8(msg, NL80211_BSS_SIGNAL_UNSPEC, res->signal); + break; + default: + break; + } + + nla_nest_end(msg, bss); + + return genlmsg_end(msg, hdr); + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int nl80211_dump_scan(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct cfg80211_registered_device *dev; + struct net_device *netdev; + struct cfg80211_internal_bss *scan; + int ifidx = cb->args[0]; + int start = cb->args[1], idx = 0; + int err; + + if (!ifidx) { + err = nlmsg_parse(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, + nl80211_fam.attrbuf, nl80211_fam.maxattr, + nl80211_policy); + if (err) + return err; + + if (!nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX]) + return -EINVAL; + + ifidx = nla_get_u32(nl80211_fam.attrbuf[NL80211_ATTR_IFINDEX]); + if (!ifidx) + return -EINVAL; + cb->args[0] = ifidx; + } + + netdev = dev_get_by_index(&init_net, ifidx); + if (!netdev) + return -ENODEV; + + dev = cfg80211_get_dev_from_ifindex(ifidx); + if (IS_ERR(dev)) { + err = PTR_ERR(dev); + goto out_put_netdev; + } + + spin_lock_bh(&dev->bss_lock); + cfg80211_bss_expire(dev); + + list_for_each_entry(scan, &dev->bss_list, list) { + if (++idx <= start) + continue; + if (nl80211_send_bss(skb, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + dev, netdev, &scan->pub) < 0) { + idx--; + goto out; + } + } + + out: + spin_unlock_bh(&dev->bss_lock); + + cb->args[1] = idx; + err = skb->len; + cfg80211_put_dev(dev); + out_put_netdev: + dev_put(netdev); + + return err; +} + static struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -2443,12 +2688,26 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, + { + .cmd = NL80211_CMD_TRIGGER_SCAN, + .doit = nl80211_trigger_scan, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_GET_SCAN, + .policy = nl80211_policy, + .dumpit = nl80211_dump_scan, + }, }; /* multicast groups */ static struct genl_multicast_group nl80211_config_mcgrp = { .name = "config", }; +static struct genl_multicast_group nl80211_scan_mcgrp = { + .name = "scan", +}; /* notification functions */ @@ -2468,6 +2727,66 @@ void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev) genlmsg_multicast(msg, 0, nl80211_config_mcgrp.id, GFP_KERNEL); } +static int nl80211_send_scan_donemsg(struct sk_buff *msg, + struct cfg80211_registered_device *rdev, + struct net_device *netdev, + u32 pid, u32 seq, int flags, + u32 cmd) +{ + void *hdr; + + hdr = nl80211hdr_put(msg, pid, seq, flags, cmd); + if (!hdr) + return -1; + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + + /* XXX: we should probably bounce back the request? */ + + return genlmsg_end(msg, hdr); + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +void nl80211_send_scan_done(struct cfg80211_registered_device *rdev, + struct net_device *netdev) +{ + struct sk_buff *msg; + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + return; + + if (nl80211_send_scan_donemsg(msg, rdev, netdev, 0, 0, 0, + NL80211_CMD_NEW_SCAN_RESULTS) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast(msg, 0, nl80211_scan_mcgrp.id, GFP_KERNEL); +} + +void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev, + struct net_device *netdev) +{ + struct sk_buff *msg; + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + return; + + if (nl80211_send_scan_donemsg(msg, rdev, netdev, 0, 0, 0, + NL80211_CMD_SCAN_ABORTED) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast(msg, 0, nl80211_scan_mcgrp.id, GFP_KERNEL); +} + /* initialisation/exit functions */ int nl80211_init(void) @@ -2488,6 +2807,10 @@ int nl80211_init(void) if (err) goto err_out; + err = genl_register_mc_group(&nl80211_fam, &nl80211_scan_mcgrp); + if (err) + goto err_out; + return 0; err_out: genl_unregister_family(&nl80211_fam); diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index f3ea5c029aee..b565a5f84e97 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -7,6 +7,10 @@ extern int nl80211_init(void); extern void nl80211_exit(void); extern void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev); +extern void nl80211_send_scan_done(struct cfg80211_registered_device *rdev, + struct net_device *netdev); +extern void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev, + struct net_device *netdev); #else static inline int nl80211_init(void) { @@ -19,6 +23,10 @@ static inline void nl80211_notify_dev_rename( struct cfg80211_registered_device *rdev) { } +static inline void +nl80211_send_scan_done(struct cfg80211_registered_device *rdev, + struct net_device *netdev) +{} #endif /* CONFIG_NL80211 */ #endif /* __NET_WIRELESS_NL80211_H */ diff --git a/net/wireless/scan.c b/net/wireless/scan.c new file mode 100644 index 000000000000..009d12810c55 --- /dev/null +++ b/net/wireless/scan.c @@ -0,0 +1,807 @@ +/* + * cfg80211 scan result handling + * + * Copyright 2008 Johannes Berg + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "core.h" +#include "nl80211.h" + +#define IEEE80211_SCAN_RESULT_EXPIRE (10 * HZ) + +void cfg80211_scan_done(struct cfg80211_scan_request *request, bool aborted) +{ + struct net_device *dev; +#ifdef CONFIG_WIRELESS_EXT + union iwreq_data wrqu; +#endif + + dev = dev_get_by_index(&init_net, request->ifidx); + if (!dev) + goto out; + + WARN_ON(request != wiphy_to_dev(request->wiphy)->scan_req); + wiphy_to_dev(request->wiphy)->scan_req = NULL; + + if (aborted) + nl80211_send_scan_aborted(wiphy_to_dev(request->wiphy), dev); + else + nl80211_send_scan_done(wiphy_to_dev(request->wiphy), dev); + +#ifdef CONFIG_WIRELESS_EXT + if (!aborted) { + memset(&wrqu, 0, sizeof(wrqu)); + + wireless_send_event(dev, SIOCGIWSCAN, &wrqu, NULL); + } +#endif + + dev_put(dev); + + out: + kfree(request); +} +EXPORT_SYMBOL(cfg80211_scan_done); + +static void bss_release(struct kref *ref) +{ + struct cfg80211_internal_bss *bss; + + bss = container_of(ref, struct cfg80211_internal_bss, ref); + kfree(bss); +} + +/* must hold dev->bss_lock! */ +void cfg80211_bss_expire(struct cfg80211_registered_device *dev) +{ + struct cfg80211_internal_bss *bss, *tmp; + bool expired = false; + + list_for_each_entry_safe(bss, tmp, &dev->bss_list, list) { + if (!time_after(jiffies, bss->ts + IEEE80211_SCAN_RESULT_EXPIRE)) + continue; + list_del(&bss->list); + rb_erase(&bss->rbn, &dev->bss_tree); + kref_put(&bss->ref, bss_release); + expired = true; + } + + if (expired) + dev->bss_generation++; +} + +static u8 *find_ie(u8 num, u8 *ies, size_t len) +{ + while (len > 2 && ies[0] != num) { + len -= ies[1] + 2; + ies += ies[1] + 2; + } + if (len < 2) + return NULL; + if (len < 2 + ies[1]) + return NULL; + return ies; +} + +static int cmp_ies(u8 num, u8 *ies1, size_t len1, u8 *ies2, size_t len2) +{ + const u8 *ie1 = find_ie(num, ies1, len1); + const u8 *ie2 = find_ie(num, ies2, len2); + int r; + + if (!ie1 && !ie2) + return 0; + if (!ie1) + return -1; + + r = memcmp(ie1 + 2, ie2 + 2, min(ie1[1], ie2[1])); + if (r == 0 && ie1[1] != ie2[1]) + return ie2[1] - ie1[1]; + return r; +} + +static bool is_bss(struct cfg80211_bss *a, + const u8 *bssid, + const u8 *ssid, size_t ssid_len) +{ + const u8 *ssidie; + + if (compare_ether_addr(a->bssid, bssid)) + return false; + + ssidie = find_ie(WLAN_EID_SSID, + a->information_elements, + a->len_information_elements); + if (!ssidie) + return false; + if (ssidie[1] != ssid_len) + return false; + return memcmp(ssidie + 2, ssid, ssid_len) == 0; +} + +static bool is_mesh(struct cfg80211_bss *a, + const u8 *meshid, size_t meshidlen, + const u8 *meshcfg) +{ + const u8 *ie; + + if (!is_zero_ether_addr(a->bssid)) + return false; + + ie = find_ie(WLAN_EID_MESH_ID, + a->information_elements, + a->len_information_elements); + if (!ie) + return false; + if (ie[1] != meshidlen) + return false; + if (memcmp(ie + 2, meshid, meshidlen)) + return false; + + ie = find_ie(WLAN_EID_MESH_CONFIG, + a->information_elements, + a->len_information_elements); + if (ie[1] != IEEE80211_MESH_CONFIG_LEN) + return false; + + /* + * Ignore mesh capability (last two bytes of the IE) when + * comparing since that may differ between stations taking + * part in the same mesh. + */ + return memcmp(ie + 2, meshcfg, IEEE80211_MESH_CONFIG_LEN - 2) == 0; +} + +static int cmp_bss(struct cfg80211_bss *a, + struct cfg80211_bss *b) +{ + int r; + + if (a->channel != b->channel) + return b->channel->center_freq - a->channel->center_freq; + + r = memcmp(a->bssid, b->bssid, ETH_ALEN); + if (r) + return r; + + if (is_zero_ether_addr(a->bssid)) { + r = cmp_ies(WLAN_EID_MESH_ID, + a->information_elements, + a->len_information_elements, + b->information_elements, + b->len_information_elements); + if (r) + return r; + return cmp_ies(WLAN_EID_MESH_CONFIG, + a->information_elements, + a->len_information_elements, + b->information_elements, + b->len_information_elements); + } + + return cmp_ies(WLAN_EID_SSID, + a->information_elements, + a->len_information_elements, + b->information_elements, + b->len_information_elements); +} + +struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, + struct ieee80211_channel *channel, + const u8 *bssid, + const u8 *ssid, size_t ssid_len) +{ + struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy); + struct cfg80211_internal_bss *bss, *res = NULL; + + spin_lock_bh(&dev->bss_lock); + + list_for_each_entry(bss, &dev->bss_list, list) { + if (channel && bss->pub.channel != channel) + continue; + if (is_bss(&bss->pub, bssid, ssid, ssid_len)) { + res = bss; + kref_get(&res->ref); + break; + } + } + + spin_unlock_bh(&dev->bss_lock); + if (!res) + return NULL; + return &res->pub; +} +EXPORT_SYMBOL(cfg80211_get_bss); + +struct cfg80211_bss *cfg80211_get_mesh(struct wiphy *wiphy, + struct ieee80211_channel *channel, + const u8 *meshid, size_t meshidlen, + const u8 *meshcfg) +{ + struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy); + struct cfg80211_internal_bss *bss, *res = NULL; + + spin_lock_bh(&dev->bss_lock); + + list_for_each_entry(bss, &dev->bss_list, list) { + if (channel && bss->pub.channel != channel) + continue; + if (is_mesh(&bss->pub, meshid, meshidlen, meshcfg)) { + res = bss; + kref_get(&res->ref); + break; + } + } + + spin_unlock_bh(&dev->bss_lock); + if (!res) + return NULL; + return &res->pub; +} +EXPORT_SYMBOL(cfg80211_get_mesh); + + +static void rb_insert_bss(struct cfg80211_registered_device *dev, + struct cfg80211_internal_bss *bss) +{ + struct rb_node **p = &dev->bss_tree.rb_node; + struct rb_node *parent = NULL; + struct cfg80211_internal_bss *tbss; + int cmp; + + while (*p) { + parent = *p; + tbss = rb_entry(parent, struct cfg80211_internal_bss, rbn); + + cmp = cmp_bss(&bss->pub, &tbss->pub); + + if (WARN_ON(!cmp)) { + /* will sort of leak this BSS */ + return; + } + + if (cmp < 0) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + + rb_link_node(&bss->rbn, parent, p); + rb_insert_color(&bss->rbn, &dev->bss_tree); +} + +static struct cfg80211_internal_bss * +rb_find_bss(struct cfg80211_registered_device *dev, + struct cfg80211_internal_bss *res) +{ + struct rb_node *n = dev->bss_tree.rb_node; + struct cfg80211_internal_bss *bss; + int r; + + while (n) { + bss = rb_entry(n, struct cfg80211_internal_bss, rbn); + r = cmp_bss(&res->pub, &bss->pub); + + if (r == 0) + return bss; + else if (r < 0) + n = n->rb_left; + else + n = n->rb_right; + } + + return NULL; +} + +static struct cfg80211_internal_bss * +cfg80211_bss_update(struct cfg80211_registered_device *dev, + struct cfg80211_internal_bss *res, + bool overwrite) +{ + struct cfg80211_internal_bss *found = NULL; + const u8 *meshid, *meshcfg; + + /* + * The reference to "res" is donated to this function. + */ + + if (WARN_ON(!res->pub.channel)) { + kref_put(&res->ref, bss_release); + return NULL; + } + + res->ts = jiffies; + + if (is_zero_ether_addr(res->pub.bssid)) { + /* must be mesh, verify */ + meshid = find_ie(WLAN_EID_MESH_ID, res->pub.information_elements, + res->pub.len_information_elements); + meshcfg = find_ie(WLAN_EID_MESH_CONFIG, + res->pub.information_elements, + res->pub.len_information_elements); + if (!meshid || !meshcfg || + meshcfg[1] != IEEE80211_MESH_CONFIG_LEN) { + /* bogus mesh */ + kref_put(&res->ref, bss_release); + return NULL; + } + } + + spin_lock_bh(&dev->bss_lock); + + found = rb_find_bss(dev, res); + + if (found && overwrite) { + list_replace(&found->list, &res->list); + rb_replace_node(&found->rbn, &res->rbn, + &dev->bss_tree); + kref_put(&found->ref, bss_release); + found = res; + } else if (found) { + kref_get(&found->ref); + found->pub.beacon_interval = res->pub.beacon_interval; + found->pub.tsf = res->pub.tsf; + found->pub.signal = res->pub.signal; + found->pub.signal_type = res->pub.signal_type; + found->pub.capability = res->pub.capability; + found->ts = res->ts; + kref_put(&res->ref, bss_release); + } else { + /* this "consumes" the reference */ + list_add_tail(&res->list, &dev->bss_list); + rb_insert_bss(dev, res); + found = res; + } + + dev->bss_generation++; + spin_unlock_bh(&dev->bss_lock); + + kref_get(&found->ref); + return found; +} + +struct cfg80211_bss * +cfg80211_inform_bss_frame(struct wiphy *wiphy, + struct ieee80211_channel *channel, + struct ieee80211_mgmt *mgmt, size_t len, + s32 signal, enum cfg80211_signal_type sigtype, + gfp_t gfp) +{ + struct cfg80211_internal_bss *res; + size_t ielen = len - offsetof(struct ieee80211_mgmt, + u.probe_resp.variable); + bool overwrite; + size_t privsz = wiphy->bss_priv_size; + + if (WARN_ON(sigtype == NL80211_BSS_SIGNAL_UNSPEC && + (signal < 0 || signal > 100))) + return NULL; + + if (WARN_ON(!mgmt || !wiphy || + len < offsetof(struct ieee80211_mgmt, u.probe_resp.variable))) + return NULL; + + res = kzalloc(sizeof(*res) + privsz + ielen, gfp); + if (!res) + return NULL; + + memcpy(res->pub.bssid, mgmt->bssid, ETH_ALEN); + res->pub.channel = channel; + res->pub.signal_type = sigtype; + res->pub.signal = signal; + res->pub.tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp); + res->pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int); + res->pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info); + /* point to after the private area */ + res->pub.information_elements = (u8 *)res + sizeof(*res) + privsz; + memcpy(res->pub.information_elements, mgmt->u.probe_resp.variable, ielen); + res->pub.len_information_elements = ielen; + + kref_init(&res->ref); + + overwrite = ieee80211_is_probe_resp(mgmt->frame_control); + + res = cfg80211_bss_update(wiphy_to_dev(wiphy), res, overwrite); + if (!res) + return NULL; + + /* cfg80211_bss_update gives us a referenced result */ + return &res->pub; +} +EXPORT_SYMBOL(cfg80211_inform_bss_frame); + +void cfg80211_put_bss(struct cfg80211_bss *pub) +{ + struct cfg80211_internal_bss *bss; + + if (!pub) + return; + + bss = container_of(pub, struct cfg80211_internal_bss, pub); + kref_put(&bss->ref, bss_release); +} +EXPORT_SYMBOL(cfg80211_put_bss); + +#ifdef CONFIG_WIRELESS_EXT +int cfg80211_wext_siwscan(struct net_device *dev, + struct iw_request_info *info, + union iwreq_data *wrqu, char *extra) +{ + struct cfg80211_registered_device *rdev; + struct wiphy *wiphy; + struct iw_scan_req *wreq = NULL; + struct cfg80211_scan_request *creq; + int i, err, n_channels = 0; + enum ieee80211_band band; + + if (!netif_running(dev)) + return -ENETDOWN; + + rdev = cfg80211_get_dev_from_ifindex(dev->ifindex); + + if (IS_ERR(rdev)) + return PTR_ERR(rdev); + + if (rdev->scan_req) { + err = -EBUSY; + goto out; + } + + wiphy = &rdev->wiphy; + + for (band = 0; band < IEEE80211_NUM_BANDS; band++) + if (wiphy->bands[band]) + n_channels += wiphy->bands[band]->n_channels; + + creq = kzalloc(sizeof(*creq) + sizeof(struct cfg80211_ssid) + + n_channels * sizeof(void *), + GFP_ATOMIC); + if (!creq) { + err = -ENOMEM; + goto out; + } + + creq->wiphy = wiphy; + creq->ifidx = dev->ifindex; + creq->ssids = (void *)(creq + 1); + creq->channels = (void *)(creq->ssids + 1); + creq->n_channels = n_channels; + creq->n_ssids = 1; + + /* all channels */ + i = 0; + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + int j; + if (!wiphy->bands[band]) + continue; + for (j = 0; j < wiphy->bands[band]->n_channels; j++) { + creq->channels[i] = &wiphy->bands[band]->channels[j]; + i++; + } + } + + /* translate scan request */ + if (wrqu->data.length == sizeof(struct iw_scan_req)) { + wreq = (struct iw_scan_req *)extra; + + if (wrqu->data.flags & IW_SCAN_THIS_ESSID) { + if (wreq->essid_len > IEEE80211_MAX_SSID_LEN) + return -EINVAL; + memcpy(creq->ssids[0].ssid, wreq->essid, wreq->essid_len); + creq->ssids[0].ssid_len = wreq->essid_len; + } + if (wreq->scan_type == IW_SCAN_TYPE_PASSIVE) + creq->n_ssids = 0; + } + + rdev->scan_req = creq; + err = rdev->ops->scan(wiphy, dev, creq); + if (err) { + rdev->scan_req = NULL; + kfree(creq); + } + out: + cfg80211_put_dev(rdev); + return err; +} +EXPORT_SYMBOL(cfg80211_wext_siwscan); + +static void ieee80211_scan_add_ies(struct iw_request_info *info, + struct cfg80211_bss *bss, + char **current_ev, char *end_buf) +{ + u8 *pos, *end, *next; + struct iw_event iwe; + + if (!bss->information_elements || + !bss->len_information_elements) + return; + + /* + * If needed, fragment the IEs buffer (at IE boundaries) into short + * enough fragments to fit into IW_GENERIC_IE_MAX octet messages. + */ + pos = bss->information_elements; + end = pos + bss->len_information_elements; + + while (end - pos > IW_GENERIC_IE_MAX) { + next = pos + 2 + pos[1]; + while (next + 2 + next[1] - pos < IW_GENERIC_IE_MAX) + next = next + 2 + next[1]; + + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = IWEVGENIE; + iwe.u.data.length = next - pos; + *current_ev = iwe_stream_add_point(info, *current_ev, + end_buf, &iwe, pos); + + pos = next; + } + + if (end > pos) { + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = IWEVGENIE; + iwe.u.data.length = end - pos; + *current_ev = iwe_stream_add_point(info, *current_ev, + end_buf, &iwe, pos); + } +} + + +static char * +ieee80211_bss(struct iw_request_info *info, + struct cfg80211_internal_bss *bss, + char *current_ev, char *end_buf) +{ + struct iw_event iwe; + u8 *buf, *cfg, *p; + u8 *ie = bss->pub.information_elements; + int rem = bss->pub.len_information_elements, i; + bool ismesh = false; + + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = SIOCGIWAP; + iwe.u.ap_addr.sa_family = ARPHRD_ETHER; + memcpy(iwe.u.ap_addr.sa_data, bss->pub.bssid, ETH_ALEN); + current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe, + IW_EV_ADDR_LEN); + + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = SIOCGIWFREQ; + iwe.u.freq.m = ieee80211_frequency_to_channel(bss->pub.channel->center_freq); + iwe.u.freq.e = 0; + current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe, + IW_EV_FREQ_LEN); + + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = SIOCGIWFREQ; + iwe.u.freq.m = bss->pub.channel->center_freq; + iwe.u.freq.e = 6; + current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe, + IW_EV_FREQ_LEN); + + if (bss->pub.signal_type != CFG80211_SIGNAL_TYPE_NONE) { + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = IWEVQUAL; + iwe.u.qual.updated = IW_QUAL_LEVEL_UPDATED | + IW_QUAL_NOISE_INVALID | + IW_QUAL_QUAL_INVALID; + switch (bss->pub.signal_type) { + case CFG80211_SIGNAL_TYPE_MBM: + iwe.u.qual.level = bss->pub.signal / 100; + iwe.u.qual.updated |= IW_QUAL_DBM; + break; + case CFG80211_SIGNAL_TYPE_UNSPEC: + iwe.u.qual.level = bss->pub.signal; + break; + default: + /* not reached */ + break; + } + current_ev = iwe_stream_add_event(info, current_ev, end_buf, + &iwe, IW_EV_QUAL_LEN); + } + + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = SIOCGIWENCODE; + if (bss->pub.capability & WLAN_CAPABILITY_PRIVACY) + iwe.u.data.flags = IW_ENCODE_ENABLED | IW_ENCODE_NOKEY; + else + iwe.u.data.flags = IW_ENCODE_DISABLED; + iwe.u.data.length = 0; + current_ev = iwe_stream_add_point(info, current_ev, end_buf, + &iwe, ""); + + while (rem >= 2) { + /* invalid data */ + if (ie[1] > rem - 2) + break; + + switch (ie[0]) { + case WLAN_EID_SSID: + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = SIOCGIWESSID; + iwe.u.data.length = ie[1]; + iwe.u.data.flags = 1; + current_ev = iwe_stream_add_point(info, current_ev, end_buf, + &iwe, ie + 2); + break; + case WLAN_EID_MESH_ID: + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = SIOCGIWESSID; + iwe.u.data.length = ie[1]; + iwe.u.data.flags = 1; + current_ev = iwe_stream_add_point(info, current_ev, end_buf, + &iwe, ie + 2); + break; + case WLAN_EID_MESH_CONFIG: + ismesh = true; + if (ie[1] != IEEE80211_MESH_CONFIG_LEN) + break; + buf = kmalloc(50, GFP_ATOMIC); + if (!buf) + break; + cfg = ie + 2; + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = IWEVCUSTOM; + sprintf(buf, "Mesh network (version %d)", cfg[0]); + iwe.u.data.length = strlen(buf); + current_ev = iwe_stream_add_point(info, current_ev, + end_buf, + &iwe, buf); + sprintf(buf, "Path Selection Protocol ID: " + "0x%02X%02X%02X%02X", cfg[1], cfg[2], cfg[3], + cfg[4]); + iwe.u.data.length = strlen(buf); + current_ev = iwe_stream_add_point(info, current_ev, + end_buf, + &iwe, buf); + sprintf(buf, "Path Selection Metric ID: " + "0x%02X%02X%02X%02X", cfg[5], cfg[6], cfg[7], + cfg[8]); + iwe.u.data.length = strlen(buf); + current_ev = iwe_stream_add_point(info, current_ev, + end_buf, + &iwe, buf); + sprintf(buf, "Congestion Control Mode ID: " + "0x%02X%02X%02X%02X", cfg[9], cfg[10], + cfg[11], cfg[12]); + iwe.u.data.length = strlen(buf); + current_ev = iwe_stream_add_point(info, current_ev, + end_buf, + &iwe, buf); + sprintf(buf, "Channel Precedence: " + "0x%02X%02X%02X%02X", cfg[13], cfg[14], + cfg[15], cfg[16]); + iwe.u.data.length = strlen(buf); + current_ev = iwe_stream_add_point(info, current_ev, + end_buf, + &iwe, buf); + kfree(buf); + break; + case WLAN_EID_SUPP_RATES: + case WLAN_EID_EXT_SUPP_RATES: + /* display all supported rates in readable format */ + p = current_ev + iwe_stream_lcp_len(info); + + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = SIOCGIWRATE; + /* Those two flags are ignored... */ + iwe.u.bitrate.fixed = iwe.u.bitrate.disabled = 0; + + for (i = 0; i < ie[1]; i++) { + iwe.u.bitrate.value = + ((ie[i + 2] & 0x7f) * 500000); + p = iwe_stream_add_value(info, current_ev, p, + end_buf, &iwe, IW_EV_PARAM_LEN); + } + current_ev = p; + break; + } + rem -= ie[1] + 2; + ie += ie[1] + 2; + } + + if (bss->pub.capability & (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS) + || ismesh) { + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = SIOCGIWMODE; + if (ismesh) + iwe.u.mode = IW_MODE_MESH; + else if (bss->pub.capability & WLAN_CAPABILITY_ESS) + iwe.u.mode = IW_MODE_MASTER; + else + iwe.u.mode = IW_MODE_ADHOC; + current_ev = iwe_stream_add_event(info, current_ev, end_buf, + &iwe, IW_EV_UINT_LEN); + } + + buf = kmalloc(30, GFP_ATOMIC); + if (buf) { + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = IWEVCUSTOM; + sprintf(buf, "tsf=%016llx", (unsigned long long)(bss->pub.tsf)); + iwe.u.data.length = strlen(buf); + current_ev = iwe_stream_add_point(info, current_ev, end_buf, + &iwe, buf); + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = IWEVCUSTOM; + sprintf(buf, " Last beacon: %dms ago", + jiffies_to_msecs(jiffies - bss->ts)); + iwe.u.data.length = strlen(buf); + current_ev = iwe_stream_add_point(info, current_ev, + end_buf, &iwe, buf); + kfree(buf); + } + + ieee80211_scan_add_ies(info, &bss->pub, ¤t_ev, end_buf); + + return current_ev; +} + + +static int ieee80211_scan_results(struct cfg80211_registered_device *dev, + struct iw_request_info *info, + char *buf, size_t len) +{ + char *current_ev = buf; + char *end_buf = buf + len; + struct cfg80211_internal_bss *bss; + + spin_lock_bh(&dev->bss_lock); + cfg80211_bss_expire(dev); + + list_for_each_entry(bss, &dev->bss_list, list) { + if (buf + len - current_ev <= IW_EV_ADDR_LEN) { + spin_unlock_bh(&dev->bss_lock); + return -E2BIG; + } + current_ev = ieee80211_bss(info, bss, + current_ev, end_buf); + } + spin_unlock_bh(&dev->bss_lock); + return current_ev - buf; +} + + +int cfg80211_wext_giwscan(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *data, char *extra) +{ + struct cfg80211_registered_device *rdev; + int res; + + if (!netif_running(dev)) + return -ENETDOWN; + + rdev = cfg80211_get_dev_from_ifindex(dev->ifindex); + + if (IS_ERR(rdev)) + return PTR_ERR(rdev); + + if (rdev->scan_req) { + res = -EAGAIN; + goto out; + } + + res = ieee80211_scan_results(rdev, info, extra, data->length); + data->length = 0; + if (res >= 0) { + data->length = res; + res = 0; + } + + out: + cfg80211_put_dev(rdev); + return res; +} +EXPORT_SYMBOL(cfg80211_wext_giwscan); +#endif -- cgit v1.2.3 From 99cf5f5f3571ce3a837e379d3b87bf5ddf54f17d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Feb 2009 21:25:56 +0100 Subject: mac80211: dont add BSS when creating IBSS There's no need to create a BSS struct only to pass it to ieee80211_sta_join_ibss, so refactor this function into __ieee80211_sta_join_ibss which takes all the relevant paramters, and ieee80211_sta_join_ibss which takes a BSS struct (used when joining an IBSS that already has other members). Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 3 -- net/mac80211/mlme.c | 107 ++++++++++++++++++++++++--------------------- net/mac80211/scan.c | 8 +--- 3 files changed, 60 insertions(+), 58 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index cbc0b7d647f9..87d63fe61bf9 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -953,9 +953,6 @@ ieee80211_bss_info_update(struct ieee80211_local *local, struct ieee80211_channel *channel, bool beacon); struct ieee80211_bss * -ieee80211_rx_bss_add(struct ieee80211_local *local, u8 *bssid, int freq, - u8 *ssid, u8 ssid_len); -struct ieee80211_bss * ieee80211_rx_bss_get(struct ieee80211_local *local, u8 *bssid, int freq, u8 *ssid, u8 ssid_len); void ieee80211_rx_bss_put(struct ieee80211_local *local, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 46b4817cdea9..c5991ec047be 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -546,14 +546,15 @@ void ieee80211_send_pspoll(struct ieee80211_local *local, /* MLME */ static void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, - struct ieee80211_bss *bss) + const size_t supp_rates_len, + const u8 *supp_rates) { struct ieee80211_local *local = sdata->local; int i, have_higher_than_11mbit = 0; /* cf. IEEE 802.11 9.2.12 */ - for (i = 0; i < bss->supp_rates_len; i++) - if ((bss->supp_rates[i] & 0x7f) * 5 > 110) + for (i = 0; i < supp_rates_len; i++) + if ((supp_rates[i] & 0x7f) * 5 > 110) have_higher_than_11mbit = 1; if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ && @@ -1546,9 +1547,13 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, } -static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta, - struct ieee80211_bss *bss) +static int __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, + struct ieee80211_if_sta *ifsta, + const u8 *bssid, const int beacon_int, + const int freq, + const size_t supp_rates_len, + const u8 *supp_rates, + const u16 capability) { struct ieee80211_local *local = sdata->local; int res = 0, rates, i, j; @@ -1564,7 +1569,7 @@ static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, } if ((ifsta->flags & IEEE80211_STA_PREV_BSSID_SET) && - memcmp(ifsta->bssid, bss->bssid, ETH_ALEN) == 0) + memcmp(ifsta->bssid, bssid, ETH_ALEN) == 0) return res; skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400 + @@ -1575,28 +1580,28 @@ static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, return -ENOMEM; } - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - if (!(ifsta->flags & IEEE80211_STA_PREV_BSSID_SET)) { /* Remove possible STA entries from other IBSS networks. */ sta_info_flush_delayed(sdata); } - memcpy(ifsta->bssid, bss->bssid, ETH_ALEN); + memcpy(ifsta->bssid, bssid, ETH_ALEN); res = ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID); if (res) return res; - local->hw.conf.beacon_int = bss->beacon_int >= 10 ? bss->beacon_int : 10; + local->hw.conf.beacon_int = beacon_int >= 10 ? beacon_int : 10; - sdata->drop_unencrypted = bss->capability & + sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0; - res = ieee80211_set_freq(sdata, bss->freq); + res = ieee80211_set_freq(sdata, freq); if (res) return res; + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + /* Build IBSS probe response */ skb_reserve(skb, local->hw.extra_tx_headroom); @@ -1605,33 +1610,32 @@ static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, skb_put(skb, 24 + sizeof(mgmt->u.beacon)); memset(mgmt, 0, 24 + sizeof(mgmt->u.beacon)); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | - IEEE80211_STYPE_PROBE_RESP); + IEEE80211_STYPE_PROBE_RESP); memset(mgmt->da, 0xff, ETH_ALEN); memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); mgmt->u.beacon.beacon_int = cpu_to_le16(local->hw.conf.beacon_int); - mgmt->u.beacon.timestamp = cpu_to_le64(bss->timestamp); - mgmt->u.beacon.capab_info = cpu_to_le16(bss->capability); + mgmt->u.beacon.capab_info = cpu_to_le16(capability); pos = skb_put(skb, 2 + ifsta->ssid_len); *pos++ = WLAN_EID_SSID; *pos++ = ifsta->ssid_len; memcpy(pos, ifsta->ssid, ifsta->ssid_len); - rates = bss->supp_rates_len; + rates = supp_rates_len; if (rates > 8) rates = 8; pos = skb_put(skb, 2 + rates); *pos++ = WLAN_EID_SUPP_RATES; *pos++ = rates; - memcpy(pos, bss->supp_rates, rates); + memcpy(pos, supp_rates, rates); - if (bss->band == IEEE80211_BAND_2GHZ) { + if (sband->band == IEEE80211_BAND_2GHZ) { pos = skb_put(skb, 2 + 1); *pos++ = WLAN_EID_DS_PARAMS; *pos++ = 1; - *pos++ = ieee80211_frequency_to_channel(bss->freq); + *pos++ = ieee80211_frequency_to_channel(freq); } pos = skb_put(skb, 2 + 2); @@ -1641,12 +1645,12 @@ static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, *pos++ = 0; *pos++ = 0; - if (bss->supp_rates_len > 8) { - rates = bss->supp_rates_len - 8; + if (supp_rates_len > 8) { + rates = supp_rates_len - 8; pos = skb_put(skb, 2 + rates); *pos++ = WLAN_EID_EXT_SUPP_RATES; *pos++ = rates; - memcpy(pos, &bss->supp_rates[8], rates); + memcpy(pos, &supp_rates[8], rates); } add_extra_ies(skb, sdata->u.sta.ie_proberesp, @@ -1659,16 +1663,15 @@ static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, rates = 0; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - for (i = 0; i < bss->supp_rates_len; i++) { - int bitrate = (bss->supp_rates[i] & 0x7f) * 5; + for (i = 0; i < supp_rates_len; i++) { + int bitrate = (supp_rates[i] & 0x7f) * 5; for (j = 0; j < sband->n_bitrates; j++) if (sband->bitrates[j].bitrate == bitrate) rates |= BIT(j); } ifsta->supp_rates_bits[local->hw.conf.channel->band] = rates; - ieee80211_sta_def_wmm_params(sdata, bss); + ieee80211_sta_def_wmm_params(sdata, supp_rates_len, supp_rates); ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET; ifsta->state = IEEE80211_STA_MLME_IBSS_JOINED; @@ -1677,12 +1680,23 @@ static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, ieee80211_led_assoc(local, true); memset(&wrqu, 0, sizeof(wrqu)); - memcpy(wrqu.ap_addr.sa_data, bss->bssid, ETH_ALEN); + memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN); wireless_send_event(sdata->dev, SIOCGIWAP, &wrqu, NULL); return res; } +static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, + struct ieee80211_if_sta *ifsta, + struct ieee80211_bss *bss) +{ + return __ieee80211_sta_join_ibss(sdata, ifsta, + bss->bssid, bss->beacon_int, + bss->freq, + bss->supp_rates_len, bss->supp_rates, + bss->capability); +} + static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, @@ -2251,11 +2265,12 @@ static int ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta) { struct ieee80211_local *local = sdata->local; - struct ieee80211_bss *bss; struct ieee80211_supported_band *sband; - u8 bssid[ETH_ALEN], *pos; + u8 *pos; + u8 bssid[ETH_ALEN]; + u8 supp_rates[IEEE80211_MAX_SUPP_RATES]; + u16 capability; int i; - int ret; if (sdata->u.sta.flags & IEEE80211_STA_BSSID_SET) { memcpy(bssid, ifsta->bssid, ETH_ALEN); @@ -2273,36 +2288,29 @@ static int ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata, printk(KERN_DEBUG "%s: Creating new IBSS network, BSSID %pM\n", sdata->dev->name, bssid); - bss = ieee80211_rx_bss_add(local, bssid, - local->hw.conf.channel->center_freq, - sdata->u.sta.ssid, sdata->u.sta.ssid_len); - if (!bss) - return -ENOMEM; - - bss->band = local->hw.conf.channel->band; - sband = local->hw.wiphy->bands[bss->band]; + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; if (local->hw.conf.beacon_int == 0) local->hw.conf.beacon_int = 100; - bss->beacon_int = local->hw.conf.beacon_int; - bss->last_update = jiffies; - bss->capability = WLAN_CAPABILITY_IBSS; + + capability = WLAN_CAPABILITY_IBSS; if (sdata->default_key) - bss->capability |= WLAN_CAPABILITY_PRIVACY; + capability |= WLAN_CAPABILITY_PRIVACY; else sdata->drop_unencrypted = 0; - bss->supp_rates_len = sband->n_bitrates; - pos = bss->supp_rates; + pos = supp_rates; for (i = 0; i < sband->n_bitrates; i++) { int rate = sband->bitrates[i].bitrate; *pos++ = (u8) (rate / 5); } - ret = ieee80211_sta_join_ibss(sdata, ifsta, bss); - ieee80211_rx_bss_put(local, bss); - return ret; + return __ieee80211_sta_join_ibss(sdata, ifsta, + bssid, local->hw.conf.beacon_int, + local->hw.conf.channel->center_freq, + sband->n_bitrates, supp_rates, + capability); } @@ -2471,7 +2479,8 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata, ieee80211_sta_set_ssid(sdata, selected->ssid, selected->ssid_len); ieee80211_sta_set_bssid(sdata, selected->bssid); - ieee80211_sta_def_wmm_params(sdata, selected); + ieee80211_sta_def_wmm_params(sdata, selected->supp_rates_len, + selected->supp_rates); if (sdata->u.sta.mfp == IEEE80211_MFP_REQUIRED) sdata->u.sta.flags |= IEEE80211_STA_MFP_ENABLED; else diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index c6b275b10cf9..fc88e2e2f923 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -14,11 +14,7 @@ /* TODO: * figure out how to avoid that the "current BSS" expires - * clean up IBSS code (in MLME), see why it adds a BSS to the list - * use cfg80211's BSS handling (depends on IBSS TODO above) - * order BSS list by RSSI(?) ("quality of AP") - * scan result table filtering (by capability (privacy, IBSS/BSS, WPA/RSN IE, - * SSID) + * use cfg80211's BSS handling */ #include @@ -107,7 +103,7 @@ static void __ieee80211_rx_bss_hash_del(struct ieee80211_local *local, } } -struct ieee80211_bss * +static struct ieee80211_bss * ieee80211_rx_bss_add(struct ieee80211_local *local, u8 *bssid, int freq, u8 *ssid, u8 ssid_len) { -- cgit v1.2.3 From 78c1c7e109f1f14e7c18f290c4ebc58da220c7ba Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Feb 2009 21:25:57 +0100 Subject: cfg80211: free_priv for BSS info When cfg80211 users have their own allocated data in the per-BSS private data, they will need to free this when the BSS struct is destroyed. Add a free_priv method and fix one place where the BSS was kfree'd rather than released properly. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/core.c | 2 +- net/wireless/scan.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 3cccd1390cea..0668b2bfc1da 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -376,7 +376,7 @@ void cfg80211_dev_free(struct cfg80211_registered_device *drv) mutex_destroy(&drv->mtx); mutex_destroy(&drv->devlist_mtx); list_for_each_entry_safe(scan, tmp, &drv->bss_list, list) - kfree(scan); + cfg80211_put_bss(&scan->pub); kfree(drv); } diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 009d12810c55..ec148f68a62b 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -56,6 +56,8 @@ static void bss_release(struct kref *ref) struct cfg80211_internal_bss *bss; bss = container_of(ref, struct cfg80211_internal_bss, ref); + if (bss->pub.free_priv) + bss->pub.free_priv(&bss->pub); kfree(bss); } -- cgit v1.2.3 From d491af19db3adcc1eb1653e60a427fb4df36f361 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Feb 2009 21:25:58 +0100 Subject: cfg80211: allow users to request removing a BSS This patch introduces cfg80211_unlink_bss, a function to allow a driver to remove a BSS from the internal list and make it not show up in scan results any more -- this is to be used when the driver detects that the BSS is no longer available. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/scan.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index ec148f68a62b..aacccc9ab6ca 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -432,6 +432,27 @@ void cfg80211_put_bss(struct cfg80211_bss *pub) } EXPORT_SYMBOL(cfg80211_put_bss); +void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) +{ + struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy); + struct cfg80211_internal_bss *bss; + + if (WARN_ON(!pub)) + return; + + bss = container_of(pub, struct cfg80211_internal_bss, pub); + + spin_lock_bh(&dev->bss_lock); + + list_del(&bss->list); + rb_erase(&bss->rbn, &dev->bss_tree); + + spin_unlock_bh(&dev->bss_lock); + + kref_put(&bss->ref, bss_release); +} +EXPORT_SYMBOL(cfg80211_unlink_bss); + #ifdef CONFIG_WIRELESS_EXT int cfg80211_wext_siwscan(struct net_device *dev, struct iw_request_info *info, -- cgit v1.2.3 From 79420f09e76e8e1dd1149d6ce9c20e06cbb5802a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Feb 2009 21:25:59 +0100 Subject: cfg80211: add more flexible BSS lookup Add a more flexible BSS lookup function so that mac80211 or other drivers can actually use this for getting the BSS to connect to. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/scan.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/scan.c b/net/wireless/scan.c index aacccc9ab6ca..b1893c863b97 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -116,9 +116,12 @@ static bool is_bss(struct cfg80211_bss *a, { const u8 *ssidie; - if (compare_ether_addr(a->bssid, bssid)) + if (bssid && compare_ether_addr(a->bssid, bssid)) return false; + if (!ssid) + return true; + ssidie = find_ie(WLAN_EID_SSID, a->information_elements, a->len_information_elements); @@ -199,7 +202,8 @@ static int cmp_bss(struct cfg80211_bss *a, struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, struct ieee80211_channel *channel, const u8 *bssid, - const u8 *ssid, size_t ssid_len) + const u8 *ssid, size_t ssid_len, + u16 capa_mask, u16 capa_val) { struct cfg80211_registered_device *dev = wiphy_to_dev(wiphy); struct cfg80211_internal_bss *bss, *res = NULL; @@ -207,6 +211,8 @@ struct cfg80211_bss *cfg80211_get_bss(struct wiphy *wiphy, spin_lock_bh(&dev->bss_lock); list_for_each_entry(bss, &dev->bss_list, list) { + if ((bss->pub.capability & capa_mask) != capa_val) + continue; if (channel && bss->pub.channel != channel) continue; if (is_bss(&bss->pub, bssid, ssid, ssid_len)) { -- cgit v1.2.3 From 00d3f14cf9f12c21428121026a5e1d5f65926447 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Feb 2009 21:26:00 +0100 Subject: mac80211: use cfg80211s BSS infrastructure Remove all the code from mac80211 to keep track of BSSes and use the cfg80211-provided code completely. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 42 +++----- net/mac80211/main.c | 6 +- net/mac80211/mesh.c | 10 -- net/mac80211/mesh.h | 1 - net/mac80211/mlme.c | 213 +++++++++++++------------------------- net/mac80211/scan.c | 253 ++++----------------------------------------- net/mac80211/spectmgmt.c | 7 +- 7 files changed, 114 insertions(+), 418 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 87d63fe61bf9..678278344d79 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -72,43 +72,36 @@ struct ieee80211_fragment_entry { struct ieee80211_bss { - struct list_head list; - struct ieee80211_bss *hnext; - size_t ssid_len; + /* Yes, this is a hack */ + struct cfg80211_bss cbss; - atomic_t users; - - u8 bssid[ETH_ALEN]; + /* don't want to look up all the time */ + size_t ssid_len; u8 ssid[IEEE80211_MAX_SSID_LEN]; + u8 dtim_period; - u16 capability; /* host byte order */ - enum ieee80211_band band; - int freq; - int signal, noise, qual; - u8 *ies; /* all information elements from the last Beacon or Probe - * Response frames; note Beacon frame is not allowed to - * override values from Probe Response */ - size_t ies_len; + bool wmm_used; + + unsigned long last_probe_resp; + #ifdef CONFIG_MAC80211_MESH u8 *mesh_id; size_t mesh_id_len; u8 *mesh_cfg; #endif + #define IEEE80211_MAX_SUPP_RATES 32 u8 supp_rates[IEEE80211_MAX_SUPP_RATES]; size_t supp_rates_len; - u64 timestamp; - int beacon_int; - unsigned long last_probe_resp; - unsigned long last_update; - - /* during assocation, we save an ERP value from a probe response so + /* + * During assocation, we save an ERP value from a probe response so * that we can feed ERP info to the driver when handling the * association completes. these fields probably won't be up-to-date - * otherwise, you probably don't want to use them. */ - int has_erp_value; + * otherwise, you probably don't want to use them. + */ + bool has_erp_value; u8 erp_value; }; @@ -668,9 +661,6 @@ struct ieee80211_local { struct ieee80211_sub_if_data *scan_sdata; enum nl80211_channel_type oper_channel_type; struct ieee80211_channel *oper_channel, *csa_channel; - struct list_head bss_list; - struct ieee80211_bss *bss_hash[STA_HASH_SIZE]; - spinlock_t bss_lock; /* SNMP counters */ /* dot11CountersTable */ @@ -936,8 +926,6 @@ ieee80211_rx_result ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, struct ieee80211_rx_status *rx_status); -void ieee80211_rx_bss_list_init(struct ieee80211_local *local); -void ieee80211_rx_bss_list_deinit(struct ieee80211_local *local); int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, char *ie, size_t len); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 954edfbb6b6f..b4973a1b6595 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -734,6 +734,9 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, wiphy->privid = mac80211_wiphy_privid; wiphy->max_scan_ssids = 4; + /* Yes, putting cfg80211_bss into ieee80211_bss is a hack */ + wiphy->bss_priv_size = sizeof(struct ieee80211_bss) - + sizeof(struct cfg80211_bss); local = wiphy_priv(wiphy); local->hw.wiphy = wiphy; @@ -877,8 +880,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) mpriv->local = local; local->mdev = mdev; - ieee80211_rx_bss_list_init(local); - local->hw.workqueue = create_singlethread_workqueue(wiphy_name(local->hw.wiphy)); if (!local->hw.workqueue) { @@ -1018,7 +1019,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) rtnl_unlock(); - ieee80211_rx_bss_list_deinit(local); ieee80211_clear_tx_pending(local); sta_info_stop(local); rate_control_deinitialize(local); diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 8a1fcaeee4f2..9a3e5de0410a 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -275,16 +275,6 @@ u32 mesh_table_hash(u8 *addr, struct ieee80211_sub_if_data *sdata, struct mesh_t & tbl->hash_mask; } -u8 mesh_id_hash(u8 *mesh_id, int mesh_id_len) -{ - if (!mesh_id_len) - return 1; - else if (mesh_id_len == 1) - return (u8) mesh_id[0]; - else - return (u8) (mesh_id[0] + 2 * mesh_id[1]); -} - struct mesh_table *mesh_table_alloc(int size_order) { int i; diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 9e064ee98ee0..d891d7ddccd7 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -196,7 +196,6 @@ struct mesh_rmc { /* Public interfaces */ /* Various */ -u8 mesh_id_hash(u8 *mesh_id, int mesh_id_len); int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr); int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr, struct ieee80211_sub_if_data *sdata); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c5991ec047be..c51860f66731 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -55,10 +55,10 @@ static u8 *ieee80211_bss_get_ie(struct ieee80211_bss *bss, u8 ie) { u8 *end, *pos; - pos = bss->ies; + pos = bss->cbss.information_elements; if (pos == NULL) return NULL; - end = pos + bss->ies_len; + end = pos + bss->cbss.len_information_elements; while (pos + 1 < end) { if (pos + 2 + pos[1] > end) @@ -289,7 +289,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, local->hw.conf.channel->center_freq, ifsta->ssid, ifsta->ssid_len); if (bss) { - if (bss->capability & WLAN_CAPABILITY_PRIVACY) + if (bss->cbss.capability & WLAN_CAPABILITY_PRIVACY) capab |= WLAN_CAPABILITY_PRIVACY; if (bss->wmm_used) wmm = 1; @@ -300,7 +300,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, * b-only mode) */ rates_len = ieee80211_compatible_rates(bss, sband, &rates); - if ((bss->capability & WLAN_CAPABILITY_SPECTRUM_MGMT) && + if ((bss->cbss.capability & WLAN_CAPABILITY_SPECTRUM_MGMT) && (local->hw.flags & IEEE80211_HW_SPECTRUM_MGMT)) capab |= WLAN_CAPABILITY_SPECTRUM_MGMT; @@ -816,12 +816,12 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, ifsta->ssid, ifsta->ssid_len); if (bss) { /* set timing information */ - sdata->vif.bss_conf.beacon_int = bss->beacon_int; - sdata->vif.bss_conf.timestamp = bss->timestamp; + sdata->vif.bss_conf.beacon_int = bss->cbss.beacon_interval; + sdata->vif.bss_conf.timestamp = bss->cbss.tsf; sdata->vif.bss_conf.dtim_period = bss->dtim_period; bss_info_changed |= ieee80211_handle_bss_capability(sdata, - bss->capability, bss->has_erp_value, bss->erp_value); + bss->cbss.capability, bss->has_erp_value, bss->erp_value); ieee80211_rx_bss_put(local, bss); } @@ -1041,7 +1041,7 @@ static int ieee80211_privacy_mismatch(struct ieee80211_sub_if_data *sdata, if (!bss) return 0; - bss_privacy = !!(bss->capability & WLAN_CAPABILITY_PRIVACY); + bss_privacy = !!(bss->cbss.capability & WLAN_CAPABILITY_PRIVACY); wep_privacy = !!ieee80211_sta_wep_configured(sdata); privacy_invoked = !!(ifsta->flags & IEEE80211_STA_PRIVACY_INVOKED); @@ -1416,8 +1416,6 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, /* Add STA entry for the AP */ sta = sta_info_get(local, ifsta->bssid); if (!sta) { - struct ieee80211_bss *bss; - newsta = true; sta = sta_info_alloc(sdata, ifsta->bssid, GFP_ATOMIC); @@ -1427,15 +1425,6 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); return; } - bss = ieee80211_rx_bss_get(local, ifsta->bssid, - local->hw.conf.channel->center_freq, - ifsta->ssid, ifsta->ssid_len); - if (bss) { - sta->last_signal = bss->signal; - sta->last_qual = bss->qual; - sta->last_noise = bss->noise; - ieee80211_rx_bss_put(local, bss); - } /* update new sta with its last rx activity */ sta->last_rx = jiffies; @@ -1691,10 +1680,11 @@ static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, struct ieee80211_bss *bss) { return __ieee80211_sta_join_ibss(sdata, ifsta, - bss->bssid, bss->beacon_int, - bss->freq, + bss->cbss.bssid, + bss->cbss.beacon_interval, + bss->cbss.channel->center_freq, bss->supp_rates_len, bss->supp_rates, - bss->capability); + bss->cbss.capability); } static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, @@ -1769,7 +1759,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, } /* was just updated in ieee80211_bss_info_update */ - beacon_timestamp = bss->timestamp; + beacon_timestamp = bss->cbss.tsf; /* * In STA mode, the remaining parameters should not be overridden @@ -1784,8 +1774,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, /* check if we need to merge IBSS */ if (sdata->vif.type == NL80211_IFTYPE_ADHOC && beacon && (!(sdata->u.sta.flags & IEEE80211_STA_BSSID_SET)) && - bss->capability & WLAN_CAPABILITY_IBSS && - bss->freq == local->oper_channel->center_freq && + bss->cbss.capability & WLAN_CAPABILITY_IBSS && + bss->cbss.channel == local->oper_channel && elems->ssid_len == sdata->u.sta.ssid_len && memcmp(elems->ssid, sdata->u.sta.ssid, sdata->u.sta.ssid_len) == 0) { @@ -2230,37 +2220,6 @@ static void ieee80211_sta_reset_auth(struct ieee80211_sub_if_data *sdata, netif_carrier_off(sdata->dev); } - -static int ieee80211_sta_match_ssid(struct ieee80211_if_sta *ifsta, - const char *ssid, int ssid_len) -{ - int tmp, hidden_ssid; - - if (ssid_len == ifsta->ssid_len && - !memcmp(ifsta->ssid, ssid, ssid_len)) - return 1; - - if (ifsta->flags & IEEE80211_STA_AUTO_BSSID_SEL) - return 0; - - hidden_ssid = 1; - tmp = ssid_len; - while (tmp--) { - if (ssid[tmp] != '\0') { - hidden_ssid = 0; - break; - } - } - - if (hidden_ssid && (ifsta->ssid_len == ssid_len || ssid_len == 0)) - return 1; - - if (ssid_len == 1 && ssid[0] == ' ') - return 1; - - return 0; -} - static int ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta) { @@ -2319,8 +2278,6 @@ static int ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_bss *bss; - int found = 0; - u8 bssid[ETH_ALEN]; int active_ibss; if (ifsta->ssid_len == 0) @@ -2331,56 +2288,39 @@ static int ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata, printk(KERN_DEBUG "%s: sta_find_ibss (active_ibss=%d)\n", sdata->dev->name, active_ibss); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ - spin_lock_bh(&local->bss_lock); - list_for_each_entry(bss, &local->bss_list, list) { - if (ifsta->ssid_len != bss->ssid_len || - memcmp(ifsta->ssid, bss->ssid, bss->ssid_len) != 0 - || !(bss->capability & WLAN_CAPABILITY_IBSS)) - continue; - if ((ifsta->flags & IEEE80211_STA_BSSID_SET) && - memcmp(ifsta->bssid, bss->bssid, ETH_ALEN) != 0) - continue; -#ifdef CONFIG_MAC80211_IBSS_DEBUG - printk(KERN_DEBUG " bssid=%pM found\n", bss->bssid); -#endif /* CONFIG_MAC80211_IBSS_DEBUG */ - memcpy(bssid, bss->bssid, ETH_ALEN); - found = 1; - if (active_ibss || memcmp(bssid, ifsta->bssid, ETH_ALEN) != 0) - break; - } - spin_unlock_bh(&local->bss_lock); + + if (active_ibss) + return 0; + + if (ifsta->flags & IEEE80211_STA_BSSID_SET) + bss = ieee80211_rx_bss_get(local, ifsta->bssid, 0, + ifsta->ssid, ifsta->ssid_len); + else + bss = (void *)cfg80211_get_ibss(local->hw.wiphy, + NULL, + ifsta->ssid, ifsta->ssid_len); #ifdef CONFIG_MAC80211_IBSS_DEBUG - if (found) + if (bss) printk(KERN_DEBUG " sta_find_ibss: selected %pM current " - "%pM\n", bssid, ifsta->bssid); + "%pM\n", bss->cbss.bssid, ifsta->bssid); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ - if (found && - ((!(ifsta->flags & IEEE80211_STA_PREV_BSSID_SET)) || - memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0)) { + if (bss && + (!(ifsta->flags & IEEE80211_STA_PREV_BSSID_SET) || + memcmp(ifsta->bssid, bss->cbss.bssid, ETH_ALEN))) { int ret; - int search_freq; - - if (ifsta->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) - search_freq = bss->freq; - else - search_freq = local->hw.conf.channel->center_freq; - - bss = ieee80211_rx_bss_get(local, bssid, search_freq, - ifsta->ssid, ifsta->ssid_len); - if (!bss) - goto dont_join; printk(KERN_DEBUG "%s: Selected IBSS BSSID %pM" " based on configured SSID\n", - sdata->dev->name, bssid); + sdata->dev->name, bss->cbss.bssid); + ret = ieee80211_sta_join_ibss(sdata, ifsta, bss); ieee80211_rx_bss_put(local, bss); return ret; - } + } else if (bss) + ieee80211_rx_bss_put(local, bss); -dont_join: #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG " did not try to join ibss\n"); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ @@ -2436,51 +2376,44 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta) { struct ieee80211_local *local = sdata->local; - struct ieee80211_bss *bss, *selected = NULL; - int top_rssi = 0, freq; - - spin_lock_bh(&local->bss_lock); - freq = local->oper_channel->center_freq; - list_for_each_entry(bss, &local->bss_list, list) { - if (!(bss->capability & WLAN_CAPABILITY_ESS)) - continue; - - if ((ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL | - IEEE80211_STA_AUTO_BSSID_SEL | - IEEE80211_STA_AUTO_CHANNEL_SEL)) && - (!!(bss->capability & WLAN_CAPABILITY_PRIVACY) ^ - !!sdata->default_key)) - continue; - - if (!(ifsta->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) && - bss->freq != freq) - continue; - - if (!(ifsta->flags & IEEE80211_STA_AUTO_BSSID_SEL) && - memcmp(bss->bssid, ifsta->bssid, ETH_ALEN)) - continue; - - if (!(ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL) && - !ieee80211_sta_match_ssid(ifsta, bss->ssid, bss->ssid_len)) - continue; - - if (!selected || top_rssi < bss->signal) { - selected = bss; - top_rssi = bss->signal; - } + struct ieee80211_bss *bss; + u8 *bssid = ifsta->bssid, *ssid = ifsta->ssid; + u8 ssid_len = ifsta->ssid_len; + u16 capa_mask = WLAN_CAPABILITY_ESS; + u16 capa_val = WLAN_CAPABILITY_ESS; + struct ieee80211_channel *chan = local->oper_channel; + + if (ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL | + IEEE80211_STA_AUTO_BSSID_SEL | + IEEE80211_STA_AUTO_CHANNEL_SEL)) { + capa_mask |= WLAN_CAPABILITY_PRIVACY; + if (sdata->default_key) + capa_val |= WLAN_CAPABILITY_PRIVACY; } - if (selected) - atomic_inc(&selected->users); - spin_unlock_bh(&local->bss_lock); - if (selected) { - ieee80211_set_freq(sdata, selected->freq); + if (ifsta->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) + chan = NULL; + + if (ifsta->flags & IEEE80211_STA_AUTO_BSSID_SEL) + bssid = NULL; + + if (ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL) { + ssid = NULL; + ssid_len = 0; + } + + bss = (void *)cfg80211_get_bss(local->hw.wiphy, chan, + bssid, ssid, ssid_len, + capa_mask, capa_val); + + if (bss) { + ieee80211_set_freq(sdata, bss->cbss.channel->center_freq); if (!(ifsta->flags & IEEE80211_STA_SSID_SET)) - ieee80211_sta_set_ssid(sdata, selected->ssid, - selected->ssid_len); - ieee80211_sta_set_bssid(sdata, selected->bssid); - ieee80211_sta_def_wmm_params(sdata, selected->supp_rates_len, - selected->supp_rates); + ieee80211_sta_set_ssid(sdata, bss->ssid, + bss->ssid_len); + ieee80211_sta_set_bssid(sdata, bss->cbss.bssid); + ieee80211_sta_def_wmm_params(sdata, bss->supp_rates_len, + bss->supp_rates); if (sdata->u.sta.mfp == IEEE80211_MFP_REQUIRED) sdata->u.sta.flags |= IEEE80211_STA_MFP_ENABLED; else @@ -2489,14 +2422,14 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata, /* Send out direct probe if no probe resp was received or * the one we have is outdated */ - if (!selected->last_probe_resp || - time_after(jiffies, selected->last_probe_resp + if (!bss->last_probe_resp || + time_after(jiffies, bss->last_probe_resp + IEEE80211_SCAN_RESULT_EXPIRE)) ifsta->state = IEEE80211_STA_MLME_DIRECT_PROBE; else ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE; - ieee80211_rx_bss_put(local, selected); + ieee80211_rx_bss_put(local, bss); ieee80211_sta_reset_auth(sdata, ifsta); return 0; } else { diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index fc88e2e2f923..f883ab9f1e6e 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -12,10 +12,7 @@ * published by the Free Software Foundation. */ -/* TODO: - * figure out how to avoid that the "current BSS" expires - * use cfg80211's BSS handling - */ +/* TODO: figure out how to avoid that the "current BSS" expires */ #include #include @@ -30,192 +27,29 @@ #define IEEE80211_CHANNEL_TIME (HZ / 33) #define IEEE80211_PASSIVE_CHANNEL_TIME (HZ / 5) -void ieee80211_rx_bss_list_init(struct ieee80211_local *local) -{ - spin_lock_init(&local->bss_lock); - INIT_LIST_HEAD(&local->bss_list); -} - -void ieee80211_rx_bss_list_deinit(struct ieee80211_local *local) -{ - struct ieee80211_bss *bss, *tmp; - - list_for_each_entry_safe(bss, tmp, &local->bss_list, list) - ieee80211_rx_bss_put(local, bss); -} - struct ieee80211_bss * ieee80211_rx_bss_get(struct ieee80211_local *local, u8 *bssid, int freq, u8 *ssid, u8 ssid_len) { - struct ieee80211_bss *bss; - - spin_lock_bh(&local->bss_lock); - bss = local->bss_hash[STA_HASH(bssid)]; - while (bss) { - if (!bss_mesh_cfg(bss) && - !memcmp(bss->bssid, bssid, ETH_ALEN) && - bss->freq == freq && - bss->ssid_len == ssid_len && - (ssid_len == 0 || !memcmp(bss->ssid, ssid, ssid_len))) { - atomic_inc(&bss->users); - break; - } - bss = bss->hnext; - } - spin_unlock_bh(&local->bss_lock); - return bss; -} - -/* Caller must hold local->bss_lock */ -static void __ieee80211_rx_bss_hash_add(struct ieee80211_local *local, - struct ieee80211_bss *bss) -{ - u8 hash_idx; - - if (bss_mesh_cfg(bss)) - hash_idx = mesh_id_hash(bss_mesh_id(bss), - bss_mesh_id_len(bss)); - else - hash_idx = STA_HASH(bss->bssid); - - bss->hnext = local->bss_hash[hash_idx]; - local->bss_hash[hash_idx] = bss; -} - -/* Caller must hold local->bss_lock */ -static void __ieee80211_rx_bss_hash_del(struct ieee80211_local *local, - struct ieee80211_bss *bss) -{ - struct ieee80211_bss *b, *prev = NULL; - b = local->bss_hash[STA_HASH(bss->bssid)]; - while (b) { - if (b == bss) { - if (!prev) - local->bss_hash[STA_HASH(bss->bssid)] = - bss->hnext; - else - prev->hnext = bss->hnext; - break; - } - prev = b; - b = b->hnext; - } -} - -static struct ieee80211_bss * -ieee80211_rx_bss_add(struct ieee80211_local *local, u8 *bssid, int freq, - u8 *ssid, u8 ssid_len) -{ - struct ieee80211_bss *bss; - - bss = kzalloc(sizeof(*bss), GFP_ATOMIC); - if (!bss) - return NULL; - atomic_set(&bss->users, 2); - memcpy(bss->bssid, bssid, ETH_ALEN); - bss->freq = freq; - if (ssid && ssid_len <= IEEE80211_MAX_SSID_LEN) { - memcpy(bss->ssid, ssid, ssid_len); - bss->ssid_len = ssid_len; - } - - spin_lock_bh(&local->bss_lock); - /* TODO: order by RSSI? */ - list_add_tail(&bss->list, &local->bss_list); - __ieee80211_rx_bss_hash_add(local, bss); - spin_unlock_bh(&local->bss_lock); - return bss; -} - -#ifdef CONFIG_MAC80211_MESH -static struct ieee80211_bss * -ieee80211_rx_mesh_bss_get(struct ieee80211_local *local, u8 *mesh_id, int mesh_id_len, - u8 *mesh_cfg, int freq) -{ - struct ieee80211_bss *bss; - - spin_lock_bh(&local->bss_lock); - bss = local->bss_hash[mesh_id_hash(mesh_id, mesh_id_len)]; - while (bss) { - if (bss_mesh_cfg(bss) && - !memcmp(bss_mesh_cfg(bss), mesh_cfg, MESH_CFG_CMP_LEN) && - bss->freq == freq && - mesh_id_len == bss->mesh_id_len && - (mesh_id_len == 0 || !memcmp(bss->mesh_id, mesh_id, - mesh_id_len))) { - atomic_inc(&bss->users); - break; - } - bss = bss->hnext; - } - spin_unlock_bh(&local->bss_lock); - return bss; + return (void *)cfg80211_get_bss(local->hw.wiphy, + ieee80211_get_channel(local->hw.wiphy, + freq), + bssid, ssid, ssid_len, + 0, 0); } -static struct ieee80211_bss * -ieee80211_rx_mesh_bss_add(struct ieee80211_local *local, u8 *mesh_id, int mesh_id_len, - u8 *mesh_cfg, int mesh_config_len, int freq) +static void ieee80211_rx_bss_free(struct cfg80211_bss *cbss) { - struct ieee80211_bss *bss; - - if (mesh_config_len != IEEE80211_MESH_CONFIG_LEN) - return NULL; - - bss = kzalloc(sizeof(*bss), GFP_ATOMIC); - if (!bss) - return NULL; - - bss->mesh_cfg = kmalloc(MESH_CFG_CMP_LEN, GFP_ATOMIC); - if (!bss->mesh_cfg) { - kfree(bss); - return NULL; - } - - if (mesh_id_len && mesh_id_len <= IEEE80211_MAX_MESH_ID_LEN) { - bss->mesh_id = kmalloc(mesh_id_len, GFP_ATOMIC); - if (!bss->mesh_id) { - kfree(bss->mesh_cfg); - kfree(bss); - return NULL; - } - memcpy(bss->mesh_id, mesh_id, mesh_id_len); - } - - atomic_set(&bss->users, 2); - memcpy(bss->mesh_cfg, mesh_cfg, MESH_CFG_CMP_LEN); - bss->mesh_id_len = mesh_id_len; - bss->freq = freq; - spin_lock_bh(&local->bss_lock); - /* TODO: order by RSSI? */ - list_add_tail(&bss->list, &local->bss_list); - __ieee80211_rx_bss_hash_add(local, bss); - spin_unlock_bh(&local->bss_lock); - return bss; -} -#endif + struct ieee80211_bss *bss = (void *)cbss; -static void ieee80211_rx_bss_free(struct ieee80211_bss *bss) -{ - kfree(bss->ies); kfree(bss_mesh_id(bss)); kfree(bss_mesh_cfg(bss)); - kfree(bss); } void ieee80211_rx_bss_put(struct ieee80211_local *local, struct ieee80211_bss *bss) { - local_bh_disable(); - if (!atomic_dec_and_lock(&bss->users, &local->bss_lock)) { - local_bh_enable(); - return; - } - - __ieee80211_rx_bss_hash_del(local, bss); - list_del(&bss->list); - spin_unlock_bh(&local->bss_lock); - ieee80211_rx_bss_free(bss); + cfg80211_put_bss((struct cfg80211_bss *)bss); } struct ieee80211_bss * @@ -228,7 +62,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local, bool beacon) { struct ieee80211_bss *bss; - int clen, freq = channel->center_freq; + int clen; enum cfg80211_signal_type sigtype = CFG80211_SIGNAL_TYPE_NONE; s32 signal = 0; @@ -240,39 +74,14 @@ ieee80211_bss_info_update(struct ieee80211_local *local, signal = (rx_status->signal * 100) / local->hw.max_signal; } - cfg80211_put_bss( - cfg80211_inform_bss_frame(local->hw.wiphy, channel, - mgmt, len, signal, sigtype, - GFP_ATOMIC)); + bss = (void *)cfg80211_inform_bss_frame(local->hw.wiphy, channel, + mgmt, len, signal, sigtype, + GFP_ATOMIC); -#ifdef CONFIG_MAC80211_MESH - if (elems->mesh_config) - bss = ieee80211_rx_mesh_bss_get(local, elems->mesh_id, - elems->mesh_id_len, elems->mesh_config, freq); - else -#endif - bss = ieee80211_rx_bss_get(local, mgmt->bssid, freq, - elems->ssid, elems->ssid_len); - if (!bss) { -#ifdef CONFIG_MAC80211_MESH - if (elems->mesh_config) - bss = ieee80211_rx_mesh_bss_add(local, elems->mesh_id, - elems->mesh_id_len, elems->mesh_config, - elems->mesh_config_len, freq); - else -#endif - bss = ieee80211_rx_bss_add(local, mgmt->bssid, freq, - elems->ssid, elems->ssid_len); - if (!bss) - return NULL; - } else { -#if 0 - /* TODO: order by RSSI? */ - spin_lock_bh(&local->bss_lock); - list_move_tail(&bss->list, &local->bss_list); - spin_unlock_bh(&local->bss_lock); -#endif - } + if (!bss) + return NULL; + + bss->cbss.free_priv = ieee80211_rx_bss_free; /* save the ERP value so that it is available at association time */ if (elems->erp_info && elems->erp_info_len >= 1) { @@ -280,9 +89,6 @@ ieee80211_bss_info_update(struct ieee80211_local *local, bss->has_erp_value = 1; } - bss->beacon_int = le16_to_cpu(mgmt->u.beacon.beacon_int); - bss->capability = le16_to_cpu(mgmt->u.beacon.capab_info); - if (elems->tim) { struct ieee80211_tim_ie *tim_ie = (struct ieee80211_tim_ie *)elems->tim; @@ -311,34 +117,11 @@ ieee80211_bss_info_update(struct ieee80211_local *local, bss->supp_rates_len += clen; } - bss->band = rx_status->band; - - bss->timestamp = le64_to_cpu(mgmt->u.beacon.timestamp); - bss->last_update = jiffies; - bss->signal = rx_status->signal; - bss->noise = rx_status->noise; - bss->qual = rx_status->qual; bss->wmm_used = elems->wmm_param || elems->wmm_info; if (!beacon) bss->last_probe_resp = jiffies; - /* - * For probe responses, or if we don't have any information yet, - * use the IEs from the beacon. - */ - if (!bss->ies || !beacon) { - if (bss->ies == NULL || bss->ies_len < elems->total_len) { - kfree(bss->ies); - bss->ies = kmalloc(elems->total_len, GFP_ATOMIC); - } - if (bss->ies) { - memcpy(bss->ies, elems->ie_start, elems->total_len); - bss->ies_len = elems->total_len; - } else - bss->ies_len = 0; - } - return bss; } @@ -350,7 +133,7 @@ void ieee80211_rx_bss_remove(struct ieee80211_sub_if_data *sdata, u8 *bssid, bss = ieee80211_rx_bss_get(local, bssid, freq, ssid, ssid_len); if (bss) { - atomic_dec(&bss->users); + cfg80211_unlink_bss(local->hw.wiphy, (void *)bss); ieee80211_rx_bss_put(local, bss); } } diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index 8d4ec2968f8f..47bb2aed2813 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -102,8 +102,9 @@ void ieee80211_chswitch_work(struct work_struct *work) goto exit; sdata->local->oper_channel = sdata->local->csa_channel; + /* XXX: shouldn't really modify cfg80211-owned data! */ if (!ieee80211_hw_config(sdata->local, IEEE80211_CONF_CHANGE_CHANNEL)) - bss->freq = sdata->local->oper_channel->center_freq; + bss->cbss.channel = sdata->local->oper_channel; ieee80211_rx_bss_put(sdata->local, bss); exit: @@ -158,7 +159,9 @@ void ieee80211_process_chanswitch(struct ieee80211_sub_if_data *sdata, IEEE80211_QUEUE_STOP_REASON_CSA); ifsta->flags |= IEEE80211_STA_CSA_RECEIVED; mod_timer(&ifsta->chswitch_timer, - jiffies + msecs_to_jiffies(sw_elem->count * bss->beacon_int)); + jiffies + + msecs_to_jiffies(sw_elem->count * + bss->cbss.beacon_interval)); } } -- cgit v1.2.3 From 9a03d6d7a8698f26f8ef02dd3c91f8f68c4edcc7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Feb 2009 21:26:01 +0100 Subject: mac80211: calculate wstats_flags on the fly Just to make wext.c more self-contained. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 1 - net/mac80211/main.c | 8 -------- net/mac80211/wext.c | 21 ++++++++++++++++++--- 3 files changed, 18 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 678278344d79..2cb743ed9f9c 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -592,7 +592,6 @@ struct ieee80211_local { int fif_fcsfail, fif_plcpfail, fif_control, fif_other_bss; unsigned int filter_flags; /* FIF_* */ struct iw_statistics wstats; - u8 wstats_flags; bool tim_in_locked_section; /* see ieee80211_beacon_get() */ int tx_headroom; /* required headroom for hardware/radiotap */ diff --git a/net/mac80211/main.c b/net/mac80211/main.c index b4973a1b6595..5667f4e8067f 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -905,14 +905,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) local->hw.conf.listen_interval = local->hw.max_listen_interval; - local->wstats_flags |= local->hw.flags & (IEEE80211_HW_SIGNAL_UNSPEC | - IEEE80211_HW_SIGNAL_DBM) ? - IW_QUAL_QUAL_UPDATED : IW_QUAL_QUAL_INVALID; - local->wstats_flags |= local->hw.flags & IEEE80211_HW_NOISE_DBM ? - IW_QUAL_NOISE_UPDATED : IW_QUAL_NOISE_INVALID; - if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) - local->wstats_flags |= IW_QUAL_DBM; - result = sta_info_start(local); if (result < 0) goto fail_sta_info; diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index b337d7d5edb3..2b023dce8b24 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -145,6 +145,21 @@ static int ieee80211_ioctl_siwgenie(struct net_device *dev, return -EOPNOTSUPP; } +static u8 ieee80211_get_wstats_flags(struct ieee80211_local *local) +{ + u8 wstats_flags = 0; + + wstats_flags |= local->hw.flags & (IEEE80211_HW_SIGNAL_UNSPEC | + IEEE80211_HW_SIGNAL_DBM) ? + IW_QUAL_QUAL_UPDATED : IW_QUAL_QUAL_INVALID; + wstats_flags |= local->hw.flags & IEEE80211_HW_NOISE_DBM ? + IW_QUAL_NOISE_UPDATED : IW_QUAL_NOISE_INVALID; + if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) + wstats_flags |= IW_QUAL_DBM; + + return wstats_flags; +} + static int ieee80211_ioctl_giwrange(struct net_device *dev, struct iw_request_info *info, struct iw_point *data, char *extra) @@ -187,13 +202,13 @@ static int ieee80211_ioctl_giwrange(struct net_device *dev, range->max_qual.noise = 0; range->max_qual.qual = 100; - range->max_qual.updated = local->wstats_flags; + range->max_qual.updated = ieee80211_get_wstats_flags(local); range->avg_qual.qual = 50; /* not always true but better than nothing */ range->avg_qual.level = range->max_qual.level / 2; range->avg_qual.noise = range->max_qual.noise / 2; - range->avg_qual.updated = local->wstats_flags; + range->avg_qual.updated = ieee80211_get_wstats_flags(local); range->enc_capa = IW_ENC_CAPA_WPA | IW_ENC_CAPA_WPA2 | IW_ENC_CAPA_CIPHER_TKIP | IW_ENC_CAPA_CIPHER_CCMP; @@ -979,7 +994,7 @@ static struct iw_statistics *ieee80211_get_wireless_stats(struct net_device *dev wstats->qual.level = sta->last_signal; wstats->qual.qual = sta->last_qual; wstats->qual.noise = sta->last_noise; - wstats->qual.updated = local->wstats_flags; + wstats->qual.updated = ieee80211_get_wstats_flags(local); } rcu_read_unlock(); -- cgit v1.2.3 From a71800f3e3de15583c5d336aafa2853786be18a2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Feb 2009 21:26:02 +0100 Subject: mac80211: fix IBSS auth The code beyond this point is supposed to be used for non-IBSS (managed) mode only. Signed-off-by: Johannes Berg Cc: Jouni Malinen Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c51860f66731..332397415890 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1205,6 +1205,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, if (auth_alg != WLAN_AUTH_OPEN || auth_transaction != 1) return; ieee80211_send_auth(sdata, ifsta, 2, NULL, 0, 0); + return; } if (auth_alg != ifsta->auth_alg || -- cgit v1.2.3 From fe3d2c3fe32dd4d0a421ba39caba1cf87402314e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Feb 2009 21:26:03 +0100 Subject: mac80211: split managed/ibss code a little more It appears that you can completely mess up mac80211 in IBSS mode by sending it a disassoc or deauth: it'll stop queues and do a lot more but not ever do anything again. Fix this by not handling all those frames in IBSS mode, Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 254 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 146 insertions(+), 108 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 332397415890..fbb766afe599 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -808,9 +808,6 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, bss_info_changed |= BSS_CHANGED_ASSOC; ifsta->flags |= IEEE80211_STA_ASSOCIATED; - if (sdata->vif.type != NL80211_IFTYPE_STATION) - return; - bss = ieee80211_rx_bss_get(local, ifsta->bssid, conf->channel->center_freq, ifsta->ssid, ifsta->ssid_len); @@ -1169,6 +1166,30 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, elems.challenge_len + 2, 1); } +static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, + struct ieee80211_if_sta *ifsta, + struct ieee80211_mgmt *mgmt, + size_t len) +{ + u16 auth_alg, auth_transaction, status_code; + + if (len < 24 + 6) + return; + + auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg); + auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); + status_code = le16_to_cpu(mgmt->u.auth.status_code); + + /* + * IEEE 802.11 standard does not require authentication in IBSS + * networks and most implementations do not seem to use it. + * However, try to reply to authentication attempts if someone + * has actually implemented this. + */ + if (auth_alg == WLAN_AUTH_OPEN && auth_transaction == 1) + ieee80211_send_auth(sdata, ifsta, 2, NULL, 0, 0); +} + static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta, struct ieee80211_mgmt *mgmt, @@ -1176,38 +1197,22 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, { u16 auth_alg, auth_transaction, status_code; - if (ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE && - sdata->vif.type != NL80211_IFTYPE_ADHOC) + if (ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE) return; if (len < 24 + 6) return; - if (sdata->vif.type != NL80211_IFTYPE_ADHOC && - memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) + if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) return; - if (sdata->vif.type != NL80211_IFTYPE_ADHOC && - memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) + if (memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) return; auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg); auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); status_code = le16_to_cpu(mgmt->u.auth.status_code); - if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { - /* - * IEEE 802.11 standard does not require authentication in IBSS - * networks and most implementations do not seem to use it. - * However, try to reply to authentication attempts if someone - * has actually implemented this. - */ - if (auth_alg != WLAN_AUTH_OPEN || auth_transaction != 1) - return; - ieee80211_send_auth(sdata, ifsta, 2, NULL, 0, 0); - return; - } - if (auth_alg != ifsta->auth_alg || auth_transaction != ifsta->auth_transaction) return; @@ -1762,74 +1767,85 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, /* was just updated in ieee80211_bss_info_update */ beacon_timestamp = bss->cbss.tsf; - /* - * In STA mode, the remaining parameters should not be overridden - * by beacons because they're not necessarily accurate there. - */ - if (sdata->vif.type != NL80211_IFTYPE_ADHOC && - bss->last_probe_resp && beacon) { - ieee80211_rx_bss_put(local, bss); - return; - } + if (sdata->vif.type != NL80211_IFTYPE_ADHOC) + goto put_bss; /* check if we need to merge IBSS */ - if (sdata->vif.type == NL80211_IFTYPE_ADHOC && beacon && - (!(sdata->u.sta.flags & IEEE80211_STA_BSSID_SET)) && - bss->cbss.capability & WLAN_CAPABILITY_IBSS && - bss->cbss.channel == local->oper_channel && - elems->ssid_len == sdata->u.sta.ssid_len && + + /* merge only on beacons (???) */ + if (!beacon) + goto put_bss; + + /* we use a fixed BSSID */ + if (sdata->u.sta.flags & IEEE80211_STA_BSSID_SET) + goto put_bss; + + /* not an IBSS */ + if (!(bss->cbss.capability & WLAN_CAPABILITY_IBSS)) + goto put_bss; + + /* different channel */ + if (bss->cbss.channel != local->oper_channel) + goto put_bss; + + /* different SSID */ + if (elems->ssid_len != sdata->u.sta.ssid_len || memcmp(elems->ssid, sdata->u.sta.ssid, - sdata->u.sta.ssid_len) == 0) { - if (rx_status->flag & RX_FLAG_TSFT) { - /* in order for correct IBSS merging we need mactime - * - * since mactime is defined as the time the first data - * symbol of the frame hits the PHY, and the timestamp - * of the beacon is defined as "the time that the data - * symbol containing the first bit of the timestamp is - * transmitted to the PHY plus the transmitting STA’s - * delays through its local PHY from the MAC-PHY - * interface to its interface with the WM" - * (802.11 11.1.2) - equals the time this bit arrives at - * the receiver - we have to take into account the - * offset between the two. - * e.g: at 1 MBit that means mactime is 192 usec earlier - * (=24 bytes * 8 usecs/byte) than the beacon timestamp. - */ - int rate; - if (rx_status->flag & RX_FLAG_HT) { - rate = 65; /* TODO: HT rates */ - } else { - rate = local->hw.wiphy->bands[band]-> - bitrates[rx_status->rate_idx].bitrate; - } - rx_timestamp = rx_status->mactime + (24 * 8 * 10 / rate); - } else if (local && local->ops && local->ops->get_tsf) - /* second best option: get current TSF */ - rx_timestamp = local->ops->get_tsf(local_to_hw(local)); + sdata->u.sta.ssid_len)) + goto put_bss; + + if (rx_status->flag & RX_FLAG_TSFT) { + /* + * For correct IBSS merging we need mactime; since mactime is + * defined as the time the first data symbol of the frame hits + * the PHY, and the timestamp of the beacon is defined as "the + * time that the data symbol containing the first bit of the + * timestamp is transmitted to the PHY plus the transmitting + * STA's delays through its local PHY from the MAC-PHY + * interface to its interface with the WM" (802.11 11.1.2) + * - equals the time this bit arrives at the receiver - we have + * to take into account the offset between the two. + * + * E.g. at 1 MBit that means mactime is 192 usec earlier + * (=24 bytes * 8 usecs/byte) than the beacon timestamp. + */ + int rate; + + if (rx_status->flag & RX_FLAG_HT) + rate = 65; /* TODO: HT rates */ else - /* can't merge without knowing the TSF */ - rx_timestamp = -1LLU; + rate = local->hw.wiphy->bands[band]-> + bitrates[rx_status->rate_idx].bitrate; + + rx_timestamp = rx_status->mactime + (24 * 8 * 10 / rate); + } else if (local && local->ops && local->ops->get_tsf) + /* second best option: get current TSF */ + rx_timestamp = local->ops->get_tsf(local_to_hw(local)); + else + /* can't merge without knowing the TSF */ + rx_timestamp = -1LLU; + #ifdef CONFIG_MAC80211_IBSS_DEBUG - printk(KERN_DEBUG "RX beacon SA=%pM BSSID=" - "%pM TSF=0x%llx BCN=0x%llx diff=%lld @%lu\n", - mgmt->sa, mgmt->bssid, - (unsigned long long)rx_timestamp, - (unsigned long long)beacon_timestamp, - (unsigned long long)(rx_timestamp - beacon_timestamp), - jiffies); -#endif /* CONFIG_MAC80211_IBSS_DEBUG */ - if (beacon_timestamp > rx_timestamp) { + printk(KERN_DEBUG "RX beacon SA=%pM BSSID=" + "%pM TSF=0x%llx BCN=0x%llx diff=%lld @%lu\n", + mgmt->sa, mgmt->bssid, + (unsigned long long)rx_timestamp, + (unsigned long long)beacon_timestamp, + (unsigned long long)(rx_timestamp - beacon_timestamp), + jiffies); +#endif + + if (beacon_timestamp > rx_timestamp) { #ifdef CONFIG_MAC80211_IBSS_DEBUG - printk(KERN_DEBUG "%s: beacon TSF higher than " - "local TSF - IBSS merge with BSSID %pM\n", - sdata->dev->name, mgmt->bssid); + printk(KERN_DEBUG "%s: beacon TSF higher than " + "local TSF - IBSS merge with BSSID %pM\n", + sdata->dev->name, mgmt->bssid); #endif - ieee80211_sta_join_ibss(sdata, &sdata->u.sta, bss); - ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, supp_rates); - } + ieee80211_sta_join_ibss(sdata, &sdata->u.sta, bss); + ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, supp_rates); } + put_bss: ieee80211_rx_bss_put(local, bss); } @@ -1993,8 +2009,7 @@ static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *resp; u8 *pos, *end; - if (sdata->vif.type != NL80211_IFTYPE_ADHOC || - ifsta->state != IEEE80211_STA_MLME_IBSS_JOINED || + if (ifsta->state != IEEE80211_STA_MLME_IBSS_JOINED || len < 24 + 2 || !ifsta->probe_resp) return; @@ -2098,31 +2113,54 @@ static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, mgmt = (struct ieee80211_mgmt *) skb->data; fc = le16_to_cpu(mgmt->frame_control); - switch (fc & IEEE80211_FCTL_STYPE) { - case IEEE80211_STYPE_PROBE_REQ: - ieee80211_rx_mgmt_probe_req(sdata, ifsta, mgmt, skb->len); - break; - case IEEE80211_STYPE_PROBE_RESP: - ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len, rx_status); - break; - case IEEE80211_STYPE_BEACON: - ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, rx_status); - break; - case IEEE80211_STYPE_AUTH: - ieee80211_rx_mgmt_auth(sdata, ifsta, mgmt, skb->len); - break; - case IEEE80211_STYPE_ASSOC_RESP: - ieee80211_rx_mgmt_assoc_resp(sdata, ifsta, mgmt, skb->len, 0); - break; - case IEEE80211_STYPE_REASSOC_RESP: - ieee80211_rx_mgmt_assoc_resp(sdata, ifsta, mgmt, skb->len, 1); - break; - case IEEE80211_STYPE_DEAUTH: - ieee80211_rx_mgmt_deauth(sdata, ifsta, mgmt, skb->len); - break; - case IEEE80211_STYPE_DISASSOC: - ieee80211_rx_mgmt_disassoc(sdata, ifsta, mgmt, skb->len); - break; + if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { + switch (fc & IEEE80211_FCTL_STYPE) { + case IEEE80211_STYPE_PROBE_REQ: + ieee80211_rx_mgmt_probe_req(sdata, ifsta, mgmt, + skb->len); + break; + case IEEE80211_STYPE_PROBE_RESP: + ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len, + rx_status); + break; + case IEEE80211_STYPE_BEACON: + ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, + rx_status); + break; + case IEEE80211_STYPE_AUTH: + ieee80211_rx_mgmt_auth_ibss(sdata, ifsta, mgmt, + skb->len); + break; + } + } else { /* NL80211_IFTYPE_STATION */ + switch (fc & IEEE80211_FCTL_STYPE) { + case IEEE80211_STYPE_PROBE_RESP: + ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len, + rx_status); + break; + case IEEE80211_STYPE_BEACON: + ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, + rx_status); + break; + case IEEE80211_STYPE_AUTH: + ieee80211_rx_mgmt_auth(sdata, ifsta, mgmt, skb->len); + break; + case IEEE80211_STYPE_ASSOC_RESP: + ieee80211_rx_mgmt_assoc_resp(sdata, ifsta, mgmt, + skb->len, 0); + break; + case IEEE80211_STYPE_REASSOC_RESP: + ieee80211_rx_mgmt_assoc_resp(sdata, ifsta, mgmt, + skb->len, 1); + break; + case IEEE80211_STYPE_DEAUTH: + ieee80211_rx_mgmt_deauth(sdata, ifsta, mgmt, skb->len); + break; + case IEEE80211_STYPE_DISASSOC: + ieee80211_rx_mgmt_disassoc(sdata, ifsta, mgmt, + skb->len); + break; + } } kfree_skb(skb); -- cgit v1.2.3 From ac45f602ee3d1b6f326f68bc0c2591ceebf05ba4 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Thu, 12 Feb 2009 05:03:37 +0000 Subject: net: infrastructure for hardware time stamping The additional per-packet information (16 bytes for time stamps, 1 byte for flags) is stored for all packets in the skb_shared_info struct. This implementation detail is hidden from users of that information via skb_* accessor functions. A separate struct resp. union is used for the additional information so that it can be stored/copied easily outside of skb_shared_info. Compared to previous implementations (reusing the tstamp field depending on the context, optional additional structures) this is the simplest solution. It does not extend sk_buff itself. TX time stamping is implemented in software if the device driver doesn't support hardware time stamping. The new semantic for hardware/software time stamping around ndo_start_xmit() is based on two assumptions about existing network device drivers which don't support hardware time stamping and know nothing about it: - they leave the new skb_shared_tx unmodified - the keep the connection to the originating socket in skb->sk alive, i.e., don't call skb_orphan() Given that skb_shared_tx is new, the first assumption is safe. The second is only true for some drivers. As a result, software TX time stamping currently works with the bnx2 driver, but not with the unmodified igb driver (the two drivers this patch series was tested with). Signed-off-by: Patrick Ohly Signed-off-by: David S. Miller --- net/core/dev.c | 32 ++++++++++++++++++++++++++++++-- net/core/skbuff.c | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 1e27a67df242..d20c28e839d3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1672,10 +1672,21 @@ static int dev_gso_segment(struct sk_buff *skb) return 0; } +static void tstamp_tx(struct sk_buff *skb) +{ + union skb_shared_tx *shtx = + skb_tx(skb); + if (unlikely(shtx->software && + !shtx->in_progress)) { + skb_tstamp_tx(skb, NULL); + } +} + int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) { const struct net_device_ops *ops = dev->netdev_ops; + int rc; prefetch(&dev->netdev_ops->ndo_start_xmit); if (likely(!skb->next)) { @@ -1689,13 +1700,29 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, goto gso; } - return ops->ndo_start_xmit(skb, dev); + rc = ops->ndo_start_xmit(skb, dev); + /* + * TODO: if skb_orphan() was called by + * dev->hard_start_xmit() (for example, the unmodified + * igb driver does that; bnx2 doesn't), then + * skb_tx_software_timestamp() will be unable to send + * back the time stamp. + * + * How can this be prevented? Always create another + * reference to the socket before calling + * dev->hard_start_xmit()? Prevent that skb_orphan() + * does anything in dev->hard_start_xmit() by clearing + * the skb destructor before the call and restoring it + * afterwards, then doing the skb_orphan() ourselves? + */ + if (likely(!rc)) + tstamp_tx(skb); + return rc; } gso: do { struct sk_buff *nskb = skb->next; - int rc; skb->next = nskb->next; nskb->next = NULL; @@ -1705,6 +1732,7 @@ gso: skb->next = nskb; return rc; } + tstamp_tx(skb); if (unlikely(netif_tx_queue_stopped(txq) && skb->next)) return NETDEV_TX_BUSY; } while (skb->next); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ab7d2e9f02fa..e5a8351ff12d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include @@ -215,7 +216,9 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, shinfo->gso_segs = 0; shinfo->gso_type = 0; shinfo->ip6_frag_id = 0; + shinfo->tx_flags.flags = 0; shinfo->frag_list = NULL; + memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps)); if (fclone) { struct sk_buff *child = skb + 1; @@ -2945,6 +2948,44 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer) } EXPORT_SYMBOL_GPL(skb_cow_data); +void skb_tstamp_tx(struct sk_buff *orig_skb, + struct skb_shared_hwtstamps *hwtstamps) +{ + struct sock *sk = orig_skb->sk; + struct sock_exterr_skb *serr; + struct sk_buff *skb; + int err; + + if (!sk) + return; + + skb = skb_clone(orig_skb, GFP_ATOMIC); + if (!skb) + return; + + if (hwtstamps) { + *skb_hwtstamps(skb) = + *hwtstamps; + } else { + /* + * no hardware time stamps available, + * so keep the skb_shared_tx and only + * store software time stamp + */ + skb->tstamp = ktime_get_real(); + } + + serr = SKB_EXT_ERR(skb); + memset(serr, 0, sizeof(*serr)); + serr->ee.ee_errno = ENOMSG; + serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; + err = sock_queue_err_skb(sk, skb); + if (err) + kfree_skb(skb); +} +EXPORT_SYMBOL_GPL(skb_tstamp_tx); + + /** * skb_partial_csum_set - set up and verify partial csum values for packet * @skb: the skb to set -- cgit v1.2.3 From 20d4947353be60e909e6b1a79d241457edd6833f Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Thu, 12 Feb 2009 05:03:38 +0000 Subject: net: socket infrastructure for SO_TIMESTAMPING The overlap with the old SO_TIMESTAMP[NS] options is handled so that time stamping in software (net_enable_timestamp()) is enabled when SO_TIMESTAMP[NS] and/or SO_TIMESTAMPING_RX_SOFTWARE is set. It's disabled if all of these are off. Signed-off-by: Patrick Ohly Signed-off-by: David S. Miller --- net/compat.c | 19 ++++++++----- net/core/sock.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++--------- net/socket.c | 84 +++++++++++++++++++++++++++++++++++++++++++-------------- 3 files changed, 145 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/compat.c b/net/compat.c index a3a2ba0fac08..8d739053afe4 100644 --- a/net/compat.c +++ b/net/compat.c @@ -216,7 +216,7 @@ Efault: int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *data) { struct compat_timeval ctv; - struct compat_timespec cts; + struct compat_timespec cts[3]; struct compat_cmsghdr __user *cm = (struct compat_cmsghdr __user *) kmsg->msg_control; struct compat_cmsghdr cmhdr; int cmlen; @@ -233,12 +233,17 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat data = &ctv; len = sizeof(ctv); } - if (level == SOL_SOCKET && type == SCM_TIMESTAMPNS) { + if (level == SOL_SOCKET && + (type == SCM_TIMESTAMPNS || type == SCM_TIMESTAMPING)) { + int count = type == SCM_TIMESTAMPNS ? 1 : 3; + int i; struct timespec *ts = (struct timespec *)data; - cts.tv_sec = ts->tv_sec; - cts.tv_nsec = ts->tv_nsec; + for (i = 0; i < count; i++) { + cts[i].tv_sec = ts[i].tv_sec; + cts[i].tv_nsec = ts[i].tv_nsec; + } data = &cts; - len = sizeof(cts); + len = sizeof(cts[0]) * count; } cmlen = CMSG_COMPAT_LEN(len); @@ -455,7 +460,7 @@ int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) struct timeval tv; if (!sock_flag(sk, SOCK_TIMESTAMP)) - sock_enable_timestamp(sk); + sock_enable_timestamp(sk, SOCK_TIMESTAMP); tv = ktime_to_timeval(sk->sk_stamp); if (tv.tv_sec == -1) return err; @@ -479,7 +484,7 @@ int compat_sock_get_timestampns(struct sock *sk, struct timespec __user *usersta struct timespec ts; if (!sock_flag(sk, SOCK_TIMESTAMP)) - sock_enable_timestamp(sk); + sock_enable_timestamp(sk, SOCK_TIMESTAMP); ts = ktime_to_timespec(sk->sk_stamp); if (ts.tv_sec == -1) return err; diff --git a/net/core/sock.c b/net/core/sock.c index 4c64be4f8765..40887e76652c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -120,6 +120,7 @@ #include #include #include +#include #include #include @@ -255,11 +256,14 @@ static void sock_warn_obsolete_bsdism(const char *name) } } -static void sock_disable_timestamp(struct sock *sk) +static void sock_disable_timestamp(struct sock *sk, int flag) { - if (sock_flag(sk, SOCK_TIMESTAMP)) { - sock_reset_flag(sk, SOCK_TIMESTAMP); - net_disable_timestamp(); + if (sock_flag(sk, flag)) { + sock_reset_flag(sk, flag); + if (!sock_flag(sk, SOCK_TIMESTAMP) && + !sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE)) { + net_disable_timestamp(); + } } } @@ -614,13 +618,38 @@ set_rcvbuf: else sock_set_flag(sk, SOCK_RCVTSTAMPNS); sock_set_flag(sk, SOCK_RCVTSTAMP); - sock_enable_timestamp(sk); + sock_enable_timestamp(sk, SOCK_TIMESTAMP); } else { sock_reset_flag(sk, SOCK_RCVTSTAMP); sock_reset_flag(sk, SOCK_RCVTSTAMPNS); } break; + case SO_TIMESTAMPING: + if (val & ~SOF_TIMESTAMPING_MASK) { + ret = EINVAL; + break; + } + sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE, + val & SOF_TIMESTAMPING_TX_HARDWARE); + sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE, + val & SOF_TIMESTAMPING_TX_SOFTWARE); + sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE, + val & SOF_TIMESTAMPING_RX_HARDWARE); + if (val & SOF_TIMESTAMPING_RX_SOFTWARE) + sock_enable_timestamp(sk, + SOCK_TIMESTAMPING_RX_SOFTWARE); + else + sock_disable_timestamp(sk, + SOCK_TIMESTAMPING_RX_SOFTWARE); + sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE, + val & SOF_TIMESTAMPING_SOFTWARE); + sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE, + val & SOF_TIMESTAMPING_SYS_HARDWARE); + sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE, + val & SOF_TIMESTAMPING_RAW_HARDWARE); + break; + case SO_RCVLOWAT: if (val < 0) val = INT_MAX; @@ -768,6 +797,24 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val = sock_flag(sk, SOCK_RCVTSTAMPNS); break; + case SO_TIMESTAMPING: + v.val = 0; + if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) + v.val |= SOF_TIMESTAMPING_TX_HARDWARE; + if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE)) + v.val |= SOF_TIMESTAMPING_TX_SOFTWARE; + if (sock_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE)) + v.val |= SOF_TIMESTAMPING_RX_HARDWARE; + if (sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE)) + v.val |= SOF_TIMESTAMPING_RX_SOFTWARE; + if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) + v.val |= SOF_TIMESTAMPING_SOFTWARE; + if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE)) + v.val |= SOF_TIMESTAMPING_SYS_HARDWARE; + if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE)) + v.val |= SOF_TIMESTAMPING_RAW_HARDWARE; + break; + case SO_RCVTIMEO: lv=sizeof(struct timeval); if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) { @@ -969,7 +1016,8 @@ void sk_free(struct sock *sk) rcu_assign_pointer(sk->sk_filter, NULL); } - sock_disable_timestamp(sk); + sock_disable_timestamp(sk, SOCK_TIMESTAMP); + sock_disable_timestamp(sk, SOCK_TIMESTAMPING_RX_SOFTWARE); if (atomic_read(&sk->sk_omem_alloc)) printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n", @@ -1787,7 +1835,7 @@ int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp) { struct timeval tv; if (!sock_flag(sk, SOCK_TIMESTAMP)) - sock_enable_timestamp(sk); + sock_enable_timestamp(sk, SOCK_TIMESTAMP); tv = ktime_to_timeval(sk->sk_stamp); if (tv.tv_sec == -1) return -ENOENT; @@ -1803,7 +1851,7 @@ int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp) { struct timespec ts; if (!sock_flag(sk, SOCK_TIMESTAMP)) - sock_enable_timestamp(sk); + sock_enable_timestamp(sk, SOCK_TIMESTAMP); ts = ktime_to_timespec(sk->sk_stamp); if (ts.tv_sec == -1) return -ENOENT; @@ -1815,11 +1863,20 @@ int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp) } EXPORT_SYMBOL(sock_get_timestampns); -void sock_enable_timestamp(struct sock *sk) +void sock_enable_timestamp(struct sock *sk, int flag) { - if (!sock_flag(sk, SOCK_TIMESTAMP)) { - sock_set_flag(sk, SOCK_TIMESTAMP); - net_enable_timestamp(); + if (!sock_flag(sk, flag)) { + sock_set_flag(sk, flag); + /* + * we just set one of the two flags which require net + * time stamping, but time stamping might have been on + * already because of the other one + */ + if (!sock_flag(sk, + flag == SOCK_TIMESTAMP ? + SOCK_TIMESTAMPING_RX_SOFTWARE : + SOCK_TIMESTAMP)) + net_enable_timestamp(); } } diff --git a/net/socket.c b/net/socket.c index 35dd7371752a..47a3dc074eb0 100644 --- a/net/socket.c +++ b/net/socket.c @@ -545,6 +545,18 @@ void sock_release(struct socket *sock) sock->file = NULL; } +int sock_tx_timestamp(struct msghdr *msg, struct sock *sk, + union skb_shared_tx *shtx) +{ + shtx->flags = 0; + if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) + shtx->hardware = 1; + if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE)) + shtx->software = 1; + return 0; +} +EXPORT_SYMBOL(sock_tx_timestamp); + static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size) { @@ -595,33 +607,65 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg, return result; } +static int ktime2ts(ktime_t kt, struct timespec *ts) +{ + if (kt.tv64) { + *ts = ktime_to_timespec(kt); + return 1; + } else { + return 0; + } +} + /* * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) */ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { - ktime_t kt = skb->tstamp; - - if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) { - struct timeval tv; - /* Race occurred between timestamp enabling and packet - receiving. Fill in the current time for now. */ - if (kt.tv64 == 0) - kt = ktime_get_real(); - skb->tstamp = kt; - tv = ktime_to_timeval(kt); - put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv); - } else { - struct timespec ts; - /* Race occurred between timestamp enabling and packet - receiving. Fill in the current time for now. */ - if (kt.tv64 == 0) - kt = ktime_get_real(); - skb->tstamp = kt; - ts = ktime_to_timespec(kt); - put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts); + int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP); + struct timespec ts[3]; + int empty = 1; + struct skb_shared_hwtstamps *shhwtstamps = + skb_hwtstamps(skb); + + /* Race occurred between timestamp enabling and packet + receiving. Fill in the current time for now. */ + if (need_software_tstamp && skb->tstamp.tv64 == 0) + __net_timestamp(skb); + + if (need_software_tstamp) { + if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) { + struct timeval tv; + skb_get_timestamp(skb, &tv); + put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, + sizeof(tv), &tv); + } else { + struct timespec ts; + skb_get_timestampns(skb, &ts); + put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, + sizeof(ts), &ts); + } + } + + + memset(ts, 0, sizeof(ts)); + if (skb->tstamp.tv64 && + sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) { + skb_get_timestampns(skb, ts + 0); + empty = 0; + } + if (shhwtstamps) { + if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) && + ktime2ts(shhwtstamps->syststamp, ts + 1)) + empty = 0; + if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) && + ktime2ts(shhwtstamps->hwtstamp, ts + 2)) + empty = 0; } + if (!empty) + put_cmsg(msg, SOL_SOCKET, + SCM_TIMESTAMPING, sizeof(ts), &ts); } EXPORT_SYMBOL_GPL(__sock_recv_timestamp); -- cgit v1.2.3 From 51f31cabe3ce5345b51e4a4f82138b38c4d5dc91 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Thu, 12 Feb 2009 05:03:39 +0000 Subject: ip: support for TX timestamps on UDP and RAW sockets Instructions for time stamping outgoing packets are take from the socket layer and later copied into the new skb. Signed-off-by: Patrick Ohly Signed-off-by: David S. Miller --- net/can/raw.c | 3 +++ net/ipv4/icmp.c | 2 ++ net/ipv4/ip_output.c | 6 ++++++ net/ipv4/raw.c | 1 + net/ipv4/udp.c | 4 ++++ 5 files changed, 16 insertions(+) (limited to 'net') diff --git a/net/can/raw.c b/net/can/raw.c index 0703cba4bf9f..6aa154e806ae 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -646,6 +646,9 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, goto put_dev; err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); + if (err < 0) + goto free_skb; + err = sock_tx_timestamp(msg, sk, skb_tx(skb)); if (err < 0) goto free_skb; skb->dev = dev; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 705b33b184a3..382800a62b31 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -375,6 +375,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) inet->tos = ip_hdr(skb)->tos; daddr = ipc.addr = rt->rt_src; ipc.opt = NULL; + ipc.shtx.flags = 0; if (icmp_param->replyopts.optlen) { ipc.opt = &icmp_param->replyopts; if (ipc.opt->srr) @@ -532,6 +533,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) inet_sk(sk)->tos = tos; ipc.addr = iph->saddr; ipc.opt = &icmp_param.replyopts; + ipc.shtx.flags = 0; { struct flowi fl = { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 8ebe86dd72af..3e7e910c7c0f 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -935,6 +935,10 @@ alloc_new_skb: sk->sk_allocation); if (unlikely(skb == NULL)) err = -ENOBUFS; + else + /* only the initial fragment is + time stamped */ + ipc->shtx.flags = 0; } if (skb == NULL) goto error; @@ -945,6 +949,7 @@ alloc_new_skb: skb->ip_summed = csummode; skb->csum = 0; skb_reserve(skb, hh_len); + *skb_tx(skb) = ipc->shtx; /* * Find where to start putting bytes. @@ -1364,6 +1369,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar daddr = ipc.addr = rt->rt_src; ipc.opt = NULL; + ipc.shtx.flags = 0; if (replyopts.opt.optlen) { ipc.opt = &replyopts.opt; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index dff8bc4e0fac..f774651f0a47 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -493,6 +493,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.addr = inet->saddr; ipc.opt = NULL; + ipc.shtx.flags = 0; ipc.oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c47c989cb1fb..4bd178a111d5 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -596,6 +596,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, return -EOPNOTSUPP; ipc.opt = NULL; + ipc.shtx.flags = 0; if (up->pending) { /* @@ -643,6 +644,9 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.addr = inet->saddr; ipc.oif = sk->sk_bound_dev_if; + err = sock_tx_timestamp(msg, sk, &ipc.shtx); + if (err) + return err; if (msg->msg_controllen) { err = ip_cmsg_send(sock_net(sk), msg, &ipc); if (err) -- cgit v1.2.3 From d24fff22d8dba13cc21034144f68f213415cb7c8 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Thu, 12 Feb 2009 05:03:40 +0000 Subject: net: pass new SIOCSHWTSTAMP through to device drivers Signed-off-by: Patrick Ohly Signed-off-by: David S. Miller --- net/core/dev.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index d20c28e839d3..d393fc997cd9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4019,6 +4019,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) cmd == SIOCSMIIREG || cmd == SIOCBRADDIF || cmd == SIOCBRDELIF || + cmd == SIOCSHWTSTAMP || cmd == SIOCWANDEV) { err = -EOPNOTSUPP; if (ops->ndo_do_ioctl) { @@ -4173,6 +4174,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCBONDCHANGEACTIVE: case SIOCBRADDIF: case SIOCBRDELIF: + case SIOCSHWTSTAMP: if (!capable(CAP_NET_ADMIN)) return -EPERM; /* fall through */ -- cgit v1.2.3 From 06e868066e3b5828383eb40ff4d1c0029100b0b5 Mon Sep 17 00:00:00 2001 From: Lucas Nussbaum Date: Fri, 13 Feb 2009 08:33:41 +0000 Subject: sctp: Allow to disable SCTP checksums via module parameter This is a new version of my patch, now using a module parameter instead of a sysctl, so that the option is harder to find. Please note that, once the module is loaded, it is still possible to change the value of the parameter in /sys/module/sctp/parameters/, which is useful if you want to do performance comparisons without rebooting. Computation of SCTP checksums significantly affects the performance of SCTP. For example, using two dual-Opteron 246 connected using a Gbe network, it was not possible to achieve more than ~730 Mbps, compared to 941 Mbps after disabling SCTP checksums. Unfortunately, SCTP checksum offloading in NICs is not commonly available (yet). By default, checksums are still enabled, of course. Signed-off-by: Lucas Nussbaum Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/input.c | 3 ++- net/sctp/output.c | 2 +- net/sctp/protocol.c | 2 ++ 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/input.c b/net/sctp/input.c index 2e4a8646dbc3..693fd0804810 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -142,7 +142,8 @@ int sctp_rcv(struct sk_buff *skb) __skb_pull(skb, skb_transport_offset(skb)); if (skb->len < sizeof(struct sctphdr)) goto discard_it; - if (!skb_csum_unnecessary(skb) && sctp_rcv_checksum(skb) < 0) + if (!sctp_checksum_disable && !skb_csum_unnecessary(skb) && + sctp_rcv_checksum(skb) < 0) goto discard_it; skb_pull(skb, sizeof(struct sctphdr)); diff --git a/net/sctp/output.c b/net/sctp/output.c index 47bfba6c03ec..2d65b7a7330b 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -531,7 +531,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) * Note: Adler-32 is no longer applicable, as has been replaced * by CRC32-C as described in . */ - if (!(dst->dev->features & NETIF_F_NO_CSUM)) { + if (!sctp_checksum_disable && !(dst->dev->features & NETIF_F_NO_CSUM)) { crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len); crc32 = sctp_end_cksum(crc32); } else diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index b78e3be69013..cc0b592698f9 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1411,4 +1411,6 @@ MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-132"); MODULE_ALIAS("net-pf-" __stringify(PF_INET6) "-proto-132"); MODULE_AUTHOR("Linux Kernel SCTP developers "); MODULE_DESCRIPTION("Support for the SCTP protocol (RFC2960)"); +module_param_named(no_checksums, sctp_checksum_disable, bool, 0644); +MODULE_PARM_DESC(no_checksums, "Disable checksums computing and verification"); MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 4458f04c02a46c679a90ef71f866a415c192deb4 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 13 Feb 2009 08:33:42 +0000 Subject: sctp: Clean up sctp checksumming code The sctp crc32c checksum is always generated in little endian. So, we clean up the code to treat it as little endian and remove all the __force casts. Suggested by Herbert Xu. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/input.c | 11 ++++++----- net/sctp/output.c | 14 ++++++-------- 2 files changed, 12 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/sctp/input.c b/net/sctp/input.c index 693fd0804810..d2e98803ffe3 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -83,14 +83,15 @@ static inline int sctp_rcv_checksum(struct sk_buff *skb) { struct sk_buff *list = skb_shinfo(skb)->frag_list; struct sctphdr *sh = sctp_hdr(skb); - __be32 cmp = sh->checksum; - __be32 val = sctp_start_cksum((__u8 *)sh, skb_headlen(skb)); + __le32 cmp = sh->checksum; + __le32 val; + __u32 tmp = sctp_start_cksum((__u8 *)sh, skb_headlen(skb)); for (; list; list = list->next) - val = sctp_update_cksum((__u8 *)list->data, skb_headlen(list), - val); + tmp = sctp_update_cksum((__u8 *)list->data, skb_headlen(list), + tmp); - val = sctp_end_cksum(val); + val = sctp_end_cksum(tmp); if (val != cmp) { /* CRC failure, dump it. */ diff --git a/net/sctp/output.c b/net/sctp/output.c index 2d65b7a7330b..07d58903a746 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -367,7 +367,6 @@ int sctp_packet_transmit(struct sctp_packet *packet) struct sctp_transport *tp = packet->transport; struct sctp_association *asoc = tp->asoc; struct sctphdr *sh; - __be32 crc32 = cpu_to_be32(0); struct sk_buff *nskb; struct sctp_chunk *chunk, *tmp; struct sock *sk; @@ -532,16 +531,15 @@ int sctp_packet_transmit(struct sctp_packet *packet) * by CRC32-C as described in . */ if (!sctp_checksum_disable && !(dst->dev->features & NETIF_F_NO_CSUM)) { - crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len); - crc32 = sctp_end_cksum(crc32); + __u32 crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len); + + /* 3) Put the resultant value into the checksum field in the + * common header, and leave the rest of the bits unchanged. + */ + sh->checksum = sctp_end_cksum(crc32); } else nskb->ip_summed = CHECKSUM_UNNECESSARY; - /* 3) Put the resultant value into the checksum field in the - * common header, and leave the rest of the bits unchanged. - */ - sh->checksum = crc32; - /* IP layer ECN support * From RFC 2481 * "The ECN-Capable Transport (ECT) bit would be set by the -- cgit v1.2.3 From faee47cdbfe8d74a1573c2f81ea6dbb08d735be6 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 13 Feb 2009 08:33:43 +0000 Subject: sctp: Fix the RTO-doubling on idle-link heartbeats SCTP incorrectly doubles rto ever time a Hearbeat chunk is generated. However RFC 4960 states: On an idle destination address that is allowed to heartbeat, it is recommended that a HEARTBEAT chunk is sent once per RTO of that destination address plus the protocol parameter 'HB.interval', with jittering of +/- 50% of the RTO value, and exponential backoff of the RTO if the previous HEARTBEAT is unanswered. Essentially, of if the heartbean is unacknowledged, do we double the RTO. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sm_sideeffect.c | 17 +++++++++++++++-- net/sctp/transport.c | 2 ++ 2 files changed, 17 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index e1d6076b4f59..0146cfb1f182 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -461,9 +461,15 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, * expires, set RTO <- RTO * 2 ("back off the timer"). The * maximum value discussed in rule C7 above (RTO.max) may be * used to provide an upper bound to this doubling operation. + * + * Special Case: the first HB doesn't trigger exponential backoff. + * The first unacknowleged HB triggers it. We do this with a flag + * that indicates that we have an outstanding HB. */ - transport->last_rto = transport->rto; - transport->rto = min((transport->rto * 2), transport->asoc->rto_max); + if (transport->hb_sent) { + transport->last_rto = transport->rto; + transport->rto = min((transport->rto * 2), transport->asoc->rto_max); + } } /* Worker routine to handle INIT command failure. */ @@ -621,6 +627,11 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, t->error_count = 0; t->asoc->overall_error_count = 0; + /* Clear the hb_sent flag to signal that we had a good + * acknowledgement. + */ + t->hb_sent = 0; + /* Mark the destination transport address as active if it is not so * marked. */ @@ -657,6 +668,8 @@ static void sctp_cmd_transport_reset(sctp_cmd_seq_t *cmds, /* Mark one strike against a transport. */ sctp_do_8_2_transport_strike(asoc, t); + + t->hb_sent = 1; } /* Helper function to process the process SACK command. */ diff --git a/net/sctp/transport.c b/net/sctp/transport.c index e745c118f239..5c29b14ee9af 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -79,6 +79,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, peer->rttvar = 0; peer->srtt = 0; peer->rto_pending = 0; + peer->hb_sent = 0; peer->fast_recovery = 0; peer->last_time_heard = jiffies; @@ -608,6 +609,7 @@ void sctp_transport_reset(struct sctp_transport *t) t->flight_size = 0; t->error_count = 0; t->rto_pending = 0; + t->hb_sent = 0; t->fast_recovery = 0; /* Initialize the state information for SFR-CACC */ -- cgit v1.2.3 From 914e1c8b6980c516667375d3e55f0b6e674c8c58 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Fri, 13 Feb 2009 08:33:44 +0000 Subject: sctp: Inherit all socket options from parent correctly. During peeloff/accept() sctp needs to save the parent socket state into the new socket so that any options set on the parent are inherited by the child socket. This was found when the parent/listener socket issues SO_BINDTODEVICE, but the data was misrouted after a route cache flush. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/ipv6.c | 33 +------------------------------- net/sctp/protocol.c | 29 ++-------------------------- net/sctp/socket.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 51 insertions(+), 66 deletions(-) (limited to 'net') diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 786227566696..a63de3f7f185 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -627,9 +627,7 @@ static sctp_scope_t sctp_v6_scope(union sctp_addr *addr) static struct sock *sctp_v6_create_accept_sk(struct sock *sk, struct sctp_association *asoc) { - struct inet_sock *inet = inet_sk(sk); struct sock *newsk; - struct inet_sock *newinet; struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct sctp6_sock *newsctp6sk; @@ -639,17 +637,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, sock_init_data(NULL, newsk); - newsk->sk_type = SOCK_STREAM; - - newsk->sk_prot = sk->sk_prot; - newsk->sk_no_check = sk->sk_no_check; - newsk->sk_reuse = sk->sk_reuse; - - newsk->sk_destruct = inet_sock_destruct; - newsk->sk_family = PF_INET6; - newsk->sk_protocol = IPPROTO_SCTP; - newsk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; - newsk->sk_shutdown = sk->sk_shutdown; + sctp_copy_sock(newsk, sk, asoc); sock_reset_flag(sk, SOCK_ZAPPED); newsctp6sk = (struct sctp6_sock *)newsk; @@ -657,7 +645,6 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, sctp_sk(newsk)->v4mapped = sctp_sk(sk)->v4mapped; - newinet = inet_sk(newsk); newnp = inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); @@ -665,26 +652,8 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, /* Initialize sk's sport, dport, rcv_saddr and daddr for getsockname() * and getpeername(). */ - newinet->sport = inet->sport; - newnp->saddr = np->saddr; - newnp->rcv_saddr = np->rcv_saddr; - newinet->dport = htons(asoc->peer.port); sctp_v6_to_sk_daddr(&asoc->peer.primary_addr, newsk); - /* Init the ipv4 part of the socket since we can have sockets - * using v6 API for ipv4. - */ - newinet->uc_ttl = -1; - newinet->mc_loop = 1; - newinet->mc_ttl = 1; - newinet->mc_index = 0; - newinet->mc_list = NULL; - - if (ipv4_config.no_pmtu_disc) - newinet->pmtudisc = IP_PMTUDISC_DONT; - else - newinet->pmtudisc = IP_PMTUDISC_WANT; - sk_refcnt_debug_inc(newsk); if (newsk->sk_prot->init(newsk)) { diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index cc0b592698f9..c1e316ee7155 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -589,46 +589,21 @@ static int sctp_v4_is_ce(const struct sk_buff *skb) static struct sock *sctp_v4_create_accept_sk(struct sock *sk, struct sctp_association *asoc) { - struct inet_sock *inet = inet_sk(sk); - struct inet_sock *newinet; struct sock *newsk = sk_alloc(sock_net(sk), PF_INET, GFP_KERNEL, sk->sk_prot); + struct inet_sock *newinet; if (!newsk) goto out; sock_init_data(NULL, newsk); - newsk->sk_type = SOCK_STREAM; - - newsk->sk_no_check = sk->sk_no_check; - newsk->sk_reuse = sk->sk_reuse; - newsk->sk_shutdown = sk->sk_shutdown; - - newsk->sk_destruct = inet_sock_destruct; - newsk->sk_family = PF_INET; - newsk->sk_protocol = IPPROTO_SCTP; - newsk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; + sctp_copy_sock(newsk, sk, asoc); sock_reset_flag(newsk, SOCK_ZAPPED); newinet = inet_sk(newsk); - /* Initialize sk's sport, dport, rcv_saddr and daddr for - * getsockname() and getpeername() - */ - newinet->sport = inet->sport; - newinet->saddr = inet->saddr; - newinet->rcv_saddr = inet->rcv_saddr; - newinet->dport = htons(asoc->peer.port); newinet->daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr; - newinet->pmtudisc = inet->pmtudisc; - newinet->id = asoc->next_tsn ^ jiffies; - - newinet->uc_ttl = -1; - newinet->mc_loop = 1; - newinet->mc_ttl = 1; - newinet->mc_index = 0; - newinet->mc_list = NULL; sk_refcnt_debug_inc(newsk); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ff0a8f88de04..dea864f5de54 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3939,7 +3939,6 @@ SCTP_STATIC int sctp_do_peeloff(struct sctp_association *asoc, { struct sock *sk = asoc->base.sk; struct socket *sock; - struct inet_sock *inetsk; struct sctp_af *af; int err = 0; @@ -3954,18 +3953,18 @@ SCTP_STATIC int sctp_do_peeloff(struct sctp_association *asoc, if (err < 0) return err; - /* Populate the fields of the newsk from the oldsk and migrate the - * asoc to the newsk. - */ - sctp_sock_migrate(sk, sock->sk, asoc, SCTP_SOCKET_UDP_HIGH_BANDWIDTH); + sctp_copy_sock(sock->sk, sk, asoc); /* Make peeled-off sockets more like 1-1 accepted sockets. * Set the daddr and initialize id to something more random */ af = sctp_get_af_specific(asoc->peer.primary_addr.sa.sa_family); af->to_sk_daddr(&asoc->peer.primary_addr, sk); - inetsk = inet_sk(sock->sk); - inetsk->id = asoc->next_tsn ^ jiffies; + + /* Populate the fields of the newsk from the oldsk and migrate the + * asoc to the newsk. + */ + sctp_sock_migrate(sk, sock->sk, asoc, SCTP_SOCKET_UDP_HIGH_BANDWIDTH); *sockp = sock; @@ -6700,6 +6699,48 @@ done: sctp_skb_set_owner_r(skb, sk); } +void sctp_copy_sock(struct sock *newsk, struct sock *sk, + struct sctp_association *asoc) +{ + struct inet_sock *inet = inet_sk(sk); + struct inet_sock *newinet = inet_sk(newsk); + + newsk->sk_type = sk->sk_type; + newsk->sk_bound_dev_if = sk->sk_bound_dev_if; + newsk->sk_flags = sk->sk_flags; + newsk->sk_no_check = sk->sk_no_check; + newsk->sk_reuse = sk->sk_reuse; + + newsk->sk_shutdown = sk->sk_shutdown; + newsk->sk_destruct = inet_sock_destruct; + newsk->sk_family = sk->sk_family; + newsk->sk_protocol = IPPROTO_SCTP; + newsk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; + newsk->sk_sndbuf = sk->sk_sndbuf; + newsk->sk_rcvbuf = sk->sk_rcvbuf; + newsk->sk_lingertime = sk->sk_lingertime; + newsk->sk_rcvtimeo = sk->sk_rcvtimeo; + newsk->sk_sndtimeo = sk->sk_sndtimeo; + + newinet = inet_sk(newsk); + + /* Initialize sk's sport, dport, rcv_saddr and daddr for + * getsockname() and getpeername() + */ + newinet->sport = inet->sport; + newinet->saddr = inet->saddr; + newinet->rcv_saddr = inet->rcv_saddr; + newinet->dport = htons(asoc->peer.port); + newinet->pmtudisc = inet->pmtudisc; + newinet->id = asoc->next_tsn ^ jiffies; + + newinet->uc_ttl = inet->uc_ttl; + newinet->mc_loop = 1; + newinet->mc_ttl = 1; + newinet->mc_index = 0; + newinet->mc_list = NULL; +} + /* Populate the fields of the newsk from the oldsk and migrate the assoc * and its messages to the newsk. */ -- cgit v1.2.3 From 1c10c49d83e2062b309cc88e8eb3abb05d397480 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 16 Feb 2009 00:08:56 -0800 Subject: net: replace commatas with semicolons Impact: syntax fix Interestingly enough this compiles w/o any complaints: orphans = percpu_counter_sum_positive(&tcp_orphan_count), sockets = percpu_counter_sum_positive(&tcp_sockets_allocated), Signed-off-by: Thomas Gleixner Signed-off-by: David S. Miller --- net/ipv4/proc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index eb62e58bff79..cf0cdeeb1db0 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -54,8 +54,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) int orphans, sockets; local_bh_disable(); - orphans = percpu_counter_sum_positive(&tcp_orphan_count), - sockets = percpu_counter_sum_positive(&tcp_sockets_allocated), + orphans = percpu_counter_sum_positive(&tcp_orphan_count); + sockets = percpu_counter_sum_positive(&tcp_sockets_allocated); local_bh_enable(); socket_seq_show(seq); -- cgit v1.2.3 From fe2a7ce4de07472ace0cdf460a41f462a4621687 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Wed, 18 Feb 2009 16:28:35 +0100 Subject: netfilter: change generic l4 protocol number 0 is used by Hop-by-hop header and so this may cause confusion. 255 is stated as 'Reserved' by IANA. Signed-off-by: Christoph Paasch Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_proto_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 4be80d7b8795..829374f426c4 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -92,7 +92,7 @@ static struct ctl_table generic_compat_sysctl_table[] = { struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly = { .l3proto = PF_UNSPEC, - .l4proto = 0, + .l4proto = 255, .name = "unknown", .pkt_to_tuple = generic_pkt_to_tuple, .invert_tuple = generic_invert_tuple, -- cgit v1.2.3 From fecea3a389c89de9afae2eda74fad894d5677229 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 18 Feb 2009 16:29:08 +0100 Subject: netfilter: remove unneeded goto Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/netfilter/core.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/core.c b/net/netfilter/core.c index a90ac83c5918..5bb34737501f 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -174,7 +174,6 @@ next_hook: outdev, &elem, okfn, hook_thresh); if (verdict == NF_ACCEPT || verdict == NF_STOP) { ret = 1; - goto unlock; } else if (verdict == NF_DROP) { kfree_skb(skb); ret = -EPERM; @@ -183,7 +182,6 @@ next_hook: verdict >> NF_VERDICT_BITS)) goto next_hook; } -unlock: rcu_read_unlock(); return ret; } -- cgit v1.2.3 From 9c8222b9e71b690c8388bb0ebe5c3e5a1469e884 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 18 Feb 2009 16:30:20 +0100 Subject: netfilter: x_tables: remove unneeded initializations Later patches change the locking on xt_table and the initialization of the lock element is not needed since the lock is always initialized in xt_table_register anyway. Signed-off-by: Stephen Hemminger Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/arptable_filter.c | 2 -- net/ipv4/netfilter/iptable_filter.c | 1 - net/ipv4/netfilter/iptable_mangle.c | 1 - net/ipv4/netfilter/iptable_raw.c | 1 - net/ipv4/netfilter/iptable_security.c | 1 - net/ipv4/netfilter/nf_nat_rule.c | 1 - net/ipv6/netfilter/ip6table_filter.c | 1 - net/ipv6/netfilter/ip6table_mangle.c | 1 - net/ipv6/netfilter/ip6table_raw.c | 1 - net/ipv6/netfilter/ip6table_security.c | 1 - 10 files changed, 11 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index e091187e864f..6ecfdae7c589 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -48,8 +48,6 @@ static struct static struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(packet_filter.lock), - .private = NULL, .me = THIS_MODULE, .af = NFPROTO_ARP, }; diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 52cb6939d093..c30a969724f8 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -56,7 +56,6 @@ static struct static struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(packet_filter.lock), .me = THIS_MODULE, .af = AF_INET, }; diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 3929d20b9e45..4087614d9519 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -67,7 +67,6 @@ static struct static struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(packet_mangler.lock), .me = THIS_MODULE, .af = AF_INET, }; diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 7f65d18333e3..e5356da1fb54 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -39,7 +39,6 @@ static struct static struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(packet_raw.lock), .me = THIS_MODULE, .af = AF_INET, }; diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index a52a35f4a584..29ab630f240a 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -60,7 +60,6 @@ static struct static struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(security_table.lock), .me = THIS_MODULE, .af = AF_INET, }; diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index a7eb04719044..6348a793936e 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -61,7 +61,6 @@ static struct static struct xt_table nat_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(nat_table.lock), .me = THIS_MODULE, .af = AF_INET, }; diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 40d2e36d8fac..ef5a0a32bf8e 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -54,7 +54,6 @@ static struct static struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(packet_filter.lock), .me = THIS_MODULE, .af = AF_INET6, }; diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index d0b31b259d4d..ab0d398a2ba7 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -60,7 +60,6 @@ static struct static struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(packet_mangler.lock), .me = THIS_MODULE, .af = AF_INET6, }; diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 109fab6f831a..4b792b6ca321 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -38,7 +38,6 @@ static struct static struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(packet_raw.lock), .me = THIS_MODULE, .af = AF_INET6, }; diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 20bc52f13e43..0ea37ff15d56 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -59,7 +59,6 @@ static struct static struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(security_table.lock), .me = THIS_MODULE, .af = AF_INET6, }; -- cgit v1.2.3 From 842bff366b536787b88c07cbf2416e2cb26cae67 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 18 Feb 2009 16:30:38 +0100 Subject: netfilter: ebtables: remove unneeded initializations The initialization of the lock element is not needed since the lock is always initialized in ebt_register_table. Signed-off-by: Stephen Hemminger Signed-off-by: Patrick McHardy --- net/bridge/netfilter/ebtable_broute.c | 1 - net/bridge/netfilter/ebtable_filter.c | 1 - net/bridge/netfilter/ebtable_nat.c | 1 - 3 files changed, 3 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 8604dfc1fc3b..c751111440f8 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -46,7 +46,6 @@ static struct ebt_table broute_table = .name = "broute", .table = &initial_table, .valid_hooks = 1 << NF_BR_BROUTING, - .lock = __RW_LOCK_UNLOCKED(broute_table.lock), .check = check, .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 2b2e8040a9c6..a5eea72938a6 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -55,7 +55,6 @@ static struct ebt_table frame_filter = .name = "filter", .table = &initial_table, .valid_hooks = FILTER_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(frame_filter.lock), .check = check, .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 3fe1ae87e35f..6024c551f9a9 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -55,7 +55,6 @@ static struct ebt_table frame_nat = .name = "nat", .table = &initial_table, .valid_hooks = NAT_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(frame_nat.lock), .check = check, .me = THIS_MODULE, }; -- cgit v1.2.3 From 55df4ac0c927c7f1f84e6d75532f0ca45d391e64 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Wed, 18 Feb 2009 16:30:56 +0100 Subject: netfilter: log invalid new icmpv6 packet with nf_log_packet() This patch adds a logging message for invalid new icmpv6 packet. Signed-off-by: Eric Leblond Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index c323643ffcf9..165b256a6fa0 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -126,6 +126,10 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, pr_debug("icmpv6: can't create new conn with type %u\n", type + 128); nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple); + if (LOG_INVALID(nf_ct_net(ct), IPPROTO_ICMPV6)) + nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, + "nf_ct_icmpv6: invalid new with type %d ", + type + 128); return false; } atomic_set(&ct->proto.icmp.count, 0); -- cgit v1.2.3 From ddc214c43a923e89741e04da2f10e3037a64e222 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Feb 2009 17:47:50 +0100 Subject: netfilter: arp_tables: unfold two critical loops in arp_packet_match() x86 and powerpc can perform long word accesses in an efficient maner. We can use this to unroll two loops in arp_packet_match(), to perform arithmetic on long words instead of bytes. This is a win on x86_64 for example. Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/arp_tables.c | 44 +++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 7ea88b61cb0d..b5db46342614 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -73,6 +73,36 @@ static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, return (ret != 0); } +/* + * Unfortunatly, _b and _mask are not aligned to an int (or long int) + * Some arches dont care, unrolling the loop is a win on them. + */ +static unsigned long ifname_compare(const char *_a, const char *_b, const char *_mask) +{ +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + const unsigned long *a = (const unsigned long *)_a; + const unsigned long *b = (const unsigned long *)_b; + const unsigned long *mask = (const unsigned long *)_mask; + unsigned long ret; + + ret = (a[0] ^ b[0]) & mask[0]; + if (IFNAMSIZ > sizeof(unsigned long)) + ret |= (a[1] ^ b[1]) & mask[1]; + if (IFNAMSIZ > 2 * sizeof(unsigned long)) + ret |= (a[2] ^ b[2]) & mask[2]; + if (IFNAMSIZ > 3 * sizeof(unsigned long)) + ret |= (a[3] ^ b[3]) & mask[3]; + BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long)); +#else + unsigned long ret = 0; + int i; + + for (i = 0; i < IFNAMSIZ; i++) + ret |= (_a[i] ^ _b[i]) & _mask[i]; +#endif + return ret; +} + /* Returns whether packet matches rule or not. */ static inline int arp_packet_match(const struct arphdr *arphdr, struct net_device *dev, @@ -83,7 +113,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr, const char *arpptr = (char *)(arphdr + 1); const char *src_devaddr, *tgt_devaddr; __be32 src_ipaddr, tgt_ipaddr; - int i, ret; + long ret; #define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg))) @@ -156,10 +186,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr, } /* Look for ifname matches. */ - for (i = 0, ret = 0; i < IFNAMSIZ; i++) { - ret |= (indev[i] ^ arpinfo->iniface[i]) - & arpinfo->iniface_mask[i]; - } + ret = ifname_compare(indev, arpinfo->iniface, arpinfo->iniface_mask); if (FWINV(ret != 0, ARPT_INV_VIA_IN)) { dprintf("VIA in mismatch (%s vs %s).%s\n", @@ -168,10 +195,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr, return 0; } - for (i = 0, ret = 0; i < IFNAMSIZ; i++) { - ret |= (outdev[i] ^ arpinfo->outiface[i]) - & arpinfo->outiface_mask[i]; - } + ret = ifname_compare(outdev, arpinfo->outiface, arpinfo->outiface_mask); if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) { dprintf("VIA out mismatch (%s vs %s).%s\n", @@ -221,7 +245,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, const struct net_device *out, struct xt_table *table) { - static const char nulldevname[IFNAMSIZ]; + static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); unsigned int verdict = NF_DROP; const struct arphdr *arp; bool hotdrop = false; -- cgit v1.2.3 From 563d36eb3fb22dd04da9aa6f12e1b9ba0ac115f3 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 18 Feb 2009 18:38:40 +0100 Subject: netfilter: Combine ipt_TTL and ip6t_HL source Suggested by: James King Similarly to commit c9fd49680954714473d6cbd2546d6ff120f96840, merge TTL and HL. Since HL does not depend on any IPv6-specific function, no new module dependencies would arise. With slight adjustments to the Kconfig help text. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/Kconfig | 15 ---- net/ipv4/netfilter/Makefile | 1 - net/ipv4/netfilter/ipt_TTL.c | 97 ------------------------ net/ipv6/netfilter/Kconfig | 17 ----- net/ipv6/netfilter/Makefile | 1 - net/ipv6/netfilter/ip6t_HL.c | 95 ------------------------ net/netfilter/Kconfig | 15 ++++ net/netfilter/Makefile | 1 + net/netfilter/xt_HL.c | 171 +++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 187 insertions(+), 226 deletions(-) delete mode 100644 net/ipv4/netfilter/ipt_TTL.c delete mode 100644 net/ipv6/netfilter/ip6t_HL.c create mode 100644 net/netfilter/xt_HL.c (limited to 'net') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 3816e1dc9295..3ad9f43b4c45 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -322,21 +322,6 @@ config IP_NF_TARGET_ECN To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_TTL - tristate 'TTL target support' - depends on IP_NF_MANGLE - depends on NETFILTER_ADVANCED - help - This option adds a `TTL' target, which enables the user to modify - the TTL value of the IP header. - - While it is safe to decrement/lower the TTL, this target also enables - functionality to increment and set the TTL value of the IP header to - arbitrary values. This is EXTREMELY DANGEROUS since you can easily - create immortal packets that loop forever on the network. - - To compile it as a module, choose M here. If unsure, say N. - # raw + specific targets config IP_NF_RAW tristate 'raw table support (required for NOTRACK/TRACE)' diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 5f9b650d90fc..20b0c37155fb 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -61,7 +61,6 @@ obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o -obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o # generic ARP tables diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c deleted file mode 100644 index 6d76aae90cc0..000000000000 --- a/net/ipv4/netfilter/ipt_TTL.c +++ /dev/null @@ -1,97 +0,0 @@ -/* TTL modification target for IP tables - * (C) 2000,2005 by Harald Welte - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ - -#include -#include -#include -#include - -#include -#include - -MODULE_AUTHOR("Harald Welte "); -MODULE_DESCRIPTION("Xtables: IPv4 TTL field modification target"); -MODULE_LICENSE("GPL"); - -static unsigned int -ttl_tg(struct sk_buff *skb, const struct xt_target_param *par) -{ - struct iphdr *iph; - const struct ipt_TTL_info *info = par->targinfo; - int new_ttl; - - if (!skb_make_writable(skb, skb->len)) - return NF_DROP; - - iph = ip_hdr(skb); - - switch (info->mode) { - case IPT_TTL_SET: - new_ttl = info->ttl; - break; - case IPT_TTL_INC: - new_ttl = iph->ttl + info->ttl; - if (new_ttl > 255) - new_ttl = 255; - break; - case IPT_TTL_DEC: - new_ttl = iph->ttl - info->ttl; - if (new_ttl < 0) - new_ttl = 0; - break; - default: - new_ttl = iph->ttl; - break; - } - - if (new_ttl != iph->ttl) { - csum_replace2(&iph->check, htons(iph->ttl << 8), - htons(new_ttl << 8)); - iph->ttl = new_ttl; - } - - return XT_CONTINUE; -} - -static bool ttl_tg_check(const struct xt_tgchk_param *par) -{ - const struct ipt_TTL_info *info = par->targinfo; - - if (info->mode > IPT_TTL_MAXMODE) { - printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n", - info->mode); - return false; - } - if (info->mode != IPT_TTL_SET && info->ttl == 0) - return false; - return true; -} - -static struct xt_target ttl_tg_reg __read_mostly = { - .name = "TTL", - .family = NFPROTO_IPV4, - .target = ttl_tg, - .targetsize = sizeof(struct ipt_TTL_info), - .table = "mangle", - .checkentry = ttl_tg_check, - .me = THIS_MODULE, -}; - -static int __init ttl_tg_init(void) -{ - return xt_register_target(&ttl_tg_reg); -} - -static void __exit ttl_tg_exit(void) -{ - xt_unregister_target(&ttl_tg_reg); -} - -module_init(ttl_tg_init); -module_exit(ttl_tg_exit); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 53ea512c4608..6a42a968c498 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -170,23 +170,6 @@ config IP6_NF_MANGLE To compile it as a module, choose M here. If unsure, say N. -config IP6_NF_TARGET_HL - tristate 'HL (hoplimit) target support' - depends on IP6_NF_MANGLE - depends on NETFILTER_ADVANCED - help - This option adds a `HL' target, which enables the user to decrement - the hoplimit value of the IPv6 header or set it to a given (lower) - value. - - While it is safe to decrement the hoplimit value, this option also - enables functionality to increment and set the hoplimit value of the - IPv6 header to arbitrary values. This is EXTREMELY DANGEROUS since - you can easily create immortal packets that loop forever on the - network. - - To compile it as a module, choose M here. If unsure, say N. - config IP6_NF_RAW tristate 'raw table support (required for TRACE)' depends on NETFILTER_ADVANCED diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 3f17c948eefb..61a4570d0ede 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -27,6 +27,5 @@ obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o # targets -obj-$(CONFIG_IP6_NF_TARGET_HL) += ip6t_HL.o obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c deleted file mode 100644 index 27b5adf670a2..000000000000 --- a/net/ipv6/netfilter/ip6t_HL.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Hop Limit modification target for ip6tables - * Maciej Soltysiak - * Based on HW's TTL module - * - * This software is distributed under the terms of GNU GPL - */ - -#include -#include -#include -#include - -#include -#include - -MODULE_AUTHOR("Maciej Soltysiak "); -MODULE_DESCRIPTION("Xtables: IPv6 Hop Limit field modification target"); -MODULE_LICENSE("GPL"); - -static unsigned int -hl_tg6(struct sk_buff *skb, const struct xt_target_param *par) -{ - struct ipv6hdr *ip6h; - const struct ip6t_HL_info *info = par->targinfo; - int new_hl; - - if (!skb_make_writable(skb, skb->len)) - return NF_DROP; - - ip6h = ipv6_hdr(skb); - - switch (info->mode) { - case IP6T_HL_SET: - new_hl = info->hop_limit; - break; - case IP6T_HL_INC: - new_hl = ip6h->hop_limit + info->hop_limit; - if (new_hl > 255) - new_hl = 255; - break; - case IP6T_HL_DEC: - new_hl = ip6h->hop_limit - info->hop_limit; - if (new_hl < 0) - new_hl = 0; - break; - default: - new_hl = ip6h->hop_limit; - break; - } - - ip6h->hop_limit = new_hl; - - return XT_CONTINUE; -} - -static bool hl_tg6_check(const struct xt_tgchk_param *par) -{ - const struct ip6t_HL_info *info = par->targinfo; - - if (info->mode > IP6T_HL_MAXMODE) { - printk(KERN_WARNING "ip6t_HL: invalid or unknown Mode %u\n", - info->mode); - return false; - } - if (info->mode != IP6T_HL_SET && info->hop_limit == 0) { - printk(KERN_WARNING "ip6t_HL: increment/decrement doesn't " - "make sense with value 0\n"); - return false; - } - return true; -} - -static struct xt_target hl_tg6_reg __read_mostly = { - .name = "HL", - .family = NFPROTO_IPV6, - .target = hl_tg6, - .targetsize = sizeof(struct ip6t_HL_info), - .table = "mangle", - .checkentry = hl_tg6_check, - .me = THIS_MODULE -}; - -static int __init hl_tg6_init(void) -{ - return xt_register_target(&hl_tg6_reg); -} - -static void __exit hl_tg6_exit(void) -{ - xt_unregister_target(&hl_tg6_reg); -} - -module_init(hl_tg6_init); -module_exit(hl_tg6_exit); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index c2bac9cd0caf..d99f29b7b980 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -357,6 +357,21 @@ config NETFILTER_XT_TARGET_DSCP To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_TARGET_HL + tristate '"HL" hoplimit target support' + depends on IP_NF_MANGLE || IP6_NF_MANGLE + depends on NETFILTER_ADVANCED + ---help--- + This option adds the "HL" (for IPv6) and "TTL" (for IPv4) + targets, which enable the user to change the + hoplimit/time-to-live value of the IP header. + + While it is safe to decrement the hoplimit/TTL value, the + modules also allow to increment and set the hoplimit value of + the header to arbitrary values. This is EXTREMELY DANGEROUS + since you can easily create immortal packets that loop + forever on the network. + config NETFILTER_XT_TARGET_MARK tristate '"MARK" target support' default m if NETFILTER_ADVANCED=n diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index da3d909e053f..6ebe0482265b 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -45,6 +45,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o +obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o diff --git a/net/netfilter/xt_HL.c b/net/netfilter/xt_HL.c new file mode 100644 index 000000000000..10e789e2d12a --- /dev/null +++ b/net/netfilter/xt_HL.c @@ -0,0 +1,171 @@ +/* + * TTL modification target for IP tables + * (C) 2000,2005 by Harald Welte + * + * Hop Limit modification target for ip6tables + * Maciej Soltysiak + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +MODULE_AUTHOR("Harald Welte "); +MODULE_AUTHOR("Maciej Soltysiak "); +MODULE_DESCRIPTION("Xtables: Hoplimit/TTL Limit field modification target"); +MODULE_LICENSE("GPL"); + +static unsigned int +ttl_tg(struct sk_buff *skb, const struct xt_target_param *par) +{ + struct iphdr *iph; + const struct ipt_TTL_info *info = par->targinfo; + int new_ttl; + + if (!skb_make_writable(skb, skb->len)) + return NF_DROP; + + iph = ip_hdr(skb); + + switch (info->mode) { + case IPT_TTL_SET: + new_ttl = info->ttl; + break; + case IPT_TTL_INC: + new_ttl = iph->ttl + info->ttl; + if (new_ttl > 255) + new_ttl = 255; + break; + case IPT_TTL_DEC: + new_ttl = iph->ttl - info->ttl; + if (new_ttl < 0) + new_ttl = 0; + break; + default: + new_ttl = iph->ttl; + break; + } + + if (new_ttl != iph->ttl) { + csum_replace2(&iph->check, htons(iph->ttl << 8), + htons(new_ttl << 8)); + iph->ttl = new_ttl; + } + + return XT_CONTINUE; +} + +static unsigned int +hl_tg6(struct sk_buff *skb, const struct xt_target_param *par) +{ + struct ipv6hdr *ip6h; + const struct ip6t_HL_info *info = par->targinfo; + int new_hl; + + if (!skb_make_writable(skb, skb->len)) + return NF_DROP; + + ip6h = ipv6_hdr(skb); + + switch (info->mode) { + case IP6T_HL_SET: + new_hl = info->hop_limit; + break; + case IP6T_HL_INC: + new_hl = ip6h->hop_limit + info->hop_limit; + if (new_hl > 255) + new_hl = 255; + break; + case IP6T_HL_DEC: + new_hl = ip6h->hop_limit - info->hop_limit; + if (new_hl < 0) + new_hl = 0; + break; + default: + new_hl = ip6h->hop_limit; + break; + } + + ip6h->hop_limit = new_hl; + + return XT_CONTINUE; +} + +static bool ttl_tg_check(const struct xt_tgchk_param *par) +{ + const struct ipt_TTL_info *info = par->targinfo; + + if (info->mode > IPT_TTL_MAXMODE) { + printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n", + info->mode); + return false; + } + if (info->mode != IPT_TTL_SET && info->ttl == 0) + return false; + return true; +} + +static bool hl_tg6_check(const struct xt_tgchk_param *par) +{ + const struct ip6t_HL_info *info = par->targinfo; + + if (info->mode > IP6T_HL_MAXMODE) { + printk(KERN_WARNING "ip6t_HL: invalid or unknown Mode %u\n", + info->mode); + return false; + } + if (info->mode != IP6T_HL_SET && info->hop_limit == 0) { + printk(KERN_WARNING "ip6t_HL: increment/decrement doesn't " + "make sense with value 0\n"); + return false; + } + return true; +} + +static struct xt_target hl_tg_reg[] __read_mostly = { + { + .name = "TTL", + .revision = 0, + .family = NFPROTO_IPV4, + .target = ttl_tg, + .targetsize = sizeof(struct ipt_TTL_info), + .table = "mangle", + .checkentry = ttl_tg_check, + .me = THIS_MODULE, + }, + { + .name = "HL", + .revision = 0, + .family = NFPROTO_IPV6, + .target = hl_tg6, + .targetsize = sizeof(struct ip6t_HL_info), + .table = "mangle", + .checkentry = hl_tg6_check, + .me = THIS_MODULE, + }, +}; + +static int __init hl_tg_init(void) +{ + return xt_register_targets(hl_tg_reg, ARRAY_SIZE(hl_tg_reg)); +} + +static void __exit hl_tg_exit(void) +{ + xt_unregister_targets(hl_tg_reg, ARRAY_SIZE(hl_tg_reg)); +} + +module_init(hl_tg_init); +module_exit(hl_tg_exit); +MODULE_ALIAS("ipt_TTL"); +MODULE_ALIAS("ip6t_HL"); -- cgit v1.2.3 From cfac5ef7b92a2d504563989ecd0beb563920444b Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 18 Feb 2009 18:39:31 +0100 Subject: netfilter: Combine ipt_ttl and ip6t_hl source Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/Kconfig | 9 ---- net/ipv4/netfilter/Makefile | 1 - net/ipv4/netfilter/ipt_ttl.c | 63 ------------------------- net/ipv6/netfilter/Kconfig | 9 ---- net/ipv6/netfilter/Makefile | 1 - net/ipv6/netfilter/ip6t_hl.c | 68 --------------------------- net/netfilter/Kconfig | 8 ++++ net/netfilter/Makefile | 1 + net/netfilter/xt_hl.c | 108 +++++++++++++++++++++++++++++++++++++++++++ 9 files changed, 117 insertions(+), 151 deletions(-) delete mode 100644 net/ipv4/netfilter/ipt_ttl.c delete mode 100644 net/ipv6/netfilter/ip6t_hl.c create mode 100644 net/netfilter/xt_hl.c (limited to 'net') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 3ad9f43b4c45..40ad41f19b72 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -92,15 +92,6 @@ config IP_NF_MATCH_ECN To compile it as a module, choose M here. If unsure, say N. -config IP_NF_MATCH_TTL - tristate '"ttl" match support' - depends on NETFILTER_ADVANCED - help - This adds CONFIG_IP_NF_MATCH_TTL option, which enabled the user - to match packets by their TTL value. - - To compile it as a module, choose M here. If unsure, say N. - # `filter', generic and specific targets config IP_NF_FILTER tristate "Packet filtering" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 20b0c37155fb..48111594ee9b 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -51,7 +51,6 @@ obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o -obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o # targets obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c deleted file mode 100644 index 297f1cbf4ff5..000000000000 --- a/net/ipv4/netfilter/ipt_ttl.c +++ /dev/null @@ -1,63 +0,0 @@ -/* IP tables module for matching the value of the TTL - * - * (C) 2000,2001 by Harald Welte - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include - -#include -#include - -MODULE_AUTHOR("Harald Welte "); -MODULE_DESCRIPTION("Xtables: IPv4 TTL field match"); -MODULE_LICENSE("GPL"); - -static bool ttl_mt(const struct sk_buff *skb, const struct xt_match_param *par) -{ - const struct ipt_ttl_info *info = par->matchinfo; - const u8 ttl = ip_hdr(skb)->ttl; - - switch (info->mode) { - case IPT_TTL_EQ: - return ttl == info->ttl; - case IPT_TTL_NE: - return ttl != info->ttl; - case IPT_TTL_LT: - return ttl < info->ttl; - case IPT_TTL_GT: - return ttl > info->ttl; - default: - printk(KERN_WARNING "ipt_ttl: unknown mode %d\n", - info->mode); - return false; - } - - return false; -} - -static struct xt_match ttl_mt_reg __read_mostly = { - .name = "ttl", - .family = NFPROTO_IPV4, - .match = ttl_mt, - .matchsize = sizeof(struct ipt_ttl_info), - .me = THIS_MODULE, -}; - -static int __init ttl_mt_init(void) -{ - return xt_register_match(&ttl_mt_reg); -} - -static void __exit ttl_mt_exit(void) -{ - xt_unregister_match(&ttl_mt_reg); -} - -module_init(ttl_mt_init); -module_exit(ttl_mt_exit); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 6a42a968c498..4a8d7ecd6d09 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -94,15 +94,6 @@ config IP6_NF_MATCH_OPTS To compile it as a module, choose M here. If unsure, say N. -config IP6_NF_MATCH_HL - tristate '"hl" match support' - depends on NETFILTER_ADVANCED - help - HL matching allows you to match packets based on the hop - limit of the packet. - - To compile it as a module, choose M here. If unsure, say N. - config IP6_NF_MATCH_IPV6HEADER tristate '"ipv6header" IPv6 Extension Headers Match' default m if NETFILTER_ADVANCED=n diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 61a4570d0ede..aafbba30c899 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -20,7 +20,6 @@ obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o -obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c deleted file mode 100644 index c964dca1132d..000000000000 --- a/net/ipv6/netfilter/ip6t_hl.c +++ /dev/null @@ -1,68 +0,0 @@ -/* Hop Limit matching module */ - -/* (C) 2001-2002 Maciej Soltysiak - * Based on HW's ttl module - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include - -#include -#include - -MODULE_AUTHOR("Maciej Soltysiak "); -MODULE_DESCRIPTION("Xtables: IPv6 Hop Limit field match"); -MODULE_LICENSE("GPL"); - -static bool hl_mt6(const struct sk_buff *skb, const struct xt_match_param *par) -{ - const struct ip6t_hl_info *info = par->matchinfo; - const struct ipv6hdr *ip6h = ipv6_hdr(skb); - - switch (info->mode) { - case IP6T_HL_EQ: - return ip6h->hop_limit == info->hop_limit; - break; - case IP6T_HL_NE: - return ip6h->hop_limit != info->hop_limit; - break; - case IP6T_HL_LT: - return ip6h->hop_limit < info->hop_limit; - break; - case IP6T_HL_GT: - return ip6h->hop_limit > info->hop_limit; - break; - default: - printk(KERN_WARNING "ip6t_hl: unknown mode %d\n", - info->mode); - return false; - } - - return false; -} - -static struct xt_match hl_mt6_reg __read_mostly = { - .name = "hl", - .family = NFPROTO_IPV6, - .match = hl_mt6, - .matchsize = sizeof(struct ip6t_hl_info), - .me = THIS_MODULE, -}; - -static int __init hl_mt6_init(void) -{ - return xt_register_match(&hl_mt6_reg); -} - -static void __exit hl_mt6_exit(void) -{ - xt_unregister_match(&hl_mt6_reg); -} - -module_init(hl_mt6_init); -module_exit(hl_mt6_exit); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index d99f29b7b980..0eb98b4fbf44 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -620,6 +620,14 @@ config NETFILTER_XT_MATCH_HELPER To compile it as a module, choose M here. If unsure, say Y. +config NETFILTER_XT_MATCH_HL + tristate '"hl" hoplimit/TTL match support' + depends on NETFILTER_ADVANCED + ---help--- + HL matching allows you to match packets based on the hoplimit + in the IPv6 header, or the time-to-live field in the IPv4 + header of the packet. + config NETFILTER_XT_MATCH_IPRANGE tristate '"iprange" address range match support' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 6ebe0482265b..da73ed25701c 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -68,6 +68,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o +obj-$(CONFIG_NETFILTER_XT_MATCH_HL) += xt_hl.o obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o diff --git a/net/netfilter/xt_hl.c b/net/netfilter/xt_hl.c new file mode 100644 index 000000000000..7726154c87b2 --- /dev/null +++ b/net/netfilter/xt_hl.c @@ -0,0 +1,108 @@ +/* + * IP tables module for matching the value of the TTL + * (C) 2000,2001 by Harald Welte + * + * Hop Limit matching module + * (C) 2001-2002 Maciej Soltysiak + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include + +#include +#include +#include + +MODULE_AUTHOR("Maciej Soltysiak "); +MODULE_DESCRIPTION("Xtables: Hoplimit/TTL field match"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_ttl"); +MODULE_ALIAS("ip6t_hl"); + +static bool ttl_mt(const struct sk_buff *skb, const struct xt_match_param *par) +{ + const struct ipt_ttl_info *info = par->matchinfo; + const u8 ttl = ip_hdr(skb)->ttl; + + switch (info->mode) { + case IPT_TTL_EQ: + return ttl == info->ttl; + case IPT_TTL_NE: + return ttl != info->ttl; + case IPT_TTL_LT: + return ttl < info->ttl; + case IPT_TTL_GT: + return ttl > info->ttl; + default: + printk(KERN_WARNING "ipt_ttl: unknown mode %d\n", + info->mode); + return false; + } + + return false; +} + +static bool hl_mt6(const struct sk_buff *skb, const struct xt_match_param *par) +{ + const struct ip6t_hl_info *info = par->matchinfo; + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + + switch (info->mode) { + case IP6T_HL_EQ: + return ip6h->hop_limit == info->hop_limit; + break; + case IP6T_HL_NE: + return ip6h->hop_limit != info->hop_limit; + break; + case IP6T_HL_LT: + return ip6h->hop_limit < info->hop_limit; + break; + case IP6T_HL_GT: + return ip6h->hop_limit > info->hop_limit; + break; + default: + printk(KERN_WARNING "ip6t_hl: unknown mode %d\n", + info->mode); + return false; + } + + return false; +} + +static struct xt_match hl_mt_reg[] __read_mostly = { + { + .name = "ttl", + .revision = 0, + .family = NFPROTO_IPV4, + .match = ttl_mt, + .matchsize = sizeof(struct ipt_ttl_info), + .me = THIS_MODULE, + }, + { + .name = "hl", + .revision = 0, + .family = NFPROTO_IPV6, + .match = hl_mt6, + .matchsize = sizeof(struct ip6t_hl_info), + .me = THIS_MODULE, + }, +}; + +static int __init hl_mt_init(void) +{ + return xt_register_matches(hl_mt_reg, ARRAY_SIZE(hl_mt_reg)); +} + +static void __exit hl_mt_exit(void) +{ + xt_unregister_matches(hl_mt_reg, ARRAY_SIZE(hl_mt_reg)); +} + +module_init(hl_mt_init); +module_exit(hl_mt_exit); -- cgit v1.2.3 From 4f1c3b7e7ee4d841c8af3a074dc361d6a7a77803 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 18 Feb 2009 19:11:39 +0100 Subject: netfilter: xt_physdev fixes 1) physdev_mt() incorrectly assumes nulldevname[] is aligned on an int 2) It also uses word comparisons, while it could use long word ones. Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/netfilter/xt_physdev.c | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 1bcdfc12cf59..4b13ef7ce145 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -24,9 +24,9 @@ static bool physdev_mt(const struct sk_buff *skb, const struct xt_match_param *par) { int i; - static const char nulldevname[IFNAMSIZ]; + static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); const struct xt_physdev_info *info = par->matchinfo; - bool ret; + unsigned long ret; const char *indev, *outdev; const struct nf_bridge_info *nf_bridge; @@ -68,10 +68,10 @@ physdev_mt(const struct sk_buff *skb, const struct xt_match_param *par) if (!(info->bitmask & XT_PHYSDEV_OP_IN)) goto match_outdev; indev = nf_bridge->physindev ? nf_bridge->physindev->name : nulldevname; - for (i = 0, ret = false; i < IFNAMSIZ/sizeof(unsigned int); i++) { - ret |= (((const unsigned int *)indev)[i] - ^ ((const unsigned int *)info->physindev)[i]) - & ((const unsigned int *)info->in_mask)[i]; + for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { + ret |= (((const unsigned long *)indev)[i] + ^ ((const unsigned long *)info->physindev)[i]) + & ((const unsigned long *)info->in_mask)[i]; } if (!ret ^ !(info->invert & XT_PHYSDEV_OP_IN)) @@ -82,13 +82,12 @@ match_outdev: return true; outdev = nf_bridge->physoutdev ? nf_bridge->physoutdev->name : nulldevname; - for (i = 0, ret = false; i < IFNAMSIZ/sizeof(unsigned int); i++) { - ret |= (((const unsigned int *)outdev)[i] - ^ ((const unsigned int *)info->physoutdev)[i]) - & ((const unsigned int *)info->out_mask)[i]; + for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { + ret |= (((const unsigned long *)outdev)[i] + ^ ((const unsigned long *)info->physoutdev)[i]) + & ((const unsigned long *)info->out_mask)[i]; } - - return ret ^ !(info->invert & XT_PHYSDEV_OP_OUT); + return (!!ret ^ !(info->invert & XT_PHYSDEV_OP_OUT)); } static bool physdev_mt_check(const struct xt_mtchk_param *par) -- cgit v1.2.3 From e88721f87d8caa679e62d6004a9a5661f1ac7b0e Mon Sep 17 00:00:00 2001 From: Krishna Kumar Date: Wed, 18 Feb 2009 17:55:02 -0800 Subject: net: Optimize skb_tx_hash() by eliminating a comparison Optimize skb_tx_hash() by eliminating a comparison that executes for every packet. skb_tx_hashrnd initialization is moved to a later part of the startup sequence, namely after the "random" driver is initialized. Rebooted the system three times and verified that the code generates different random numbers each time. Signed-off-by: Krishna Kumar Signed-off-by: David S. Miller --- net/core/dev.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index d393fc997cd9..5493394118fb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1745,17 +1745,11 @@ out_kfree_skb: } static u32 skb_tx_hashrnd; -static int skb_tx_hashrnd_initialized = 0; static u16 skb_tx_hash(struct net_device *dev, struct sk_buff *skb) { u32 hash; - if (unlikely(!skb_tx_hashrnd_initialized)) { - get_random_bytes(&skb_tx_hashrnd, 4); - skb_tx_hashrnd_initialized = 1; - } - if (skb_rx_queue_recorded(skb)) { hash = skb_get_rx_queue(skb); } else if (skb->sk && skb->sk->sk_hash) { @@ -5291,6 +5285,14 @@ out: subsys_initcall(net_dev_init); +static int __init initialize_hashrnd(void) +{ + get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd)); + return 0; +} + +late_initcall_sync(initialize_hashrnd); + EXPORT_SYMBOL(__dev_get_by_index); EXPORT_SYMBOL(__dev_get_by_name); EXPORT_SYMBOL(__dev_remove_pack); -- cgit v1.2.3 From 4323362e49bd10b8ff3fe5cf183fdd52662ff4a3 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Thu, 19 Feb 2009 11:16:03 +0100 Subject: netfilter: xtables: add backward-compat options Concern has been expressed about the changing Kconfig options. Provide the old options that forward-select. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/Kconfig | 18 ++++++++++++++++++ net/ipv6/netfilter/Kconfig | 18 ++++++++++++++++++ 2 files changed, 36 insertions(+) (limited to 'net') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 40ad41f19b72..f8d6180938d5 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -92,6 +92,15 @@ config IP_NF_MATCH_ECN To compile it as a module, choose M here. If unsure, say N. +config IP_NF_MATCH_TTL + tristate '"ttl" match support' + depends on NETFILTER_ADVANCED + select NETFILTER_XT_MATCH_HL + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + COFNIG_NETFILTER_XT_MATCH_HL. + # `filter', generic and specific targets config IP_NF_FILTER tristate "Packet filtering" @@ -313,6 +322,15 @@ config IP_NF_TARGET_ECN To compile it as a module, choose M here. If unsure, say N. +config IP_NF_TARGET_TTL + tristate '"TTL" target support' + depends on NETFILTER_ADVANCED + select NETFILTER_XT_TARGET_HL + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + COFNIG_NETFILTER_XT_TARGET_HL. + # raw + specific targets config IP_NF_RAW tristate 'raw table support (required for NOTRACK/TRACE)' diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 4a8d7ecd6d09..625353a5fe18 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -94,6 +94,15 @@ config IP6_NF_MATCH_OPTS To compile it as a module, choose M here. If unsure, say N. +config IP6_NF_MATCH_HL + tristate '"hl" hoplimit match support' + depends on NETFILTER_ADVANCED + select NETFILTER_XT_MATCH_HL + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + COFNIG_NETFILTER_XT_MATCH_HL. + config IP6_NF_MATCH_IPV6HEADER tristate '"ipv6header" IPv6 Extension Headers Match' default m if NETFILTER_ADVANCED=n @@ -121,6 +130,15 @@ config IP6_NF_MATCH_RT To compile it as a module, choose M here. If unsure, say N. # The targets +config IP6_NF_TARGET_HL + tristate '"HL" hoplimit target support' + depends on NETFILTER_ADVANCED + select NETFILTER_XT_TARGET_HL + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + COFNIG_NETFILTER_XT_TARGET_HL. + config IP6_NF_TARGET_LOG tristate "LOG target support" default m if NETFILTER_ADVANCED=n -- cgit v1.2.3 From eacc17fb64f03b6c268aaf6cea320100d19d8af5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Feb 2009 11:17:17 +0100 Subject: netfilter: xt_physdev: unfold two loops in physdev_mt() xt_physdev netfilter module can use an ifname_compare() helper so that two loops are unfolded. Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/netfilter/xt_physdev.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 4b13ef7ce145..44a234ef4439 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -20,10 +20,27 @@ MODULE_DESCRIPTION("Xtables: Bridge physical device match"); MODULE_ALIAS("ipt_physdev"); MODULE_ALIAS("ip6t_physdev"); +static unsigned long ifname_compare(const char *_a, const char *_b, const char *_mask) +{ + const unsigned long *a = (const unsigned long *)_a; + const unsigned long *b = (const unsigned long *)_b; + const unsigned long *mask = (const unsigned long *)_mask; + unsigned long ret; + + ret = (a[0] ^ b[0]) & mask[0]; + if (IFNAMSIZ > sizeof(unsigned long)) + ret |= (a[1] ^ b[1]) & mask[1]; + if (IFNAMSIZ > 2 * sizeof(unsigned long)) + ret |= (a[2] ^ b[2]) & mask[2]; + if (IFNAMSIZ > 3 * sizeof(unsigned long)) + ret |= (a[3] ^ b[3]) & mask[3]; + BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long)); + return ret; +} + static bool physdev_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - int i; static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); const struct xt_physdev_info *info = par->matchinfo; unsigned long ret; @@ -68,11 +85,7 @@ physdev_mt(const struct sk_buff *skb, const struct xt_match_param *par) if (!(info->bitmask & XT_PHYSDEV_OP_IN)) goto match_outdev; indev = nf_bridge->physindev ? nf_bridge->physindev->name : nulldevname; - for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { - ret |= (((const unsigned long *)indev)[i] - ^ ((const unsigned long *)info->physindev)[i]) - & ((const unsigned long *)info->in_mask)[i]; - } + ret = ifname_compare(indev, info->physindev, info->in_mask); if (!ret ^ !(info->invert & XT_PHYSDEV_OP_IN)) return false; @@ -82,11 +95,8 @@ match_outdev: return true; outdev = nf_bridge->physoutdev ? nf_bridge->physoutdev->name : nulldevname; - for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { - ret |= (((const unsigned long *)outdev)[i] - ^ ((const unsigned long *)info->physoutdev)[i]) - & ((const unsigned long *)info->out_mask)[i]; - } + ret = ifname_compare(outdev, info->physoutdev, info->out_mask); + return (!!ret ^ !(info->invert & XT_PHYSDEV_OP_OUT)); } -- cgit v1.2.3 From 323dbf96382f057d035afce0237f08e18571ac1d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Feb 2009 11:18:23 +0100 Subject: netfilter: ip6_tables: unfold two loops in ip6_packet_match() ip6_tables netfilter module can use an ifname_compare() helper so that two loops are unfolded. Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/ip6_tables.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index a33485dc81cb..d64594b6c061 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -89,6 +89,25 @@ ip6t_ext_hdr(u8 nexthdr) (nexthdr == IPPROTO_DSTOPTS) ); } +static unsigned long ifname_compare(const char *_a, const char *_b, + const unsigned char *_mask) +{ + const unsigned long *a = (const unsigned long *)_a; + const unsigned long *b = (const unsigned long *)_b; + const unsigned long *mask = (const unsigned long *)_mask; + unsigned long ret; + + ret = (a[0] ^ b[0]) & mask[0]; + if (IFNAMSIZ > sizeof(unsigned long)) + ret |= (a[1] ^ b[1]) & mask[1]; + if (IFNAMSIZ > 2 * sizeof(unsigned long)) + ret |= (a[2] ^ b[2]) & mask[2]; + if (IFNAMSIZ > 3 * sizeof(unsigned long)) + ret |= (a[3] ^ b[3]) & mask[3]; + BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long)); + return ret; +} + /* Returns whether matches rule or not. */ /* Performance critical - called for every packet */ static inline bool @@ -99,7 +118,6 @@ ip6_packet_match(const struct sk_buff *skb, unsigned int *protoff, int *fragoff, bool *hotdrop) { - size_t i; unsigned long ret; const struct ipv6hdr *ipv6 = ipv6_hdr(skb); @@ -120,12 +138,7 @@ ip6_packet_match(const struct sk_buff *skb, return false; } - /* Look for ifname matches; this should unroll nicely. */ - for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { - ret |= (((const unsigned long *)indev)[i] - ^ ((const unsigned long *)ip6info->iniface)[i]) - & ((const unsigned long *)ip6info->iniface_mask)[i]; - } + ret = ifname_compare(indev, ip6info->iniface, ip6info->iniface_mask); if (FWINV(ret != 0, IP6T_INV_VIA_IN)) { dprintf("VIA in mismatch (%s vs %s).%s\n", @@ -134,11 +147,7 @@ ip6_packet_match(const struct sk_buff *skb, return false; } - for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { - ret |= (((const unsigned long *)outdev)[i] - ^ ((const unsigned long *)ip6info->outiface)[i]) - & ((const unsigned long *)ip6info->outiface_mask)[i]; - } + ret = ifname_compare(outdev, ip6info->outiface, ip6info->outiface_mask); if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) { dprintf("VIA out mismatch (%s vs %s).%s\n", -- cgit v1.2.3 From 313e458f81ec3852106c5a83830fe0d4f405a71a Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 20 Feb 2009 16:29:08 +0900 Subject: alloc_percpu: add align argument to __alloc_percpu. This prepares for a real __alloc_percpu, by adding an alignment argument. Only one place uses __alloc_percpu directly, and that's for a string. tj: af_inet also uses __alloc_percpu(), update it. Signed-off-by: Rusty Russell Cc: Christoph Lameter Cc: Jens Axboe --- net/ipv4/af_inet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 743f5542d65a..3a3dad801354 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1375,10 +1375,10 @@ EXPORT_SYMBOL_GPL(snmp_fold_field); int snmp_mib_init(void *ptr[2], size_t mibsize) { BUG_ON(ptr == NULL); - ptr[0] = __alloc_percpu(mibsize); + ptr[0] = __alloc_percpu(mibsize, __alignof__(unsigned long long)); if (!ptr[0]) goto err0; - ptr[1] = __alloc_percpu(mibsize); + ptr[1] = __alloc_percpu(mibsize, __alignof__(unsigned long long)); if (!ptr[1]) goto err1; return 0; -- cgit v1.2.3 From 59089d8d162ddcb5c434672e915331964d38a754 Mon Sep 17 00:00:00 2001 From: Santwona Behera Date: Fri, 20 Feb 2009 00:58:13 -0800 Subject: ethtool: Add RX pkt classification interface Signed-off-by: Santwona Behera Signed-off-by: David S. Miller --- net/core/ethtool.c | 58 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 48 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 947710a36ced..244ca56dffac 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -209,34 +209,62 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr) return 0; } -static int ethtool_set_rxhash(struct net_device *dev, void __user *useraddr) +static int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr) { struct ethtool_rxnfc cmd; - if (!dev->ethtool_ops->set_rxhash) + if (!dev->ethtool_ops->set_rxnfc) return -EOPNOTSUPP; if (copy_from_user(&cmd, useraddr, sizeof(cmd))) return -EFAULT; - return dev->ethtool_ops->set_rxhash(dev, &cmd); + return dev->ethtool_ops->set_rxnfc(dev, &cmd); } -static int ethtool_get_rxhash(struct net_device *dev, void __user *useraddr) +static int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr) { struct ethtool_rxnfc info; + const struct ethtool_ops *ops = dev->ethtool_ops; + int ret; + void *rule_buf = NULL; - if (!dev->ethtool_ops->get_rxhash) + if (!ops->get_rxnfc) return -EOPNOTSUPP; if (copy_from_user(&info, useraddr, sizeof(info))) return -EFAULT; - dev->ethtool_ops->get_rxhash(dev, &info); + if (info.cmd == ETHTOOL_GRXCLSRLALL) { + if (info.rule_cnt > 0) { + rule_buf = kmalloc(info.rule_cnt * sizeof(u32), + GFP_USER); + if (!rule_buf) + return -ENOMEM; + } + } + ret = ops->get_rxnfc(dev, &info, rule_buf); + if (ret < 0) + goto err_out; + + ret = -EFAULT; if (copy_to_user(useraddr, &info, sizeof(info))) - return -EFAULT; - return 0; + goto err_out; + + if (rule_buf) { + useraddr += offsetof(struct ethtool_rxnfc, rule_locs); + if (copy_to_user(useraddr, rule_buf, + info.rule_cnt * sizeof(u32))) + goto err_out; + } + ret = 0; + +err_out: + if (rule_buf) + kfree(rule_buf); + + return ret; } static int ethtool_get_regs(struct net_device *dev, char __user *useraddr) @@ -901,6 +929,10 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GFLAGS: case ETHTOOL_GPFLAGS: case ETHTOOL_GRXFH: + case ETHTOOL_GRXRINGS: + case ETHTOOL_GRXCLSRLCNT: + case ETHTOOL_GRXCLSRULE: + case ETHTOOL_GRXCLSRLALL: break; default: if (!capable(CAP_NET_ADMIN)) @@ -1052,10 +1084,16 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) dev->ethtool_ops->set_priv_flags); break; case ETHTOOL_GRXFH: - rc = ethtool_get_rxhash(dev, useraddr); + case ETHTOOL_GRXRINGS: + case ETHTOOL_GRXCLSRLCNT: + case ETHTOOL_GRXCLSRULE: + case ETHTOOL_GRXCLSRLALL: + rc = ethtool_get_rxnfc(dev, useraddr); break; case ETHTOOL_SRXFH: - rc = ethtool_set_rxhash(dev, useraddr); + case ETHTOOL_SRXCLSRLDEL: + case ETHTOOL_SRXCLSRLINS: + rc = ethtool_set_rxnfc(dev, useraddr); break; case ETHTOOL_GGRO: rc = ethtool_get_gro(dev, useraddr); -- cgit v1.2.3 From be0c22a46cfb79ab2342bb28fde99afa94ef868e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 18 Feb 2009 01:40:43 +0000 Subject: netlink: add NETLINK_BROADCAST_ERROR socket option This patch adds NETLINK_BROADCAST_ERROR which is a netlink socket option that the listener can set to make netlink_broadcast() return errors in the delivery to the caller. This option is useful if the caller of netlink_broadcast() do something with the result of the message delivery, like in ctnetlink where it drops a network packet if the event delivery failed, this is used to enable reliable logging and state-synchronization. If this socket option is not set, netlink_broadcast() only reports ESRCH errors and silently ignore ENOBUFS errors, which is what most netlink_broadcast() callers should do. This socket option is based on a suggestion from Patrick McHardy. Patrick McHardy can exchange this patch for a beer from me ;). Signed-off-by: Pablo Neira Ayuso Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 6ee69c27f806..ed587be1e1c2 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -85,6 +85,7 @@ struct netlink_sock { #define NETLINK_KERNEL_SOCKET 0x1 #define NETLINK_RECV_PKTINFO 0x2 +#define NETLINK_BROADCAST_SEND_ERROR 0x4 static inline struct netlink_sock *nlk_sk(struct sock *sk) { @@ -995,12 +996,15 @@ static inline int do_one_broadcast(struct sock *sk, netlink_overrun(sk); /* Clone failed. Notify ALL listeners. */ p->failure = 1; + if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR) + p->delivery_failure = 1; } else if (sk_filter(sk, p->skb2)) { kfree_skb(p->skb2); p->skb2 = NULL; } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) { netlink_overrun(sk); - p->delivery_failure = 1; + if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR) + p->delivery_failure = 1; } else { p->congested |= val; p->delivered = 1; @@ -1048,7 +1052,7 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, if (info.skb2) kfree_skb(info.skb2); - if (info.delivery_failure || info.failure) + if (info.delivery_failure) return -ENOBUFS; if (info.delivered) { @@ -1163,6 +1167,13 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, err = 0; break; } + case NETLINK_BROADCAST_ERROR: + if (val) + nlk->flags |= NETLINK_BROADCAST_SEND_ERROR; + else + nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR; + err = 0; + break; default: err = -ENOPROTOOPT; } @@ -1195,6 +1206,16 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, return -EFAULT; err = 0; break; + case NETLINK_BROADCAST_ERROR: + if (len < sizeof(int)) + return -EINVAL; + len = sizeof(int); + val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0; + if (put_user(len, optlen) || + put_user(val, optval)) + return -EFAULT; + err = 0; + break; default: err = -ENOPROTOOPT; } -- cgit v1.2.3 From 784544739a25c30637397ace5489eeb6e15d7d49 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 20 Feb 2009 10:35:32 +0100 Subject: netfilter: iptables: lock free counters The reader/writer lock in ip_tables is acquired in the critical path of processing packets and is one of the reasons just loading iptables can cause a 20% performance loss. The rwlock serves two functions: 1) it prevents changes to table state (xt_replace) while table is in use. This is now handled by doing rcu on the xt_table. When table is replaced, the new table(s) are put in and the old one table(s) are freed after RCU period. 2) it provides synchronization when accesing the counter values. This is now handled by swapping in new table_info entries for each cpu then summing the old values, and putting the result back onto one cpu. On a busy system it may cause sampling to occur at different times on each cpu, but no packet/byte counts are lost in the process. Signed-off-by: Stephen Hemminger Sucessfully tested on my dual quad core machine too, but iptables only (no ipv6 here) BTW, my new "tbench 8" result is 2450 MB/s, (it was 2150 MB/s not so long ago) Acked-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/arp_tables.c | 115 +++++++++++++++++++++++++++++--------- net/ipv4/netfilter/ip_tables.c | 120 +++++++++++++++++++++++++++++----------- net/ipv6/netfilter/ip6_tables.c | 119 +++++++++++++++++++++++++++------------ net/netfilter/x_tables.c | 26 +++++++-- 4 files changed, 280 insertions(+), 100 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index b5db46342614..64a7c6ce0b98 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -261,9 +261,10 @@ unsigned int arpt_do_table(struct sk_buff *skb, indev = in ? in->name : nulldevname; outdev = out ? out->name : nulldevname; - read_lock_bh(&table->lock); - private = table->private; - table_base = (void *)private->entries[smp_processor_id()]; + rcu_read_lock(); + private = rcu_dereference(table->private); + table_base = rcu_dereference(private->entries[smp_processor_id()]); + e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); @@ -335,7 +336,8 @@ unsigned int arpt_do_table(struct sk_buff *skb, e = (void *)e + e->next_offset; } } while (!hotdrop); - read_unlock_bh(&table->lock); + + rcu_read_unlock(); if (hotdrop) return NF_DROP; @@ -738,11 +740,65 @@ static void get_counters(const struct xt_table_info *t, } } -static inline struct xt_counters *alloc_counters(struct xt_table *table) + +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. */ +static int +add_counter_to_entry(struct arpt_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} + +/* Take values from counters and add them back onto the current cpu */ +static void put_counters(struct xt_table_info *t, + const struct xt_counters counters[]) +{ + unsigned int i, cpu; + + local_bh_disable(); + cpu = smp_processor_id(); + i = 0; + ARPT_ENTRY_ITERATE(t->entries[cpu], + t->size, + add_counter_to_entry, + counters, + &i); + local_bh_enable(); +} + +static inline int +zero_entry_counter(struct arpt_entry *e, void *arg) +{ + e->counters.bcnt = 0; + e->counters.pcnt = 0; + return 0; +} + +static void +clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) +{ + unsigned int cpu; + const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; + + memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); + for_each_possible_cpu(cpu) { + memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); + ARPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, + zero_entry_counter, NULL); + } +} + +static struct xt_counters *alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + struct xt_table_info *private = table->private; + struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change * (other than comefrom, which userspace doesn't care @@ -752,14 +808,30 @@ static inline struct xt_counters *alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - return ERR_PTR(-ENOMEM); + goto nomem; + + info = xt_alloc_table_info(private->size); + if (!info) + goto free_counters; - /* First, sum counters... */ - write_lock_bh(&table->lock); - get_counters(private, counters); - write_unlock_bh(&table->lock); + clone_counters(info, private); + + mutex_lock(&table->lock); + xt_table_entry_swap_rcu(private, info); + synchronize_net(); /* Wait until smoke has cleared */ + + get_counters(info, counters); + put_counters(private, counters); + mutex_unlock(&table->lock); + + xt_free_table_info(info); return counters; + + free_counters: + vfree(counters); + nomem: + return ERR_PTR(-ENOMEM); } static int copy_entries_to_user(unsigned int total_size, @@ -1099,20 +1171,6 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) return ret; } -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. - */ -static inline int add_counter_to_entry(struct arpt_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) { @@ -1172,13 +1230,14 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, goto free; } - write_lock_bh(&t->lock); + mutex_lock(&t->lock); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } + preempt_disable(); i = 0; /* Choose the copy that is on our node */ loc_cpu_entry = private->entries[smp_processor_id()]; @@ -1187,8 +1246,10 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, add_counter_to_entry, paddc, &i); + preempt_enable(); unlock_up_free: - write_unlock_bh(&t->lock); + mutex_unlock(&t->lock); + xt_table_unlock(t); module_put(t->me); free: diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index ef8b6ca068b2..08cde5bd70a5 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -347,10 +347,12 @@ ipt_do_table(struct sk_buff *skb, mtpar.family = tgpar.family = NFPROTO_IPV4; tgpar.hooknum = hook; - read_lock_bh(&table->lock); IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - private = table->private; - table_base = (void *)private->entries[smp_processor_id()]; + + rcu_read_lock(); + private = rcu_dereference(table->private); + table_base = rcu_dereference(private->entries[smp_processor_id()]); + e = get_entry(table_base, private->hook_entry[hook]); /* For return from builtin chain */ @@ -445,7 +447,7 @@ ipt_do_table(struct sk_buff *skb, } } while (!hotdrop); - read_unlock_bh(&table->lock); + rcu_read_unlock(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; @@ -924,13 +926,68 @@ get_counters(const struct xt_table_info *t, counters, &i); } + +} + +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. */ +static int +add_counter_to_entry(struct ipt_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} + +/* Take values from counters and add them back onto the current cpu */ +static void put_counters(struct xt_table_info *t, + const struct xt_counters counters[]) +{ + unsigned int i, cpu; + + local_bh_disable(); + cpu = smp_processor_id(); + i = 0; + IPT_ENTRY_ITERATE(t->entries[cpu], + t->size, + add_counter_to_entry, + counters, + &i); + local_bh_enable(); +} + + +static inline int +zero_entry_counter(struct ipt_entry *e, void *arg) +{ + e->counters.bcnt = 0; + e->counters.pcnt = 0; + return 0; +} + +static void +clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) +{ + unsigned int cpu; + const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; + + memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); + for_each_possible_cpu(cpu) { + memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); + IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, + zero_entry_counter, NULL); + } } static struct xt_counters * alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + struct xt_table_info *private = table->private; + struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -939,14 +996,30 @@ static struct xt_counters * alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - return ERR_PTR(-ENOMEM); + goto nomem; - /* First, sum counters... */ - write_lock_bh(&table->lock); - get_counters(private, counters); - write_unlock_bh(&table->lock); + info = xt_alloc_table_info(private->size); + if (!info) + goto free_counters; + + clone_counters(info, private); + + mutex_lock(&table->lock); + xt_table_entry_swap_rcu(private, info); + synchronize_net(); /* Wait until smoke has cleared */ + + get_counters(info, counters); + put_counters(private, counters); + mutex_unlock(&table->lock); + + xt_free_table_info(info); return counters; + + free_counters: + vfree(counters); + nomem: + return ERR_PTR(-ENOMEM); } static int @@ -1312,27 +1385,6 @@ do_replace(struct net *net, void __user *user, unsigned int len) return ret; } -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. */ -static int -add_counter_to_entry(struct ipt_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ -#if 0 - duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n", - *i, - (long unsigned int)e->counters.pcnt, - (long unsigned int)e->counters.bcnt, - (long unsigned int)addme[*i].pcnt, - (long unsigned int)addme[*i].bcnt); -#endif - - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) @@ -1393,13 +1445,14 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat goto free; } - write_lock_bh(&t->lock); + mutex_lock(&t->lock); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } + preempt_disable(); i = 0; /* Choose the copy that is on our node */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; @@ -1408,8 +1461,9 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat add_counter_to_entry, paddc, &i); + preempt_enable(); unlock_up_free: - write_unlock_bh(&t->lock); + mutex_unlock(&t->lock); xt_table_unlock(t); module_put(t->me); free: diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index d64594b6c061..34af7bb8df5f 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -382,10 +382,12 @@ ip6t_do_table(struct sk_buff *skb, mtpar.family = tgpar.family = NFPROTO_IPV6; tgpar.hooknum = hook; - read_lock_bh(&table->lock); IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - private = table->private; - table_base = (void *)private->entries[smp_processor_id()]; + + rcu_read_lock(); + private = rcu_dereference(table->private); + table_base = rcu_dereference(private->entries[smp_processor_id()]); + e = get_entry(table_base, private->hook_entry[hook]); /* For return from builtin chain */ @@ -483,7 +485,7 @@ ip6t_do_table(struct sk_buff *skb, #ifdef CONFIG_NETFILTER_DEBUG ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON; #endif - read_unlock_bh(&table->lock); + rcu_read_unlock(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; @@ -964,11 +966,64 @@ get_counters(const struct xt_table_info *t, } } +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. */ +static int +add_counter_to_entry(struct ip6t_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} + +/* Take values from counters and add them back onto the current cpu */ +static void put_counters(struct xt_table_info *t, + const struct xt_counters counters[]) +{ + unsigned int i, cpu; + + local_bh_disable(); + cpu = smp_processor_id(); + i = 0; + IP6T_ENTRY_ITERATE(t->entries[cpu], + t->size, + add_counter_to_entry, + counters, + &i); + local_bh_enable(); +} + +static inline int +zero_entry_counter(struct ip6t_entry *e, void *arg) +{ + e->counters.bcnt = 0; + e->counters.pcnt = 0; + return 0; +} + +static void +clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) +{ + unsigned int cpu; + const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; + + memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); + for_each_possible_cpu(cpu) { + memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); + IP6T_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, + zero_entry_counter, NULL); + } +} + static struct xt_counters *alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + struct xt_table_info *private = table->private; + struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -977,14 +1032,28 @@ static struct xt_counters *alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - return ERR_PTR(-ENOMEM); + goto nomem; + + info = xt_alloc_table_info(private->size); + if (!info) + goto free_counters; + + clone_counters(info, private); + + mutex_lock(&table->lock); + xt_table_entry_swap_rcu(private, info); + synchronize_net(); /* Wait until smoke has cleared */ + + get_counters(info, counters); + put_counters(private, counters); + mutex_unlock(&table->lock); - /* First, sum counters... */ - write_lock_bh(&table->lock); - get_counters(private, counters); - write_unlock_bh(&table->lock); + xt_free_table_info(info); - return counters; + free_counters: + vfree(counters); + nomem: + return ERR_PTR(-ENOMEM); } static int @@ -1351,28 +1420,6 @@ do_replace(struct net *net, void __user *user, unsigned int len) return ret; } -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. */ -static inline int -add_counter_to_entry(struct ip6t_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ -#if 0 - duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n", - *i, - (long unsigned int)e->counters.pcnt, - (long unsigned int)e->counters.bcnt, - (long unsigned int)addme[*i].pcnt, - (long unsigned int)addme[*i].bcnt); -#endif - - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) @@ -1433,13 +1480,14 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, goto free; } - write_lock_bh(&t->lock); + mutex_lock(&t->lock); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } + preempt_disable(); i = 0; /* Choose the copy that is on our node */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; @@ -1448,8 +1496,9 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, add_counter_to_entry, paddc, &i); + preempt_enable(); unlock_up_free: - write_unlock_bh(&t->lock); + mutex_unlock(&t->lock); xt_table_unlock(t); module_put(t->me); free: diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index bfbf521f6ea5..bfcac92d5563 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -625,6 +625,20 @@ void xt_free_table_info(struct xt_table_info *info) } EXPORT_SYMBOL(xt_free_table_info); +void xt_table_entry_swap_rcu(struct xt_table_info *oldinfo, + struct xt_table_info *newinfo) +{ + unsigned int cpu; + + for_each_possible_cpu(cpu) { + void *p = oldinfo->entries[cpu]; + rcu_assign_pointer(oldinfo->entries[cpu], newinfo->entries[cpu]); + newinfo->entries[cpu] = p; + } + +} +EXPORT_SYMBOL_GPL(xt_table_entry_swap_rcu); + /* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, const char *name) @@ -671,21 +685,22 @@ xt_replace_table(struct xt_table *table, struct xt_table_info *oldinfo, *private; /* Do the substitution. */ - write_lock_bh(&table->lock); + mutex_lock(&table->lock); private = table->private; /* Check inside lock: is the old number correct? */ if (num_counters != private->number) { duprintf("num_counters != table->private->number (%u/%u)\n", num_counters, private->number); - write_unlock_bh(&table->lock); + mutex_unlock(&table->lock); *error = -EAGAIN; return NULL; } oldinfo = private; - table->private = newinfo; + rcu_assign_pointer(table->private, newinfo); newinfo->initial_entries = oldinfo->initial_entries; - write_unlock_bh(&table->lock); + mutex_unlock(&table->lock); + synchronize_net(); return oldinfo; } EXPORT_SYMBOL_GPL(xt_replace_table); @@ -719,7 +734,8 @@ struct xt_table *xt_register_table(struct net *net, struct xt_table *table, /* Simplifies replace_table code. */ table->private = bootstrap; - rwlock_init(&table->lock); + mutex_init(&table->lock); + if (!xt_replace_table(table, 0, newinfo, &ret)) goto unlock; -- cgit v1.2.3 From e478075c6f07a383c378fb400edc1a7407a941b0 Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Fri, 20 Feb 2009 10:47:09 +0100 Subject: netfilter: nf_conntrack: table max size should hold at least table size Table size is defined as unsigned, wheres the table maximum size is defined as a signed integer. The calculation of max is 8 or 4, multiplied the table size. Therefore the max value is aligned to unsigned. Signed-off-by: Hagen Paul Pfeifer Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 90ce9ddb9451..f3aa4e65b15e 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -54,7 +54,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_lock); unsigned int nf_conntrack_htable_size __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); -int nf_conntrack_max __read_mostly; +unsigned int nf_conntrack_max __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_max); struct nf_conn nf_conntrack_untracked __read_mostly; -- cgit v1.2.3 From af07d241dc76f0a52c7ff04df3a3970020fe6157 Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Fri, 20 Feb 2009 10:48:06 +0100 Subject: netfilter: fix hardcoded size assumptions get_random_bytes() is sometimes called with a hard coded size assumption of an integer. This could not be true for next centuries. This patch replace it with a compile time statement. Signed-off-by: Hagen Paul Pfeifer Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_core.c | 5 +++-- net/netfilter/nf_conntrack_expect.c | 3 ++- net/netfilter/xt_hashlimit.c | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f3aa4e65b15e..2235432c59d1 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -472,7 +472,8 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, struct nf_conn *ct; if (unlikely(!nf_conntrack_hash_rnd_initted)) { - get_random_bytes(&nf_conntrack_hash_rnd, 4); + get_random_bytes(&nf_conntrack_hash_rnd, + sizeof(nf_conntrack_hash_rnd)); nf_conntrack_hash_rnd_initted = 1; } @@ -1103,7 +1104,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) /* We have to rehahs for the new table anyway, so we also can * use a newrandom seed */ - get_random_bytes(&rnd, 4); + get_random_bytes(&rnd, sizeof(rnd)); /* Lookups in the old hash might happen in parallel, which means we * might get false negatives during connection lookup. New connections diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 3a8a34a6d37c..357ba39d4c8d 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -72,7 +72,8 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple unsigned int hash; if (unlikely(!nf_ct_expect_hash_rnd_initted)) { - get_random_bytes(&nf_ct_expect_hash_rnd, 4); + get_random_bytes(&nf_ct_expect_hash_rnd, + sizeof(nf_ct_expect_hash_rnd)); nf_ct_expect_hash_rnd_initted = 1; } diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index f97fded024c4..2482055e1c56 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -149,7 +149,7 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht, /* initialize hash with random val at the time we allocate * the first hashtable entry */ if (!ht->rnd_initialized) { - get_random_bytes(&ht->rnd, 4); + get_random_bytes(&ht->rnd, sizeof(ht->rnd)); ht->rnd_initialized = 1; } -- cgit v1.2.3 From 268cb38e1802db560c73167e643f14a3dcb4b07c Mon Sep 17 00:00:00 2001 From: Adam Nielsen Date: Fri, 20 Feb 2009 10:55:14 +0100 Subject: netfilter: x_tables: add LED trigger target Kernel module providing implementation of LED netfilter target. Each instance of the target appears as a led-trigger device, which can be associated with one or more LEDs in /sys/class/leds/ Signed-off-by: Adam Nielsen Acked-by: Richard Purdie Signed-off-by: Patrick McHardy --- net/netfilter/Kconfig | 24 ++++++++ net/netfilter/Makefile | 1 + net/netfilter/xt_LED.c | 161 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 186 insertions(+) create mode 100644 net/netfilter/xt_LED.c (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 0eb98b4fbf44..cdbaaff6d0d6 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -372,6 +372,30 @@ config NETFILTER_XT_TARGET_HL since you can easily create immortal packets that loop forever on the network. +config NETFILTER_XT_TARGET_LED + tristate '"LED" target support' + depends on LEDS_CLASS + depends on NETFILTER_ADVANCED + help + This option adds a `LED' target, which allows you to blink LEDs in + response to particular packets passing through your machine. + + This can be used to turn a spare LED into a network activity LED, + which only flashes in response to FTP transfers, for example. Or + you could have an LED which lights up for a minute or two every time + somebody connects to your machine via SSH. + + You will need support for the "led" class to make this work. + + To create an LED trigger for incoming SSH traffic: + iptables -A INPUT -p tcp --dport 22 -j LED --led-trigger-id ssh --led-delay 1000 + + Then attach the new trigger to an LED on your system: + echo netfilter-ssh > /sys/class/leds//trigger + + For more information on the LEDs available on your system, see + Documentation/leds-class.txt + config NETFILTER_XT_TARGET_MARK tristate '"MARK" target support' default m if NETFILTER_ADVANCED=n diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index da73ed25701c..7a9b8397573a 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -46,6 +46,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o +obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c new file mode 100644 index 000000000000..8ff7843bb921 --- /dev/null +++ b/net/netfilter/xt_LED.c @@ -0,0 +1,161 @@ +/* + * xt_LED.c - netfilter target to make LEDs blink upon packet matches + * + * Copyright (C) 2008 Adam Nielsen + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + * + */ + +#include +#include +#include +#include +#include + +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Adam Nielsen "); +MODULE_DESCRIPTION("Xtables: trigger LED devices on packet match"); + +/* + * This is declared in here (the kernel module) only, to avoid having these + * dependencies in userspace code. This is what xt_led_info.internal_data + * points to. + */ +struct xt_led_info_internal { + struct led_trigger netfilter_led_trigger; + struct timer_list timer; +}; + +static unsigned int +led_tg(struct sk_buff *skb, const struct xt_target_param *par) +{ + const struct xt_led_info *ledinfo = par->targinfo; + struct xt_led_info_internal *ledinternal = ledinfo->internal_data; + + /* + * If "always blink" is enabled, and there's still some time until the + * LED will switch off, briefly switch it off now. + */ + if ((ledinfo->delay > 0) && ledinfo->always_blink && + timer_pending(&ledinternal->timer)) + led_trigger_event(&ledinternal->netfilter_led_trigger,LED_OFF); + + led_trigger_event(&ledinternal->netfilter_led_trigger, LED_FULL); + + /* If there's a positive delay, start/update the timer */ + if (ledinfo->delay > 0) { + mod_timer(&ledinternal->timer, + jiffies + msecs_to_jiffies(ledinfo->delay)); + + /* Otherwise if there was no delay given, blink as fast as possible */ + } else if (ledinfo->delay == 0) { + led_trigger_event(&ledinternal->netfilter_led_trigger, LED_OFF); + } + + /* else the delay is negative, which means switch on and stay on */ + + return XT_CONTINUE; +} + +static void led_timeout_callback(unsigned long data) +{ + struct xt_led_info *ledinfo = (struct xt_led_info *)data; + struct xt_led_info_internal *ledinternal = ledinfo->internal_data; + + led_trigger_event(&ledinternal->netfilter_led_trigger, LED_OFF); +} + +static bool led_tg_check(const struct xt_tgchk_param *par) +{ + struct xt_led_info *ledinfo = par->targinfo; + struct xt_led_info_internal *ledinternal; + int err; + + if (ledinfo->id[0] == '\0') { + printk(KERN_ERR KBUILD_MODNAME ": No 'id' parameter given.\n"); + return false; + } + + ledinternal = kzalloc(sizeof(struct xt_led_info_internal), GFP_KERNEL); + if (!ledinternal) { + printk(KERN_CRIT KBUILD_MODNAME ": out of memory\n"); + return false; + } + + ledinternal->netfilter_led_trigger.name = ledinfo->id; + + err = led_trigger_register(&ledinternal->netfilter_led_trigger); + if (err) { + printk(KERN_CRIT KBUILD_MODNAME + ": led_trigger_register() failed\n"); + if (err == -EEXIST) + printk(KERN_ERR KBUILD_MODNAME + ": Trigger name is already in use.\n"); + goto exit_alloc; + } + + /* See if we need to set up a timer */ + if (ledinfo->delay > 0) + setup_timer(&ledinternal->timer, led_timeout_callback, + (unsigned long)ledinfo); + + ledinfo->internal_data = ledinternal; + + return true; + +exit_alloc: + kfree(ledinternal); + + return false; +} + +static void led_tg_destroy(const struct xt_tgdtor_param *par) +{ + const struct xt_led_info *ledinfo = par->targinfo; + struct xt_led_info_internal *ledinternal = ledinfo->internal_data; + + if (ledinfo->delay > 0) + del_timer_sync(&ledinternal->timer); + + led_trigger_unregister(&ledinternal->netfilter_led_trigger); + kfree(ledinternal); +} + +static struct xt_target led_tg_reg __read_mostly = { + .name = "LED", + .revision = 0, + .family = NFPROTO_UNSPEC, + .target = led_tg, + .targetsize = XT_ALIGN(sizeof(struct xt_led_info)), + .checkentry = led_tg_check, + .destroy = led_tg_destroy, + .me = THIS_MODULE, +}; + +static int __init led_tg_init(void) +{ + return xt_register_target(&led_tg_reg); +} + +static void __exit led_tg_exit(void) +{ + xt_unregister_target(&led_tg_reg); +} + +module_init(led_tg_init); +module_exit(led_tg_exit); -- cgit v1.2.3 From 08361aa807ae5e5007cd226ca9e34287512de737 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 20 Feb 2009 11:03:33 +0100 Subject: netfilter: ip_tables: unfold two critical loops in ip_packet_match() While doing oprofile tests I noticed two loops are not properly unrolled by gcc Using a hand coded unrolled loop provides nice speedup : ipt_do_table credited of 2.52 % of cpu instead of 3.29 % in tbench. Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ip_tables.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 08cde5bd70a5..e5294aec967d 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -74,6 +74,25 @@ do { \ Hence the start of any table is given by get_table() below. */ +static unsigned long ifname_compare(const char *_a, const char *_b, + const unsigned char *_mask) +{ + const unsigned long *a = (const unsigned long *)_a; + const unsigned long *b = (const unsigned long *)_b; + const unsigned long *mask = (const unsigned long *)_mask; + unsigned long ret; + + ret = (a[0] ^ b[0]) & mask[0]; + if (IFNAMSIZ > sizeof(unsigned long)) + ret |= (a[1] ^ b[1]) & mask[1]; + if (IFNAMSIZ > 2 * sizeof(unsigned long)) + ret |= (a[2] ^ b[2]) & mask[2]; + if (IFNAMSIZ > 3 * sizeof(unsigned long)) + ret |= (a[3] ^ b[3]) & mask[3]; + BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long)); + return ret; +} + /* Returns whether matches rule or not. */ /* Performance critical - called for every packet */ static inline bool @@ -83,7 +102,6 @@ ip_packet_match(const struct iphdr *ip, const struct ipt_ip *ipinfo, int isfrag) { - size_t i; unsigned long ret; #define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg))) @@ -103,12 +121,7 @@ ip_packet_match(const struct iphdr *ip, return false; } - /* Look for ifname matches; this should unroll nicely. */ - for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { - ret |= (((const unsigned long *)indev)[i] - ^ ((const unsigned long *)ipinfo->iniface)[i]) - & ((const unsigned long *)ipinfo->iniface_mask)[i]; - } + ret = ifname_compare(indev, ipinfo->iniface, ipinfo->iniface_mask); if (FWINV(ret != 0, IPT_INV_VIA_IN)) { dprintf("VIA in mismatch (%s vs %s).%s\n", @@ -117,11 +130,7 @@ ip_packet_match(const struct iphdr *ip, return false; } - for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { - ret |= (((const unsigned long *)outdev)[i] - ^ ((const unsigned long *)ipinfo->outiface)[i]) - & ((const unsigned long *)ipinfo->outiface_mask)[i]; - } + ret = ifname_compare(outdev, ipinfo->outiface, ipinfo->outiface_mask); if (FWINV(ret != 0, IPT_INV_VIA_OUT)) { dprintf("VIA out mismatch (%s vs %s).%s\n", -- cgit v1.2.3 From cd4d8fdad1f13205c769266dfa99015e226b6e07 Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Sat, 21 Feb 2009 02:42:18 -0800 Subject: net: kernel panic in dev_hard_start_xmit: remove faulty software TX time stamping The current implementation of the TX software time stamping fallback is faulty because it accesses the skb after ndo_start_xmit() returns successfully. This patch removes the fallback, which fixes kernel panics seen during stress tests. Hardware time stamping is not affected by this removal. Signed-off-by: Patrick Ohly Signed-off-by: Emil Tantilov Signed-off-by: David S. Miller --- net/core/dev.c | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 5493394118fb..88dc082b47d1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1672,16 +1672,6 @@ static int dev_gso_segment(struct sk_buff *skb) return 0; } -static void tstamp_tx(struct sk_buff *skb) -{ - union skb_shared_tx *shtx = - skb_tx(skb); - if (unlikely(shtx->software && - !shtx->in_progress)) { - skb_tstamp_tx(skb, NULL); - } -} - int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) { @@ -1715,8 +1705,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, * the skb destructor before the call and restoring it * afterwards, then doing the skb_orphan() ourselves? */ - if (likely(!rc)) - tstamp_tx(skb); return rc; } @@ -1732,7 +1720,6 @@ gso: skb->next = nskb; return rc; } - tstamp_tx(skb); if (unlikely(netif_tx_queue_stopped(txq) && skb->next)) return NETDEV_TX_BUSY; } while (skb->next); -- cgit v1.2.3 From 66da8c529ad4d330a268ac08aa101b87c5c911ff Mon Sep 17 00:00:00 2001 From: Hannes Eder Date: Sat, 21 Feb 2009 23:37:10 -0800 Subject: ipv6: fix sparse warning: Using plain integer as NULL pointer Fix this sparse warning: net/ipv6/xfrm6_state.c:72:26: warning: Using plain integer as NULL pointer Signed-off-by: Hannes Eder Signed-off-by: David S. Miller --- net/ipv6/xfrm6_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 0e685b05496e..f417b77fa0e1 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -69,7 +69,7 @@ __xfrm6_sort(void **dst, void **src, int n, int (*cmp)(void *p), int maxclass) for (i = 0; i < n; i++) { dst[count[class[i] - 1]++] = src[i]; - src[i] = 0; + src[i] = NULL; } return 0; -- cgit v1.2.3 From 7691367d71fd77ab668ff3b6edb4340cecddc805 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 21 Feb 2009 23:52:29 -0800 Subject: tcp: Always set urgent pointer if it's beyond snd_nxt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our TCP stack does not set the urgent flag if the urgent pointer does not fit in 16 bits, i.e., if it is more than 64K from the sequence number of a packet. This behaviour is different from the BSDs, and clearly contradicts the purpose of urgent mode, which is to send the notification (though not necessarily the associated data) as soon as possible. Our current behaviour may in fact delay the urgent notification indefinitely if the receiver window does not open up. Simply matching BSD however may break legacy applications which incorrectly rely on the out-of-band delivery of urgent data, and conversely the in-band delivery of non-urgent data. Alexey Kuznetsov suggested a safe solution of following BSD only if the urgent pointer itself has not yet been transmitted. This way we guarantee that when the remote end sees the packet with non-urgent data marked as urgent due to wrap-around we would have advanced the urgent pointer beyond, either to the actual urgent data or to an as-yet untransmitted packet. The only potential downside is that applications on the remote end may see multiple SIGURG notifications. However, this would occur anyway with other TCP stacks. More importantly, the outcome of such a duplicate notification is likely to be harmless since the signal itself does not carry any information other than the fact that we're in urgent mode. Thanks to Ilpo Järvinen for fixing a critical bug in this and Jeff Chua for reporting that bug. Signed-off-by: Herbert Xu Acked-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index dda42f0bd7a3..f5263c840338 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -663,10 +663,14 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, th->urg_ptr = 0; /* The urg_mode check is necessary during a below snd_una win probe */ - if (unlikely(tcp_urg_mode(tp) && - between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) { - th->urg_ptr = htons(tp->snd_up - tcb->seq); - th->urg = 1; + if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) { + if (before(tp->snd_up, tcb->seq + 0x10000)) { + th->urg_ptr = htons(tp->snd_up - tcb->seq); + th->urg = 1; + } else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) { + th->urg_ptr = 0xFFFF; + th->urg = 1; + } } tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); -- cgit v1.2.3 From c1cf8422f0512c2b14f0d66bce34abb0645c888a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 20 Feb 2009 08:25:36 +0000 Subject: ip: add loose reverse path filtering Extend existing reverse path filter option to allow strict or loose filtering. (See http://en.wikipedia.org/wiki/Reverse_path_filtering). For compatibility with existing usage, the value 1 is chosen for strict mode and 2 for loose mode. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/fib_frontend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 741e4fa3e474..cafcc49d0993 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -275,7 +275,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, fib_res_put(&res); if (no_addr) goto last_resort; - if (rpf) + if (rpf == 1) goto e_inval; fl.oif = dev->ifindex; -- cgit v1.2.3 From 5747a1aacde268017784a6a56df06c3b40194381 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 22 Feb 2009 00:02:08 -0800 Subject: ip: ipip compile warning Get rid of compile warning about non-const format Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/ipip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 5079dfbc6f38..c49c4ecfb154 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -750,7 +750,7 @@ static struct xfrm_tunnel ipip_handler = { .priority = 1, }; -static char banner[] __initdata = +static const char banner[] __initconst = KERN_INFO "IPv4 over IPv4 tunneling driver\n"; static void ipip_destroy_tunnels(struct ipip_net *ipn) -- cgit v1.2.3 From 01af4a0e3cce86212f6bf54ebe8f84d7219c862a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 22 Feb 2009 00:02:44 -0800 Subject: llc: fix non-const printk warning Mark some strings as const. Signed-off-by: Stephen Hemminger Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/llc/af_llc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 56fd85ab358e..febae702685c 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -1118,11 +1118,11 @@ static const struct proto_ops llc_ui_ops = { .sendpage = sock_no_sendpage, }; -static char llc_proc_err_msg[] __initdata = +static const char llc_proc_err_msg[] __initconst = KERN_CRIT "LLC: Unable to register the proc_fs entries\n"; -static char llc_sysctl_err_msg[] __initdata = +static const char llc_sysctl_err_msg[] __initconst = KERN_CRIT "LLC: Unable to register the sysctl entries\n"; -static char llc_sock_err_msg[] __initdata = +static const char llc_sock_err_msg[] __initconst = KERN_CRIT "LLC: Unable to register the network family\n"; static int __init llc2_init(void) -- cgit v1.2.3 From 0117cfabe3ba9b430c6ff6eecd4fdc569977f24f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 22 Feb 2009 00:03:19 -0800 Subject: snap: handle registration error and compile warning If this module can't load, it is almost certainly because something else is already bound to that SAP. So in that case, return the same error code as other SAP usage, and fail the module load. Also fixes a compiler warning about printk of non const. Signed-off-by: Stephen Hemminger Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/802/psnap.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/802/psnap.c b/net/802/psnap.c index 6ed711748f26..bdbffa3cb043 100644 --- a/net/802/psnap.c +++ b/net/802/psnap.c @@ -95,15 +95,16 @@ static int snap_request(struct datalink_proto *dl, EXPORT_SYMBOL(register_snap_client); EXPORT_SYMBOL(unregister_snap_client); -static char snap_err_msg[] __initdata = +static const char snap_err_msg[] __initconst = KERN_CRIT "SNAP - unable to register with 802.2\n"; static int __init snap_init(void) { snap_sap = llc_sap_open(0xAA, snap_rcv); - - if (!snap_sap) + if (!snap_sap) { printk(snap_err_msg); + return -EBUSY; + } return 0; } -- cgit v1.2.3 From b2cc46a8ee905d1d642e01761939879c495e3e3a Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Sun, 22 Feb 2009 00:06:20 -0800 Subject: ipv4: Fix rp_filter description in net/ipv4/Kconfig. The reverse path filter (rp_filter) will NOT get enabled when enabling forwarding. Read the code and tested in in practice. Most distributions do enable it in startup scripts. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/ipv4/Kconfig | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 691268f3a359..10c944d42488 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -35,7 +35,7 @@ config IP_ADVANCED_ROUTER at boot time after the /proc file system has been mounted. - If you turn on IP forwarding, you will also get the rp_filter, which + If you turn on IP forwarding, you should consider the rp_filter, which automatically rejects incoming packets if the routing table entry for their source address doesn't match the network interface they're arriving on. This has security advantages because it prevents the @@ -46,9 +46,11 @@ config IP_ADVANCED_ROUTER rp_filter on use: echo 1 > /proc/sys/net/ipv4/conf//rp_filter - or + and echo 1 > /proc/sys/net/ipv4/conf/all/rp_filter + Note that some distributions enable it in startup scripts. + If unsure, say N here. choice -- cgit v1.2.3 From a6e8f27f3c02b0c0d0c45d14b2f4a8810c9a7dc7 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Sun, 22 Feb 2009 00:07:13 -0800 Subject: ipv4: Clean whitespaces in net/ipv4/Kconfig. While going through net/ipv4/Kconfig cleanup whitespaces. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/ipv4/Kconfig | 44 +++++++++++++++++++++++--------------------- 1 file changed, 23 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 10c944d42488..c7a814860ce1 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -53,7 +53,7 @@ config IP_ADVANCED_ROUTER If unsure, say N here. -choice +choice prompt "Choose IP: FIB lookup algorithm (choose FIB_HASH if unsure)" depends on IP_ADVANCED_ROUTER default ASK_IP_FIB_HASH @@ -61,27 +61,29 @@ choice config ASK_IP_FIB_HASH bool "FIB_HASH" ---help--- - Current FIB is very proven and good enough for most users. + Current FIB is very proven and good enough for most users. config IP_FIB_TRIE bool "FIB_TRIE" ---help--- - Use new experimental LC-trie as FIB lookup algorithm. - This improves lookup performance if you have a large - number of routes. - - LC-trie is a longest matching prefix lookup algorithm which - performs better than FIB_HASH for large routing tables. - But, it consumes more memory and is more complex. - - LC-trie is described in: - - IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson - IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999 - An experimental study of compression methods for dynamic tries - Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. - http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/ - + Use new experimental LC-trie as FIB lookup algorithm. + This improves lookup performance if you have a large + number of routes. + + LC-trie is a longest matching prefix lookup algorithm which + performs better than FIB_HASH for large routing tables. + But, it consumes more memory and is more complex. + + LC-trie is described in: + + IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson + IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, + June 1999 + + An experimental study of compression methods for dynamic tries + Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002. + http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/ + endchoice config IP_FIB_HASH @@ -193,7 +195,7 @@ config IP_PNP_RARP for details. # not yet ready.. -# bool ' IP: ARP support' CONFIG_IP_PNP_ARP +# bool ' IP: ARP support' CONFIG_IP_PNP_ARP config NET_IPIP tristate "IP: tunneling" select INET_TUNNEL @@ -363,7 +365,7 @@ config INET_IPCOMP ---help--- Support for IP Payload Compression Protocol (IPComp) (RFC3173), typically needed for IPsec. - + If unsure, say Y. config INET_XFRM_TUNNEL @@ -417,7 +419,7 @@ config INET_DIAG Support for INET (TCP, DCCP, etc) socket monitoring interface used by native Linux tools such as ss. ss is included in iproute2, currently downloadable at . - + If unsure, say Y. config INET_TCP_DIAG -- cgit v1.2.3 From 959d2726499175c6b724d87fed1dafca7582efe2 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 22 Feb 2009 00:09:14 -0800 Subject: netns: Fix icmp shutdown. Recently I had a kernel panic in icmp_send during a network namespace cleanup. There were packets in the arp queue that failed to be sent and we attempted to generate an ICMP host unreachable message, but failed because icmp_sk_exit had already been called. The network devices are removed from a network namespace and their arp queues are flushed before we do attempt to shutdown subsystems so this error should have been impossible. It turns out icmp_init is using register_pernet_device instead of register_pernet_subsys. Which resulted in icmp being shut down while we still had the possibility of packets in flight, making a nasty NULL pointer deference in interrupt context possible. Changing this to register_pernet_subsys fixes the problem in my testing. Signed-off-by: Eric W. Biederman Acked-by: Denis V. Lunev Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 382800a62b31..3f50807237e0 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1207,7 +1207,7 @@ static struct pernet_operations __net_initdata icmp_sk_ops = { int __init icmp_init(void) { - return register_pernet_device(&icmp_sk_ops); + return register_pernet_subsys(&icmp_sk_ops); } EXPORT_SYMBOL(icmp_err_convert); -- cgit v1.2.3 From 6a1b3054d9fd98001a6631501caf1969138ee00d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 22 Feb 2009 00:10:18 -0800 Subject: tcp: Like icmp use register_pernet_subsys To remove the possibility of packets flying around when network devices are being cleaned up use reisger_pernet_subsys instead of register_pernet_device. Signed-off-by: Eric W. Biederman Acked-by: Denis V. Lunev Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f6b962f56ab4..a7381205bbfc 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2443,7 +2443,7 @@ static struct pernet_operations __net_initdata tcp_sk_ops = { void __init tcp_v4_init(void) { inet_hashinfo_init(&tcp_hashinfo); - if (register_pernet_device(&tcp_sk_ops)) + if (register_pernet_subsys(&tcp_sk_ops)) panic("Failed to create the TCP control socket.\n"); } -- cgit v1.2.3 From ce16c5337ab0d165f95c88aa857207efd7c01139 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 22 Feb 2009 00:11:09 -0800 Subject: netns: Remove net_alive It turns out that net_alive is unnecessary, and the original problem that led to it being added was simply that the icmp code thought it was a network device and wound up being unable to handle packets while there were still packets in the network namespace. Now that icmp and tcp have been fixed to properly register themselves this problem is no longer present and we have a stronger guarantee that packets will not arrive in a network namespace then that provided by net_alive in netif_receive_skb. So remove net_alive allowing packet reception run a little faster. Additionally document the strong reason why network namespace cleanup is safe so that if something happens again someone else will have a chance of figuring it out. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/core/dev.c | 6 ------ net/core/net_namespace.c | 3 --- 2 files changed, 9 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 88dc082b47d1..ac6ab12d3297 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2254,12 +2254,6 @@ int netif_receive_skb(struct sk_buff *skb) rcu_read_lock(); - /* Don't receive packets in an exiting network namespace */ - if (!net_alive(dev_net(skb->dev))) { - kfree_skb(skb); - goto out; - } - #ifdef CONFIG_NET_CLS_ACT if (skb->tc_verd & TC_NCLS) { skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 55151faaf90c..516c7b154327 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -140,9 +140,6 @@ static void cleanup_net(struct work_struct *work) struct pernet_operations *ops; struct net *net; - /* Be very certain incoming network packets will not find us */ - rcu_barrier(); - net = container_of(work, struct net, work); mutex_lock(&net_mutex); -- cgit v1.2.3 From d18921a0e319ab512f8186b1b1142c7b8634c779 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 23 Feb 2009 04:40:43 +0000 Subject: Doc: Refer to ip-sysctl.txt for strict vs. loose rp_filter mode The IP_ADVANCED_ROUTER Kconfig describes the rp_filter proc option. Recent changes added a loose mode. Instead of documenting this change too places, refer to the document describing it: Documentation/networking/ip-sysctl.txt I'm considering moving the rp_filter description away from the Kconfig file into ip-sysctl.txt. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- net/ipv4/Kconfig | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index c7a814860ce1..b2cf91e4ccaa 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -50,6 +50,8 @@ config IP_ADVANCED_ROUTER echo 1 > /proc/sys/net/ipv4/conf/all/rp_filter Note that some distributions enable it in startup scripts. + For details about rp_filter strict and loose mode read + . If unsure, say N here. -- cgit v1.2.3 From 7d1e04598e5e92527840b6889fb75b4b30fdd33b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 24 Feb 2009 14:48:01 +0100 Subject: netfilter: nf_conntrack: account packets drop by tcp_packet() Since tcp_packet() may return -NF_DROP in two situations, the packet-drop stats must be increased. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 2235432c59d1..ebc275600125 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -734,6 +734,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, nf_conntrack_put(skb->nfct); skb->nfct = NULL; NF_CT_STAT_INC_ATOMIC(net, invalid); + if (ret == -NF_DROP) + NF_CT_STAT_INC_ATOMIC(net, drop); return -ret; } -- cgit v1.2.3 From 28337ff5438a640afa713d874d076e3a8a9150da Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 24 Feb 2009 15:30:29 +0100 Subject: netfilter: xt_hashlimit fix Commit 784544739a25c30637397ace5489eeb6e15d7d49 (netfilter: iptables: lock free counters) broke xt_hashlimit netfilter module : This module was storing a pointer inside its xt_hashlimit_info, and this pointer is not relocated when we temporarly switch tables (iptables -L). This hack is not not needed at all (probably a leftover from ancient time), as each cpu should and can access to its own copy. Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/netfilter/xt_hashlimit.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 2482055e1c56..a5b5369c30f9 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -565,8 +565,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, static bool hashlimit_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_hashlimit_info *r = - ((const struct xt_hashlimit_info *)par->matchinfo)->u.master; + const struct xt_hashlimit_info *r = par->matchinfo; struct xt_hashlimit_htable *hinfo = r->hinfo; unsigned long now = jiffies; struct dsthash_ent *dh; @@ -702,8 +701,6 @@ static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par) } mutex_unlock(&hlimit_mutex); - /* Ugly hack: For SMP, we only want to use one set */ - r->u.master = r; return true; } -- cgit v1.2.3 From 1ce85fe402137824246bad03ff85f3913d565c17 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 24 Feb 2009 23:18:28 -0800 Subject: netlink: change nlmsg_notify() return value logic This patch changes the return value of nlmsg_notify() as follows: If NETLINK_BROADCAST_ERROR is set by any of the listeners and an error in the delivery happened, return the broadcast error; else if there are no listeners apart from the socket that requested a change with the echo flag, return the result of the unicast notification. Thus, with this patch, the unicast notification is handled in the same way of a broadcast listener that has set the NETLINK_BROADCAST_ERROR socket flag. This patch is useful in case that the caller of nlmsg_notify() wants to know the result of the delivery of a netlink notification (including the broadcast delivery) and take any action in case that the delivery failed. For example, ctnetlink can drop packets if the event delivery failed to provide reliable logging and state-synchronization at the cost of dropping packets. This patch also modifies the rtnetlink code to ignore the return value of rtnl_notify() in all callers. The function rtnl_notify() (before this patch) returned the error of the unicast notification which makes rtnl_set_sk_err() reports errors to all listeners. This is not of any help since the origin of the change (the socket that requested the echoing) notices the ENOBUFS error if the notification fails and should resync itself. Signed-off-by: Pablo Neira Ayuso Acked-by: Patrick McHardy Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 3 ++- net/core/fib_rules.c | 3 ++- net/core/neighbour.c | 3 ++- net/core/rtnetlink.c | 9 +++++---- net/decnet/dn_dev.c | 3 ++- net/decnet/dn_table.c | 3 ++- net/ipv4/devinet.c | 3 ++- net/ipv4/fib_semantics.c | 5 +++-- net/ipv6/addrconf.c | 9 ++++++--- net/ipv6/ndisc.c | 6 +----- net/ipv6/route.c | 5 +++-- net/netlink/af_netlink.c | 14 ++++++++++---- net/phonet/pn_netlink.c | 5 +++-- 13 files changed, 43 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index ba7be195803c..fcffb3fb1177 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -98,7 +98,8 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); + rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); + return; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_LINK, err); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 32b3a0152d7a..98691e1466b8 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -588,7 +588,8 @@ static void notify_rule_change(int event, struct fib_rule *rule, goto errout; } - err = rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL); + rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL); + return; errout: if (err < 0) rtnl_set_sk_err(net, ops->nlgroup, err); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 278a142d1047..e1144cb94b99 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2534,7 +2534,8 @@ static void __neigh_notify(struct neighbour *n, int type, int flags) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); + rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); + return; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 790dd205bb5d..d78030f88bd0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -455,8 +455,8 @@ int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid) return nlmsg_unicast(rtnl, skb, pid); } -int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, - struct nlmsghdr *nlh, gfp_t flags) +void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, + struct nlmsghdr *nlh, gfp_t flags) { struct sock *rtnl = net->rtnl; int report = 0; @@ -464,7 +464,7 @@ int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, if (nlh) report = nlmsg_report(nlh); - return nlmsg_notify(rtnl, skb, pid, group, report, flags); + nlmsg_notify(rtnl, skb, pid, group, report, flags); } void rtnl_set_sk_err(struct net *net, u32 group, int error) @@ -1246,7 +1246,8 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); + rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); + return; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_LINK, err); diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index daf2b98b15fe..e457769bf7a7 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -769,7 +769,8 @@ static void dn_ifaddr_notify(int event, struct dn_ifaddr *ifa) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, &init_net, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL); + rtnl_notify(skb, &init_net, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL); + return; errout: if (err < 0) rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_IFADDR, err); diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 69ad9280c693..67054b0d550f 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -375,7 +375,8 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id, kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, &init_net, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); + rtnl_notify(skb, &init_net, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); + return; errout: if (err < 0) rtnl_set_sk_err(&init_net, RTNLGRP_DECnet_ROUTE, err); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index d519a6a66726..126bb911880f 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1216,7 +1216,8 @@ static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); + rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); + return; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 4817dea3bc73..f831df500907 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -322,8 +322,9 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE, - info->nlh, GFP_KERNEL); + rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE, + info->nlh, GFP_KERNEL); + return; errout: if (err < 0) rtnl_set_sk_err(info->nl_net, RTNLGRP_IPV4_ROUTE, err); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 03e2a1ad71e9..f8f76d6e21cb 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3638,7 +3638,8 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); + rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); + return; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err); @@ -3849,7 +3850,8 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); + rtnl_notify(skb, net, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); + return; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_IPV6_IFADDR, err); @@ -3919,7 +3921,8 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); + rtnl_notify(skb, net, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); + return; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 3cd83b85e9ef..9f061d1adbc2 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1095,11 +1095,7 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) &ipv6_hdr(ra)->saddr); nlmsg_end(skb, nlh); - err = rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL, - GFP_ATOMIC); - if (err < 0) - goto errout; - + rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC); return; nla_put_failure: diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c3d486a3edad..1394ddb6e35c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2400,8 +2400,9 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, - info->nlh, gfp_any()); + rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, + info->nlh, gfp_any()); + return; errout: if (err < 0) rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index ed587be1e1c2..2760b62dc2c1 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1760,12 +1760,18 @@ int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid, exclude_pid = pid; } - /* errors reported via destination sk->sk_err */ - nlmsg_multicast(sk, skb, exclude_pid, group, flags); + /* errors reported via destination sk->sk_err, but propagate + * delivery errors if NETLINK_BROADCAST_ERROR flag is set */ + err = nlmsg_multicast(sk, skb, exclude_pid, group, flags); } - if (report) - err = nlmsg_unicast(sk, skb, pid); + if (report) { + int err2; + + err2 = nlmsg_unicast(sk, skb, pid); + if (!err || err == -ESRCH) + err = err2; + } return err; } diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 1ceea1f92413..cec4e5951681 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -47,8 +47,9 @@ static void rtmsg_notify(int event, struct net_device *dev, u8 addr) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, dev_net(dev), 0, - RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL); + rtnl_notify(skb, dev_net(dev), 0, + RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL); + return; errout: if (err < 0) rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_IFADDR, err); -- cgit v1.2.3 From 800d55f146098c5ffd1914d7eef2fd4a6d558b1d Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 23 Feb 2009 21:45:33 +0000 Subject: ipv6: Remove some pointless conditionals before kfree_skb() Remove some pointless conditionals before kfree_skb(). The semantic match that finds the problem is as follows: (http://www.emn.fr/x-info/coccinelle/) // @@ expression E; @@ - if (E) - kfree_skb(E); + kfree_skb(E); // Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/ipv6/ipv6_sockglue.c | 3 +-- net/ipv6/tcp_ipv6.c | 6 ++---- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 40f324655e24..d31df0f4bc9a 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -218,8 +218,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (opt) sock_kfree_s(sk, opt, opt->tot_len); pktopt = xchg(&np->pktoptions, NULL); - if (pktopt) - kfree_skb(pktopt); + kfree_skb(pktopt); sk->sk_destruct = inet_sock_destruct; /* diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 00f1269e11e9..4b5aa1854260 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -533,8 +533,7 @@ static inline void syn_flood_warning(struct sk_buff *skb) static void tcp_v6_reqsk_destructor(struct request_sock *req) { - if (inet6_rsk(req)->pktopts) - kfree_skb(inet6_rsk(req)->pktopts); + kfree_skb(inet6_rsk(req)->pktopts); } #ifdef CONFIG_TCP_MD5SIG @@ -1611,8 +1610,7 @@ ipv6_pktoptions: } } - if (opt_skb) - kfree_skb(opt_skb); + kfree_skb(opt_skb); return 0; } -- cgit v1.2.3 From da6185d8743704bf7647c0aedaf8c7879d8f3cab Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 24 Feb 2009 23:34:48 -0800 Subject: gre: used time_before for comparing jiffies The functions time_before is more robust for comparing jiffies against other values. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 07a188afb3ac..e62510d5ea5a 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -491,7 +491,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info) if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) goto out; - if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) + if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) t->err_count++; else t->err_count = 1; @@ -803,7 +803,8 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) #endif if (tunnel->err_count > 0) { - if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { + if (time_before(jiffies, + tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { tunnel->err_count--; dst_link_failure(skb); -- cgit v1.2.3 From 26d94b46d09c97adb3c78c744c195e74ede699b2 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 24 Feb 2009 23:36:47 -0800 Subject: ipip: used time_before for comparing jiffies The functions time_before is more robust for comparing jiffies against other values. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/ipv4/ipip.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index c49c4ecfb154..9054139795af 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -327,7 +327,7 @@ static int ipip_err(struct sk_buff *skb, u32 info) if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) goto out; - if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) + if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) t->err_count++; else t->err_count = 1; @@ -466,7 +466,8 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) } if (tunnel->err_count > 0) { - if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { + if (time_before(jiffies, + tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { tunnel->err_count--; dst_link_failure(skb); } else -- cgit v1.2.3 From bb80087a9440178bcd8363dc27a486f34786fec3 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 24 Feb 2009 23:37:19 -0800 Subject: sit: used time_before for comparing jiffies The functions time_before is more robust for comparing jiffies against other values. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/ipv6/sit.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index d3467e563f02..26915effb3bc 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -454,7 +454,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info) if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) goto out; - if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO) + if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) t->err_count++; else t->err_count = 1; @@ -658,7 +658,8 @@ static int ipip6_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) } if (tunnel->err_count > 0) { - if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) { + if (time_before(jiffies, + tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { tunnel->err_count--; dst_link_failure(skb); } else -- cgit v1.2.3 From 0dcec8c27ba44cd11c6e68c46d5fd553818a3837 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 25 Feb 2009 14:07:33 +0100 Subject: alloc_percpu: add align argument to __alloc_percpu, fix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Impact: build fix API was changed, but not all usage sites were converted: net/ipv4/route.c: In function ‘ip_rt_init’: net/ipv4/route.c:3379: error: too few arguments to function ‘__alloc_percpu’ Cc: Rusty Russell Cc: Tejun Heo Signed-off-by: Ingo Molnar --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 97f71153584f..bf895401218f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3376,7 +3376,7 @@ int __init ip_rt_init(void) int rc = 0; #ifdef CONFIG_NET_CLS_ROUTE - ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct)); + ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); if (!ip_rt_acct) panic("IP: failed to allocate ip_rt_acct\n"); #endif -- cgit v1.2.3 From 91aa35a5aa3540223066bf6b51c935418c63a35d Mon Sep 17 00:00:00 2001 From: Victor Shcherbatyuk Date: Thu, 15 Jan 2009 21:52:12 +0100 Subject: Bluetooth: Fix issue with return value of rfcomm_sock_sendmsg() In case of connection failures the rfcomm_sock_sendmsg() should return an error and not a 0 value. Signed-off-by: Victor Shcherbatyuk Signed-off-by: Marcel Holtmann --- net/bluetooth/rfcomm/sock.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index d3fc6fca38d0..ce505f2a755b 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -570,8 +570,11 @@ static int rfcomm_sock_sendmsg(struct kiocb *iocb, struct socket *sock, skb = sock_alloc_send_skb(sk, size + RFCOMM_SKB_RESERVE, msg->msg_flags & MSG_DONTWAIT, &err); - if (!skb) + if (!skb) { + if (sent == 0) + sent = err; break; + } skb_reserve(skb, RFCOMM_SKB_HEAD_RESERVE); err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); -- cgit v1.2.3 From d58daf42d29a3a4a4d4be46cf47ceee096789680 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 15 Jan 2009 21:52:14 +0100 Subject: Bluetooth: Preparation for usage of SOL_BLUETOOTH The socket option levels SOL_L2CAP, SOL_RFOMM and SOL_SCO are currently in use by various Bluetooth applications. Going forward the common option level SOL_BLUETOOTH should be used. This patch prepares the clean split of the old and new option levels while keeping everything backward compatibility. Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 51 +++++++++++++++++++++++++++++++++++++++++++-- net/bluetooth/rfcomm/sock.c | 51 +++++++++++++++++++++++++++++++++++++++++++-- net/bluetooth/sco.c | 27 +++++++++++++++++++++++- 3 files changed, 124 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index b93748e224ff..df1a95e185c6 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1106,7 +1106,7 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms return err; } -static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen) +static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __user *optval, int optlen) { struct sock *sk = sock->sk; struct l2cap_options opts; @@ -1152,7 +1152,29 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch return err; } -static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) +static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen) +{ + struct sock *sk = sock->sk; + int err = 0; + + BT_DBG("sk %p", sk); + + if (level == SOL_L2CAP) + return l2cap_sock_setsockopt_old(sock, optname, optval, optlen); + + lock_sock(sk); + + switch (optname) { + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + return err; +} + +static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; struct l2cap_options opts; @@ -1208,6 +1230,31 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, ch return err; } +static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) +{ + struct sock *sk = sock->sk; + int len, err = 0; + + BT_DBG("sk %p", sk); + + if (level == SOL_L2CAP) + return l2cap_sock_getsockopt_old(sock, optname, optval, optlen); + + if (get_user(len, optlen)) + return -EFAULT; + + lock_sock(sk); + + switch (optname) { + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + return err; +} + static int l2cap_sock_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index ce505f2a755b..65dd7133d72b 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -713,7 +713,7 @@ out: return copied ? : err; } -static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen) +static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname, char __user *optval, int optlen) { struct sock *sk = sock->sk; int err = 0; @@ -742,7 +742,29 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, c return err; } -static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) +static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen) +{ + struct sock *sk = sock->sk; + int err = 0; + + BT_DBG("sk %p", sk); + + if (level == SOL_RFCOMM) + return rfcomm_sock_setsockopt_old(sock, optname, optval, optlen); + + lock_sock(sk); + + switch (optname) { + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + return err; +} + +static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; struct sock *l2cap_sk; @@ -788,6 +810,31 @@ static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, c return err; } +static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) +{ + struct sock *sk = sock->sk; + int len, err = 0; + + BT_DBG("sk %p", sk); + + if (level == SOL_RFCOMM) + return rfcomm_sock_getsockopt_old(sock, optname, optval, optlen); + + if (get_user(len, optlen)) + return -EFAULT; + + lock_sock(sk); + + switch (optname) { + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + return err; +} + static int rfcomm_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk __maybe_unused = sock->sk; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 46fd8bf9a690..dea40d4bb6f5 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -668,7 +668,7 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, char return err; } -static int sco_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) +static int sco_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; struct sco_options opts; @@ -723,6 +723,31 @@ static int sco_sock_getsockopt(struct socket *sock, int level, int optname, char return err; } +static int sco_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) +{ + struct sock *sk = sock->sk; + int len, err = 0; + + BT_DBG("sk %p", sk); + + if (level == SOL_SCO) + return sco_sock_getsockopt_old(sock, optname, optval, optlen); + + if (get_user(len, optlen)) + return -EFAULT; + + lock_sock(sk); + + switch (optname) { + default: + err = -ENOPROTOOPT; + break; + } + + release_sock(sk); + return err; +} + static int sco_sock_release(struct socket *sock) { struct sock *sk = sock->sk; -- cgit v1.2.3 From c4f912e155504e94dd4f3d63c378dab0ff03dbda Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 15 Jan 2009 21:52:16 +0100 Subject: Bluetooth: Add global deferred socket parameter The L2CAP and RFCOMM applications require support for authorization and the ability of rejecting incoming connection requests. The socket interface is not really able to support this. This patch does the ground work for a socket option to defer connection setup. Setting this option allows calling of accept() and then the first read() will trigger the final connection setup. Calling close() would reject the connection. Signed-off-by: Marcel Holtmann --- net/bluetooth/af_bluetooth.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 744ed3f07ef3..7c0031ff8cfb 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -217,7 +217,8 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock) continue; } - if (sk->sk_state == BT_CONNECTED || !newsock) { + if (sk->sk_state == BT_CONNECTED || !newsock || + bt_sk(parent)->defer_setup) { bt_accept_unlink(sk); if (newsock) sock_graft(sk, newsock); @@ -232,7 +233,7 @@ struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock) EXPORT_SYMBOL(bt_accept_dequeue); int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t len, int flags) + struct msghdr *msg, size_t len, int flags) { int noblock = flags & MSG_DONTWAIT; struct sock *sk = sock->sk; @@ -275,6 +276,9 @@ static inline unsigned int bt_accept_poll(struct sock *parent) struct list_head *p, *n; struct sock *sk; + if (bt_sk(parent)->defer_setup) + return POLLIN | POLLRDNORM; + list_for_each_safe(p, n, &bt_sk(parent)->accept_q) { sk = (struct sock *) list_entry(p, struct bt_sock, accept_q); if (sk->sk_state == BT_CONNECTED) -- cgit v1.2.3 From bb23c0ab824653be4aa7dfca15b07b3059717004 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 15 Jan 2009 21:56:48 +0100 Subject: Bluetooth: Add support for deferring RFCOMM connection setup In order to decide if listening RFCOMM sockets should be accept()ed the BD_ADDR of the remote device needs to be known. This patch adds a socket option which defines a timeout for deferring the actual connection setup. The connection setup is done after reading from the socket for the first time. Until then writing to the socket returns ENOTCONN. Signed-off-by: Marcel Holtmann --- net/bluetooth/rfcomm/core.c | 56 +++++++++++++++++++++++++++++++++------------ net/bluetooth/rfcomm/sock.c | 44 ++++++++++++++++++++++++++++++++++- 2 files changed, 84 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index acd84fd524b8..edee49e00fbf 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -421,9 +421,16 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err) d, d->state, d->dlci, err, s); switch (d->state) { - case BT_CONNECTED: - case BT_CONFIG: case BT_CONNECT: + case BT_CONFIG: + if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) { + set_bit(RFCOMM_AUTH_REJECT, &d->flags); + rfcomm_schedule(RFCOMM_SCHED_AUTH); + break; + } + /* Fall through */ + + case BT_CONNECTED: d->state = BT_DISCONN; if (skb_queue_empty(&d->tx_queue)) { rfcomm_send_disc(s, d->dlci); @@ -434,6 +441,14 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err) } break; + case BT_OPEN: + if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) { + set_bit(RFCOMM_AUTH_REJECT, &d->flags); + rfcomm_schedule(RFCOMM_SCHED_AUTH); + break; + } + /* Fall through */ + default: rfcomm_dlc_clear_timer(d); @@ -1162,7 +1177,7 @@ static int rfcomm_recv_disc(struct rfcomm_session *s, u8 dlci) return 0; } -static void rfcomm_dlc_accept(struct rfcomm_dlc *d) +void rfcomm_dlc_accept(struct rfcomm_dlc *d) { struct sock *sk = d->session->sock->sk; @@ -1181,6 +1196,20 @@ static void rfcomm_dlc_accept(struct rfcomm_dlc *d) rfcomm_send_msc(d->session, 1, d->dlci, d->v24_sig); } +static void rfcomm_check_accept(struct rfcomm_dlc *d) +{ + if (rfcomm_check_link_mode(d)) { + set_bit(RFCOMM_AUTH_PENDING, &d->flags); + rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT); + } else { + if (d->defer_setup) { + set_bit(RFCOMM_DEFER_SETUP, &d->flags); + rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT); + } else + rfcomm_dlc_accept(d); + } +} + static int rfcomm_recv_sabm(struct rfcomm_session *s, u8 dlci) { struct rfcomm_dlc *d; @@ -1203,11 +1232,7 @@ static int rfcomm_recv_sabm(struct rfcomm_session *s, u8 dlci) if (d) { if (d->state == BT_OPEN) { /* DLC was previously opened by PN request */ - if (rfcomm_check_link_mode(d)) { - set_bit(RFCOMM_AUTH_PENDING, &d->flags); - rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT); - } else - rfcomm_dlc_accept(d); + rfcomm_check_accept(d); } return 0; } @@ -1219,11 +1244,7 @@ static int rfcomm_recv_sabm(struct rfcomm_session *s, u8 dlci) d->addr = __addr(s->initiator, dlci); rfcomm_dlc_link(s, d); - if (rfcomm_check_link_mode(d)) { - set_bit(RFCOMM_AUTH_PENDING, &d->flags); - rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT); - } else - rfcomm_dlc_accept(d); + rfcomm_check_accept(d); } else { rfcomm_send_dm(s, dlci); } @@ -1717,8 +1738,13 @@ static inline void rfcomm_process_dlcs(struct rfcomm_session *s) if (d->out) { rfcomm_send_pn(s, 1, d); rfcomm_dlc_set_timer(d, RFCOMM_CONN_TIMEOUT); - } else - rfcomm_dlc_accept(d); + } else { + if (d->defer_setup) { + set_bit(RFCOMM_DEFER_SETUP, &d->flags); + rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT); + } else + rfcomm_dlc_accept(d); + } if (d->link_mode & RFCOMM_LM_SECURE) { struct sock *sk = s->sock->sk; hci_conn_change_link_key(l2cap_pi(sk)->conn->hcon); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 65dd7133d72b..d37a829a81e4 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -262,8 +262,10 @@ static void rfcomm_sock_init(struct sock *sk, struct sock *parent) if (parent) { sk->sk_type = parent->sk_type; pi->link_mode = rfcomm_pi(parent)->link_mode; + pi->dlc->defer_setup = bt_sk(parent)->defer_setup; } else { pi->link_mode = 0; + pi->dlc->defer_setup = 0; } pi->dlc->link_mode = pi->link_mode; @@ -554,6 +556,9 @@ static int rfcomm_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct sk_buff *skb; int sent = 0; + if (test_bit(RFCOMM_DEFER_SETUP, &d->flags)) + return -ENOTCONN; + if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; @@ -633,10 +638,16 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; + struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc; int err = 0; size_t target, copied = 0; long timeo; + if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) { + rfcomm_dlc_accept(d); + return 0; + } + if (flags & MSG_OOB) return -EOPNOTSUPP; @@ -746,6 +757,7 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, c { struct sock *sk = sock->sk; int err = 0; + u32 opt; BT_DBG("sk %p", sk); @@ -755,6 +767,20 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, c lock_sock(sk); switch (optname) { + case BT_DEFER_SETUP: + if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) { + err = -EINVAL; + break; + } + + if (get_user(opt, (u32 __user *) optval)) { + err = -EFAULT; + break; + } + + bt_sk(sk)->defer_setup = opt; + break; + default: err = -ENOPROTOOPT; break; @@ -785,7 +811,8 @@ static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __u break; case RFCOMM_CONNINFO: - if (sk->sk_state != BT_CONNECTED) { + if (sk->sk_state != BT_CONNECTED && + !rfcomm_pi(sk)->dlc->defer_setup) { err = -ENOTCONN; break; } @@ -826,6 +853,17 @@ static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, c lock_sock(sk); switch (optname) { + case BT_DEFER_SETUP: + if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) { + err = -EINVAL; + break; + } + + if (put_user(bt_sk(sk)->defer_setup, (u32 __user *) optval)) + err = -EFAULT; + + break; + default: err = -ENOPROTOOPT; break; @@ -938,6 +976,10 @@ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc * done: bh_unlock_sock(parent); + + if (bt_sk(parent)->defer_setup) + parent->sk_state_change(parent); + return result; } -- cgit v1.2.3 From f66dc81f44d918ee1aa1a9d821bb2f25c7592bc0 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 15 Jan 2009 21:57:00 +0100 Subject: Bluetooth: Add support for deferring L2CAP connection setup In order to decide if listening L2CAP sockets should be accept()ed the BD_ADDR of the remote device needs to be known. This patch adds a socket option which defines a timeout for deferring the actual connection setup. Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 110 +++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 101 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index df1a95e185c6..123efb46d3f5 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -372,9 +372,17 @@ static void l2cap_conn_start(struct l2cap_conn *conn) rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); if (l2cap_check_link_mode(sk)) { - sk->sk_state = BT_CONFIG; - rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS); - rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); + if (bt_sk(sk)->defer_setup) { + struct sock *parent = bt_sk(sk)->parent; + rsp.result = cpu_to_le16(L2CAP_CR_PEND); + rsp.status = cpu_to_le16(L2CAP_CS_AUTHOR_PEND); + parent->sk_data_ready(parent, 0); + + } else { + sk->sk_state = BT_CONFIG; + rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS); + rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); + } } else { rsp.result = cpu_to_le16(L2CAP_CR_PEND); rsp.status = cpu_to_le16(L2CAP_CS_AUTHEN_PEND); @@ -608,7 +616,6 @@ static void __l2cap_sock_close(struct sock *sk, int reason) case BT_CONNECTED: case BT_CONFIG: - case BT_CONNECT2: if (sk->sk_type == SOCK_SEQPACKET) { struct l2cap_conn *conn = l2cap_pi(sk)->conn; struct l2cap_disconn_req req; @@ -624,6 +631,27 @@ static void __l2cap_sock_close(struct sock *sk, int reason) l2cap_chan_del(sk, reason); break; + case BT_CONNECT2: + if (sk->sk_type == SOCK_SEQPACKET) { + struct l2cap_conn *conn = l2cap_pi(sk)->conn; + struct l2cap_conn_rsp rsp; + __u16 result; + + if (bt_sk(sk)->defer_setup) + result = L2CAP_CR_SEC_BLOCK; + else + result = L2CAP_CR_BAD_PSM; + + rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); + rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); + rsp.result = cpu_to_le16(result); + rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); + l2cap_send_cmd(conn, l2cap_pi(sk)->ident, + L2CAP_CONN_RSP, sizeof(rsp), &rsp); + } else + l2cap_chan_del(sk, reason); + break; + case BT_CONNECT: case BT_DISCONN: l2cap_chan_del(sk, reason); @@ -653,6 +681,8 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent) if (parent) { sk->sk_type = parent->sk_type; + bt_sk(sk)->defer_setup = bt_sk(parent)->defer_setup; + pi->imtu = l2cap_pi(parent)->imtu; pi->omtu = l2cap_pi(parent)->omtu; pi->link_mode = l2cap_pi(parent)->link_mode; @@ -1106,6 +1136,33 @@ static int l2cap_sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct ms return err; } +static int l2cap_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags) +{ + struct sock *sk = sock->sk; + + lock_sock(sk); + + if (sk->sk_state == BT_CONNECT2 && bt_sk(sk)->defer_setup) { + struct l2cap_conn_rsp rsp; + + sk->sk_state = BT_CONFIG; + + rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); + rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); + rsp.result = cpu_to_le16(L2CAP_CR_SUCCESS); + rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); + l2cap_send_cmd(l2cap_pi(sk)->conn, l2cap_pi(sk)->ident, + L2CAP_CONN_RSP, sizeof(rsp), &rsp); + + release_sock(sk); + return 0; + } + + release_sock(sk); + + return bt_sock_recvmsg(iocb, sock, msg, len, flags); +} + static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __user *optval, int optlen) { struct sock *sk = sock->sk; @@ -1156,6 +1213,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch { struct sock *sk = sock->sk; int err = 0; + u32 opt; BT_DBG("sk %p", sk); @@ -1165,6 +1223,20 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch lock_sock(sk); switch (optname) { + case BT_DEFER_SETUP: + if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) { + err = -EINVAL; + break; + } + + if (get_user(opt, (u32 __user *) optval)) { + err = -EFAULT; + break; + } + + bt_sk(sk)->defer_setup = opt; + break; + default: err = -ENOPROTOOPT; break; @@ -1207,7 +1279,9 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __us break; case L2CAP_CONNINFO: - if (sk->sk_state != BT_CONNECTED) { + if (sk->sk_state != BT_CONNECTED && + !(sk->sk_state == BT_CONNECT2 && + bt_sk(sk)->defer_setup)) { err = -ENOTCONN; break; } @@ -1246,6 +1320,17 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, ch lock_sock(sk); switch (optname) { + case BT_DEFER_SETUP: + if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) { + err = -EINVAL; + break; + } + + if (put_user(bt_sk(sk)->defer_setup, (u32 __user *) optval)) + err = -EFAULT; + + break; + default: err = -ENOPROTOOPT; break; @@ -1670,9 +1755,16 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) { if (l2cap_check_link_mode(sk)) { - sk->sk_state = BT_CONFIG; - result = L2CAP_CR_SUCCESS; - status = L2CAP_CS_NO_INFO; + if (bt_sk(sk)->defer_setup) { + sk->sk_state = BT_CONNECT2; + result = L2CAP_CR_PEND; + status = L2CAP_CS_AUTHOR_PEND; + parent->sk_data_ready(parent, 0); + } else { + sk->sk_state = BT_CONFIG; + result = L2CAP_CR_SUCCESS; + status = L2CAP_CS_NO_INFO; + } } else { sk->sk_state = BT_CONNECT2; result = L2CAP_CR_PEND; @@ -2494,7 +2586,7 @@ static const struct proto_ops l2cap_sock_ops = { .accept = l2cap_sock_accept, .getname = l2cap_sock_getname, .sendmsg = l2cap_sock_sendmsg, - .recvmsg = bt_sock_recvmsg, + .recvmsg = l2cap_sock_recvmsg, .poll = bt_sock_poll, .ioctl = bt_sock_ioctl, .mmap = sock_no_mmap, -- cgit v1.2.3 From 71aeeaa1fd88fe7446391e0553336f0e0c2cfe6a Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 15 Jan 2009 21:57:02 +0100 Subject: Bluetooth: Reject incoming SCO connections without listeners All SCO and eSCO connection are auto-accepted no matter if there is a corresponding listening socket for them. This patch changes this and connection requests for SCO and eSCO without any socket are rejected. Signed-off-by: Marcel Holtmann --- net/bluetooth/sco.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index dea40d4bb6f5..71df982c09c9 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -857,10 +857,30 @@ done: /* ----- SCO interface with lower layer (HCI) ----- */ static int sco_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, __u8 type) { + register struct sock *sk; + struct hlist_node *node; + int lm = 0; + + if (type != SCO_LINK && type != ESCO_LINK) + return 0; + BT_DBG("hdev %s, bdaddr %s", hdev->name, batostr(bdaddr)); - /* Always accept connection */ - return HCI_LM_ACCEPT; + /* Find listening sockets */ + read_lock(&sco_sk_list.lock); + sk_for_each(sk, node, &sco_sk_list.head) { + if (sk->sk_state != BT_LISTEN) + continue; + + if (!bacmp(&bt_sk(sk)->src, &hdev->bdaddr) || + !bacmp(&bt_sk(sk)->src, BDADDR_ANY)) { + lm |= HCI_LM_ACCEPT; + break; + } + } + read_unlock(&sco_sk_list.lock); + + return lm; } static int sco_connect_cfm(struct hci_conn *hcon, __u8 status) -- cgit v1.2.3 From c89b6e6bda4c8021195778f47567d0cc9dbfe7ec Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 15 Jan 2009 21:57:03 +0100 Subject: Bluetooth: Fix SCO state handling for incoming connections When the remote device supports only SCO connections, on receipt of the HCI_EV_CONN_COMPLETE event packet, the connect state is changed to BT_CONNECTED, but the socket state is not updated. Hence, the connect() call times out even though the SCO connection has been successfully established. Based on a report by Jaikumar Ganesh Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_event.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index f91ba690f5d2..beea9dbb6562 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -914,7 +914,8 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s if (ev->status) { hci_proto_connect_cfm(conn, ev->status); hci_conn_del(conn); - } + } else if (ev->link_type != ACL_LINK) + hci_proto_connect_cfm(conn, ev->status); unlock: hci_dev_unlock(hdev); -- cgit v1.2.3 From 8c1b235594fbab9a13240a1dac12ea9fd99b6440 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 15 Jan 2009 21:58:04 +0100 Subject: Bluetooth: Add enhanced security model for Simple Pairing The current security model is based around the flags AUTH, ENCRYPT and SECURE. Starting with support for the Bluetooth 2.1 specification this is no longer sufficient. The different security levels are now defined as SDP, LOW, MEDIUM and SECURE. Previously it was possible to set each security independently, but this actually doesn't make a lot of sense. For Bluetooth the encryption depends on a previous successful authentication. Also you can only update your existing link key if you successfully created at least one before. And of course the update of link keys without having proper encryption in place is a security issue. The new security levels from the Bluetooth 2.1 specification are now used internally. All old settings are mapped to the new values and this way it ensures that old applications still work. The only limitation is that it is no longer possible to set authentication without also enabling encryption. No application should have done this anyway since this is actually a security issue. Without encryption the integrity of the authentication can't be guaranteed. As default for a new L2CAP or RFCOMM connection, the LOW security level is used. The only exception here are the service discovery sessions on PSM 1 where SDP level is used. To have similar security strength as with a Bluetooth 2.0 and before combination key, the MEDIUM level should be used. This is according to the Bluetooth specification. The MEDIUM level will not require any kind of man-in-the-middle (MITM) protection. Only the HIGH security level will require this. Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 45 ++++++++++----- net/bluetooth/hci_event.c | 3 +- net/bluetooth/l2cap.c | 134 +++++++++++++------------------------------- net/bluetooth/rfcomm/core.c | 81 ++++++++------------------ net/bluetooth/sco.c | 2 +- 5 files changed, 97 insertions(+), 168 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index a4a789f24c8d..98f97a1e9bbb 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -325,7 +325,7 @@ EXPORT_SYMBOL(hci_get_route); /* Create SCO or ACL connection. * Device _must_ be locked */ -struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 auth_type) +struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 sec_level, __u8 auth_type) { struct hci_conn *acl; struct hci_conn *sco; @@ -340,6 +340,7 @@ struct hci_conn *hci_connect(struct hci_dev *hdev, int type, bdaddr_t *dst, __u8 hci_conn_hold(acl); if (acl->state == BT_OPEN || acl->state == BT_CLOSED) { + acl->sec_level = sec_level; acl->auth_type = auth_type; hci_acl_connect(acl); } @@ -385,16 +386,17 @@ int hci_conn_check_link_mode(struct hci_conn *conn) EXPORT_SYMBOL(hci_conn_check_link_mode); /* Authenticate remote device */ -int hci_conn_auth(struct hci_conn *conn) +static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level) { BT_DBG("conn %p", conn); - if (conn->ssp_mode > 0 && conn->hdev->ssp_mode > 0) { - if (!(conn->auth_type & 0x01)) { - conn->auth_type |= 0x01; - conn->link_mode &= ~HCI_LM_AUTH; - } - } + if (sec_level > conn->sec_level) + conn->link_mode &= ~HCI_LM_AUTH; + + conn->sec_level = sec_level; + + if (sec_level == BT_SECURITY_HIGH) + conn->auth_type |= 0x01; if (conn->link_mode & HCI_LM_AUTH) return 1; @@ -405,31 +407,42 @@ int hci_conn_auth(struct hci_conn *conn) hci_send_cmd(conn->hdev, HCI_OP_AUTH_REQUESTED, sizeof(cp), &cp); } + return 0; } -EXPORT_SYMBOL(hci_conn_auth); -/* Enable encryption */ -int hci_conn_encrypt(struct hci_conn *conn) +/* Enable security */ +int hci_conn_security(struct hci_conn *conn, __u8 sec_level) { BT_DBG("conn %p", conn); + if (sec_level == BT_SECURITY_SDP) + return 1; + + if (sec_level == BT_SECURITY_LOW) { + if (conn->ssp_mode > 0 && conn->hdev->ssp_mode > 0) + return hci_conn_auth(conn, sec_level); + else + return 1; + } + if (conn->link_mode & HCI_LM_ENCRYPT) - return hci_conn_auth(conn); + return hci_conn_auth(conn, sec_level); if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) return 0; - if (hci_conn_auth(conn)) { + if (hci_conn_auth(conn, sec_level)) { struct hci_cp_set_conn_encrypt cp; cp.handle = cpu_to_le16(conn->handle); cp.encrypt = 1; hci_send_cmd(conn->hdev, HCI_OP_SET_CONN_ENCRYPT, sizeof(cp), &cp); } + return 0; } -EXPORT_SYMBOL(hci_conn_encrypt); +EXPORT_SYMBOL(hci_conn_security); /* Change link key */ int hci_conn_change_link_key(struct hci_conn *conn) @@ -442,12 +455,13 @@ int hci_conn_change_link_key(struct hci_conn *conn) hci_send_cmd(conn->hdev, HCI_OP_CHANGE_CONN_LINK_KEY, sizeof(cp), &cp); } + return 0; } EXPORT_SYMBOL(hci_conn_change_link_key); /* Switch role */ -int hci_conn_switch_role(struct hci_conn *conn, uint8_t role) +int hci_conn_switch_role(struct hci_conn *conn, __u8 role) { BT_DBG("conn %p", conn); @@ -460,6 +474,7 @@ int hci_conn_switch_role(struct hci_conn *conn, uint8_t role) cp.role = role; hci_send_cmd(conn->hdev, HCI_OP_SWITCH_ROLE, sizeof(cp), &cp); } + return 0; } EXPORT_SYMBOL(hci_conn_switch_role); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index beea9dbb6562..014fc8b320ba 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1601,7 +1601,8 @@ static inline void hci_remote_ext_features_evt(struct hci_dev *hdev, struct sk_b if (conn->state == BT_CONFIG) { if (!ev->status && hdev->ssp_mode > 0 && - conn->ssp_mode > 0 && conn->out) { + conn->ssp_mode > 0 && conn->out && + conn->sec_level != BT_SECURITY_SDP) { struct hci_cp_auth_requested cp; cp.handle = ev->handle; hci_send_cmd(hdev, HCI_OP_AUTH_REQUESTED, diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 123efb46d3f5..eadf09231866 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -263,12 +263,17 @@ static inline int l2cap_check_link_mode(struct sock *sk) { struct l2cap_conn *conn = l2cap_pi(sk)->conn; - if ((l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT) || - (l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE)) - return hci_conn_encrypt(conn->hcon); + if (l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE) + return hci_conn_security(conn->hcon, BT_SECURITY_HIGH); + + if (l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT) + return hci_conn_security(conn->hcon, BT_SECURITY_MEDIUM); if (l2cap_pi(sk)->link_mode & L2CAP_LM_AUTH) - return hci_conn_auth(conn->hcon); + return hci_conn_security(conn->hcon, BT_SECURITY_LOW); + + if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) + return hci_conn_security(conn->hcon, BT_SECURITY_SDP); return 1; } @@ -803,6 +808,7 @@ static int l2cap_do_connect(struct sock *sk) struct l2cap_conn *conn; struct hci_conn *hcon; struct hci_dev *hdev; + __u8 sec_level; __u8 auth_type; int err = 0; @@ -815,21 +821,37 @@ static int l2cap_do_connect(struct sock *sk) err = -ENOMEM; - if (l2cap_pi(sk)->link_mode & L2CAP_LM_AUTH || - l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT || - l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE) { - if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) - auth_type = HCI_AT_NO_BONDING_MITM; + if (l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE) + sec_level = BT_SECURITY_HIGH; + else if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) + sec_level = BT_SECURITY_SDP; + else if (l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT) + sec_level = BT_SECURITY_MEDIUM; + else + sec_level = BT_SECURITY_LOW; + + if (sk->sk_type == SOCK_RAW) { + if (l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE) + auth_type = HCI_AT_DEDICATED_BONDING_MITM; + else if (l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT) + auth_type = HCI_AT_DEDICATED_BONDING; else - auth_type = HCI_AT_GENERAL_BONDING_MITM; - } else { - if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) auth_type = HCI_AT_NO_BONDING; + } else if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) { + if (l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE) + auth_type = HCI_AT_NO_BONDING_MITM; else + auth_type = HCI_AT_NO_BONDING; + } else { + if (l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE) + auth_type = HCI_AT_GENERAL_BONDING_MITM; + else if (l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT) auth_type = HCI_AT_GENERAL_BONDING; + else + auth_type = HCI_AT_NO_BONDING; } - hcon = hci_connect(hdev, ACL_LINK, dst, auth_type); + hcon = hci_connect(hdev, ACL_LINK, dst, sec_level, auth_type); if (!hcon) goto done; @@ -1402,11 +1424,6 @@ static void l2cap_chan_ready(struct sock *sk) */ parent->sk_data_ready(parent, 0); } - - if (l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE) { - struct l2cap_conn *conn = l2cap_pi(sk)->conn; - hci_conn_change_link_key(conn->hcon); - } } /* Copy frame to all raw sockets on that connection */ @@ -2323,77 +2340,7 @@ static int l2cap_disconn_ind(struct hci_conn *hcon, u8 reason) return 0; } -static int l2cap_auth_cfm(struct hci_conn *hcon, u8 status) -{ - struct l2cap_chan_list *l; - struct l2cap_conn *conn = hcon->l2cap_data; - struct sock *sk; - - if (!conn) - return 0; - - l = &conn->chan_list; - - BT_DBG("conn %p", conn); - - read_lock(&l->lock); - - for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { - struct l2cap_pinfo *pi = l2cap_pi(sk); - - bh_lock_sock(sk); - - if ((pi->link_mode & (L2CAP_LM_ENCRYPT | L2CAP_LM_SECURE)) && - !(hcon->link_mode & HCI_LM_ENCRYPT) && - !status) { - bh_unlock_sock(sk); - continue; - } - - if (sk->sk_state == BT_CONNECT) { - if (!status) { - struct l2cap_conn_req req; - req.scid = cpu_to_le16(l2cap_pi(sk)->scid); - req.psm = l2cap_pi(sk)->psm; - - l2cap_pi(sk)->ident = l2cap_get_ident(conn); - - l2cap_send_cmd(conn, l2cap_pi(sk)->ident, - L2CAP_CONN_REQ, sizeof(req), &req); - } else { - l2cap_sock_clear_timer(sk); - l2cap_sock_set_timer(sk, HZ / 10); - } - } else if (sk->sk_state == BT_CONNECT2) { - struct l2cap_conn_rsp rsp; - __u16 result; - - if (!status) { - sk->sk_state = BT_CONFIG; - result = L2CAP_CR_SUCCESS; - } else { - sk->sk_state = BT_DISCONN; - l2cap_sock_set_timer(sk, HZ / 10); - result = L2CAP_CR_SEC_BLOCK; - } - - rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); - rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); - rsp.result = cpu_to_le16(result); - rsp.status = cpu_to_le16(L2CAP_CS_NO_INFO); - l2cap_send_cmd(conn, l2cap_pi(sk)->ident, - L2CAP_CONN_RSP, sizeof(rsp), &rsp); - } - - bh_unlock_sock(sk); - } - - read_unlock(&l->lock); - - return 0; -} - -static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) +static int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) { struct l2cap_chan_list *l; struct l2cap_conn *conn = hcon->l2cap_data; @@ -2413,10 +2360,10 @@ static int l2cap_encrypt_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) bh_lock_sock(sk); - if ((pi->link_mode & (L2CAP_LM_ENCRYPT | L2CAP_LM_SECURE)) && + if (!status && encrypt == 0x00 && + (pi->link_mode & L2CAP_LM_SECURE) && (sk->sk_state == BT_CONNECTED || - sk->sk_state == BT_CONFIG) && - !status && encrypt == 0x00) { + sk->sk_state == BT_CONFIG)) { __l2cap_sock_close(sk, ECONNREFUSED); bh_unlock_sock(sk); continue; @@ -2608,8 +2555,7 @@ static struct hci_proto l2cap_hci_proto = { .connect_ind = l2cap_connect_ind, .connect_cfm = l2cap_connect_cfm, .disconn_ind = l2cap_disconn_ind, - .auth_cfm = l2cap_auth_cfm, - .encrypt_cfm = l2cap_encrypt_cfm, + .security_cfm = l2cap_security_cfm, .recv_acldata = l2cap_recv_acldata }; diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index edee49e00fbf..68f70c5270c6 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -226,16 +226,18 @@ static int rfcomm_l2sock_create(struct socket **sock) static inline int rfcomm_check_link_mode(struct rfcomm_dlc *d) { struct sock *sk = d->session->sock->sk; + struct l2cap_conn *conn = l2cap_pi(sk)->conn; - if (d->link_mode & (RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE)) { - if (!hci_conn_encrypt(l2cap_pi(sk)->conn->hcon)) - return 1; - } else if (d->link_mode & RFCOMM_LM_AUTH) { - if (!hci_conn_auth(l2cap_pi(sk)->conn->hcon)) - return 1; - } + if (d->link_mode & RFCOMM_LM_SECURE) + return hci_conn_security(conn->hcon, BT_SECURITY_HIGH); - return 0; + if (d->link_mode & RFCOMM_LM_ENCRYPT) + return hci_conn_security(conn->hcon, BT_SECURITY_MEDIUM); + + if (d->link_mode & RFCOMM_LM_AUTH) + return hci_conn_security(conn->hcon, BT_SECURITY_LOW); + + return 1; } /* ---- RFCOMM DLCs ---- */ @@ -389,9 +391,9 @@ static int __rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst, if (s->state == BT_CONNECTED) { if (rfcomm_check_link_mode(d)) - set_bit(RFCOMM_AUTH_PENDING, &d->flags); - else rfcomm_send_pn(s, 1, d); + else + set_bit(RFCOMM_AUTH_PENDING, &d->flags); } rfcomm_dlc_set_timer(d, RFCOMM_CONN_TIMEOUT); @@ -1199,14 +1201,14 @@ void rfcomm_dlc_accept(struct rfcomm_dlc *d) static void rfcomm_check_accept(struct rfcomm_dlc *d) { if (rfcomm_check_link_mode(d)) { - set_bit(RFCOMM_AUTH_PENDING, &d->flags); - rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT); - } else { if (d->defer_setup) { set_bit(RFCOMM_DEFER_SETUP, &d->flags); rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT); } else rfcomm_dlc_accept(d); + } else { + set_bit(RFCOMM_AUTH_PENDING, &d->flags); + rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT); } } @@ -1659,10 +1661,11 @@ static void rfcomm_process_connect(struct rfcomm_session *s) if (d->state == BT_CONFIG) { d->mtu = s->mtu; if (rfcomm_check_link_mode(d)) { + rfcomm_send_pn(s, 1, d); + } else { set_bit(RFCOMM_AUTH_PENDING, &d->flags); rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT); - } else - rfcomm_send_pn(s, 1, d); + } } } } @@ -1973,42 +1976,7 @@ static int rfcomm_run(void *unused) return 0; } -static void rfcomm_auth_cfm(struct hci_conn *conn, u8 status) -{ - struct rfcomm_session *s; - struct rfcomm_dlc *d; - struct list_head *p, *n; - - BT_DBG("conn %p status 0x%02x", conn, status); - - s = rfcomm_session_get(&conn->hdev->bdaddr, &conn->dst); - if (!s) - return; - - rfcomm_session_hold(s); - - list_for_each_safe(p, n, &s->dlcs) { - d = list_entry(p, struct rfcomm_dlc, list); - - if ((d->link_mode & (RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE)) && - !(conn->link_mode & HCI_LM_ENCRYPT) && !status) - continue; - - if (!test_and_clear_bit(RFCOMM_AUTH_PENDING, &d->flags)) - continue; - - if (!status) - set_bit(RFCOMM_AUTH_ACCEPT, &d->flags); - else - set_bit(RFCOMM_AUTH_REJECT, &d->flags); - } - - rfcomm_session_put(s); - - rfcomm_schedule(RFCOMM_SCHED_AUTH); -} - -static void rfcomm_encrypt_cfm(struct hci_conn *conn, u8 status, u8 encrypt) +static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) { struct rfcomm_session *s; struct rfcomm_dlc *d; @@ -2025,10 +1993,10 @@ static void rfcomm_encrypt_cfm(struct hci_conn *conn, u8 status, u8 encrypt) list_for_each_safe(p, n, &s->dlcs) { d = list_entry(p, struct rfcomm_dlc, list); - if ((d->link_mode & (RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE)) && + if (!status && encrypt == 0x00 && + (d->link_mode & RFCOMM_LM_ENCRYPT) && (d->state == BT_CONNECTED || - d->state == BT_CONFIG) && - !status && encrypt == 0x00) { + d->state == BT_CONFIG)) { __rfcomm_dlc_close(d, ECONNREFUSED); continue; } @@ -2036,7 +2004,7 @@ static void rfcomm_encrypt_cfm(struct hci_conn *conn, u8 status, u8 encrypt) if (!test_and_clear_bit(RFCOMM_AUTH_PENDING, &d->flags)) continue; - if (!status && encrypt) + if (!status) set_bit(RFCOMM_AUTH_ACCEPT, &d->flags); else set_bit(RFCOMM_AUTH_REJECT, &d->flags); @@ -2049,8 +2017,7 @@ static void rfcomm_encrypt_cfm(struct hci_conn *conn, u8 status, u8 encrypt) static struct hci_cb rfcomm_cb = { .name = "RFCOMM", - .auth_cfm = rfcomm_auth_cfm, - .encrypt_cfm = rfcomm_encrypt_cfm + .security_cfm = rfcomm_security_cfm }; static ssize_t rfcomm_dlc_sysfs_show(struct class *dev, char *buf) diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 71df982c09c9..7f10f97cd697 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -195,7 +195,7 @@ static int sco_connect(struct sock *sk) else type = SCO_LINK; - hcon = hci_connect(hdev, type, dst, HCI_AT_NO_BONDING); + hcon = hci_connect(hdev, type, dst, BT_SECURITY_LOW, HCI_AT_NO_BONDING); if (!hcon) goto done; -- cgit v1.2.3 From 2af6b9d518ddfbc4d6990d5f9c9b1a05341c1cef Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 15 Jan 2009 21:58:38 +0100 Subject: Bluetooth: Replace L2CAP link mode with security level Change the L2CAP internals to use the new security levels and remove the link mode details. Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 160 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 110 insertions(+), 50 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index eadf09231866..e899a9371c00 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -78,8 +78,7 @@ static void l2cap_sock_timeout(unsigned long arg) bh_lock_sock(sk); if (sk->sk_state == BT_CONNECT && - (l2cap_pi(sk)->link_mode & (L2CAP_LM_AUTH | - L2CAP_LM_ENCRYPT | L2CAP_LM_SECURE))) + l2cap_pi(sk)->sec_level != BT_SECURITY_SDP) reason = ECONNREFUSED; else reason = ETIMEDOUT; @@ -259,23 +258,11 @@ static void l2cap_chan_del(struct sock *sk, int err) } /* Service level security */ -static inline int l2cap_check_link_mode(struct sock *sk) +static inline int l2cap_check_security(struct sock *sk) { struct l2cap_conn *conn = l2cap_pi(sk)->conn; - if (l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE) - return hci_conn_security(conn->hcon, BT_SECURITY_HIGH); - - if (l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT) - return hci_conn_security(conn->hcon, BT_SECURITY_MEDIUM); - - if (l2cap_pi(sk)->link_mode & L2CAP_LM_AUTH) - return hci_conn_security(conn->hcon, BT_SECURITY_LOW); - - if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) - return hci_conn_security(conn->hcon, BT_SECURITY_SDP); - - return 1; + return hci_conn_security(conn->hcon, l2cap_pi(sk)->sec_level); } static inline u8 l2cap_get_ident(struct l2cap_conn *conn) @@ -317,7 +304,7 @@ static void l2cap_do_start(struct sock *sk) struct l2cap_conn *conn = l2cap_pi(sk)->conn; if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) { - if (l2cap_check_link_mode(sk)) { + if (l2cap_check_security(sk)) { struct l2cap_conn_req req; req.scid = cpu_to_le16(l2cap_pi(sk)->scid); req.psm = l2cap_pi(sk)->psm; @@ -361,7 +348,7 @@ static void l2cap_conn_start(struct l2cap_conn *conn) } if (sk->sk_state == BT_CONNECT) { - if (l2cap_check_link_mode(sk)) { + if (l2cap_check_security(sk)) { struct l2cap_conn_req req; req.scid = cpu_to_le16(l2cap_pi(sk)->scid); req.psm = l2cap_pi(sk)->psm; @@ -376,7 +363,7 @@ static void l2cap_conn_start(struct l2cap_conn *conn) rsp.scid = cpu_to_le16(l2cap_pi(sk)->dcid); rsp.dcid = cpu_to_le16(l2cap_pi(sk)->scid); - if (l2cap_check_link_mode(sk)) { + if (l2cap_check_security(sk)) { if (bt_sk(sk)->defer_setup) { struct sock *parent = bt_sk(sk)->parent; rsp.result = cpu_to_le16(L2CAP_CR_PEND); @@ -439,7 +426,7 @@ static void l2cap_conn_unreliable(struct l2cap_conn *conn, int err) read_lock(&l->lock); for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { - if (l2cap_pi(sk)->link_mode & L2CAP_LM_RELIABLE) + if (l2cap_pi(sk)->force_reliable) sk->sk_err = err; } @@ -690,11 +677,15 @@ static void l2cap_sock_init(struct sock *sk, struct sock *parent) pi->imtu = l2cap_pi(parent)->imtu; pi->omtu = l2cap_pi(parent)->omtu; - pi->link_mode = l2cap_pi(parent)->link_mode; + pi->sec_level = l2cap_pi(parent)->sec_level; + pi->role_switch = l2cap_pi(parent)->role_switch; + pi->force_reliable = l2cap_pi(parent)->force_reliable; } else { pi->imtu = L2CAP_DEFAULT_MTU; pi->omtu = 0; - pi->link_mode = 0; + pi->sec_level = BT_SECURITY_LOW; + pi->role_switch = 0; + pi->force_reliable = 0; } /* Default config options */ @@ -792,6 +783,9 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_ l2cap_pi(sk)->psm = la->l2_psm; l2cap_pi(sk)->sport = la->l2_psm; sk->sk_state = BT_BOUND; + + if (btohs(la->l2_psm) == 0x0001) + l2cap_pi(sk)->sec_level = BT_SECURITY_SDP; } write_unlock_bh(&l2cap_sk_list.lock); @@ -808,7 +802,6 @@ static int l2cap_do_connect(struct sock *sk) struct l2cap_conn *conn; struct hci_conn *hcon; struct hci_dev *hdev; - __u8 sec_level; __u8 auth_type; int err = 0; @@ -821,37 +814,39 @@ static int l2cap_do_connect(struct sock *sk) err = -ENOMEM; - if (l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE) - sec_level = BT_SECURITY_HIGH; - else if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) - sec_level = BT_SECURITY_SDP; - else if (l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT) - sec_level = BT_SECURITY_MEDIUM; - else - sec_level = BT_SECURITY_LOW; - if (sk->sk_type == SOCK_RAW) { - if (l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE) + switch (l2cap_pi(sk)->sec_level) { + case BT_SECURITY_HIGH: auth_type = HCI_AT_DEDICATED_BONDING_MITM; - else if (l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT) + break; + case BT_SECURITY_MEDIUM: auth_type = HCI_AT_DEDICATED_BONDING; - else + break; + default: auth_type = HCI_AT_NO_BONDING; + break; + } } else if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) { - if (l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE) + if (l2cap_pi(sk)->sec_level == BT_SECURITY_HIGH) auth_type = HCI_AT_NO_BONDING_MITM; else auth_type = HCI_AT_NO_BONDING; } else { - if (l2cap_pi(sk)->link_mode & L2CAP_LM_SECURE) + switch (l2cap_pi(sk)->sec_level) { + case BT_SECURITY_HIGH: auth_type = HCI_AT_GENERAL_BONDING_MITM; - else if (l2cap_pi(sk)->link_mode & L2CAP_LM_ENCRYPT) + break; + case BT_SECURITY_MEDIUM: auth_type = HCI_AT_GENERAL_BONDING; - else + break; + default: auth_type = HCI_AT_NO_BONDING; + break; + } } - hcon = hci_connect(hdev, ACL_LINK, dst, sec_level, auth_type); + hcon = hci_connect(hdev, ACL_LINK, dst, + l2cap_pi(sk)->sec_level, auth_type); if (!hcon) goto done; @@ -1219,7 +1214,15 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us break; } - l2cap_pi(sk)->link_mode = opt; + if (opt & L2CAP_LM_AUTH) + l2cap_pi(sk)->sec_level = BT_SECURITY_LOW; + if (opt & L2CAP_LM_ENCRYPT) + l2cap_pi(sk)->sec_level = BT_SECURITY_MEDIUM; + if (opt & L2CAP_LM_SECURE) + l2cap_pi(sk)->sec_level = BT_SECURITY_HIGH; + + l2cap_pi(sk)->role_switch = (opt & L2CAP_LM_MASTER); + l2cap_pi(sk)->force_reliable = (opt & L2CAP_LM_RELIABLE); break; default: @@ -1234,7 +1237,8 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen) { struct sock *sk = sock->sk; - int err = 0; + struct bt_security sec; + int len, err = 0; u32 opt; BT_DBG("sk %p", sk); @@ -1245,6 +1249,24 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch lock_sock(sk); switch (optname) { + case BT_SECURITY: + sec.level = BT_SECURITY_LOW; + + len = min_t(unsigned int, sizeof(sec), optlen); + if (copy_from_user((char *) &sec, optval, len)) { + err = -EFAULT; + break; + } + + if (sec.level < BT_SECURITY_LOW || + sec.level > BT_SECURITY_HIGH) { + err = -EINVAL; + break; + } + + l2cap_pi(sk)->sec_level = sec.level; + break; + case BT_DEFER_SETUP: if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) { err = -EINVAL; @@ -1274,6 +1296,7 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __us struct l2cap_options opts; struct l2cap_conninfo cinfo; int len, err = 0; + u32 opt; BT_DBG("sk %p", sk); @@ -1296,7 +1319,29 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __us break; case L2CAP_LM: - if (put_user(l2cap_pi(sk)->link_mode, (u32 __user *) optval)) + switch (l2cap_pi(sk)->sec_level) { + case BT_SECURITY_LOW: + opt = L2CAP_LM_AUTH; + break; + case BT_SECURITY_MEDIUM: + opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT; + break; + case BT_SECURITY_HIGH: + opt = L2CAP_LM_AUTH | L2CAP_LM_ENCRYPT | + L2CAP_LM_SECURE; + break; + default: + opt = 0; + break; + } + + if (l2cap_pi(sk)->role_switch) + opt |= L2CAP_LM_MASTER; + + if (l2cap_pi(sk)->force_reliable) + opt |= L2CAP_LM_RELIABLE; + + if (put_user(opt, (u32 __user *) optval)) err = -EFAULT; break; @@ -1329,6 +1374,7 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname, char __us static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; + struct bt_security sec; int len, err = 0; BT_DBG("sk %p", sk); @@ -1342,6 +1388,15 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, ch lock_sock(sk); switch (optname) { + case BT_SECURITY: + sec.level = l2cap_pi(sk)->sec_level; + + len = min_t(unsigned int, len, sizeof(sec)); + if (copy_to_user(optval, (char *) &sec, len)) + err = -EFAULT; + + break; + case BT_DEFER_SETUP: if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) { err = -EINVAL; @@ -1771,7 +1826,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd l2cap_pi(sk)->ident = cmd->ident; if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) { - if (l2cap_check_link_mode(sk)) { + if (l2cap_check_security(sk)) { if (bt_sk(sk)->defer_setup) { sk->sk_state = BT_CONNECT2; result = L2CAP_CR_PEND; @@ -2299,10 +2354,15 @@ static int l2cap_connect_ind(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type) continue; if (!bacmp(&bt_sk(sk)->src, &hdev->bdaddr)) { - lm1 |= (HCI_LM_ACCEPT | l2cap_pi(sk)->link_mode); + lm1 |= HCI_LM_ACCEPT; + if (l2cap_pi(sk)->role_switch) + lm1 |= HCI_LM_MASTER; exact++; - } else if (!bacmp(&bt_sk(sk)->src, BDADDR_ANY)) - lm2 |= (HCI_LM_ACCEPT | l2cap_pi(sk)->link_mode); + } else if (!bacmp(&bt_sk(sk)->src, BDADDR_ANY)) { + lm2 |= HCI_LM_ACCEPT; + if (l2cap_pi(sk)->role_switch) + lm2 |= HCI_LM_MASTER; + } } read_unlock(&l2cap_sk_list.lock); @@ -2361,7 +2421,7 @@ static int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) bh_lock_sock(sk); if (!status && encrypt == 0x00 && - (pi->link_mode & L2CAP_LM_SECURE) && + pi->sec_level == BT_SECURITY_HIGH && (sk->sk_state == BT_CONNECTED || sk->sk_state == BT_CONFIG)) { __l2cap_sock_close(sk, ECONNREFUSED); @@ -2510,10 +2570,10 @@ static ssize_t l2cap_sysfs_show(struct class *dev, char *buf) sk_for_each(sk, node, &l2cap_sk_list.head) { struct l2cap_pinfo *pi = l2cap_pi(sk); - str += sprintf(str, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d 0x%x\n", + str += sprintf(str, "%s %s %d %d 0x%4.4x 0x%4.4x %d %d %d\n", batostr(&bt_sk(sk)->src), batostr(&bt_sk(sk)->dst), sk->sk_state, btohs(pi->psm), pi->scid, pi->dcid, - pi->imtu, pi->omtu, pi->link_mode); + pi->imtu, pi->omtu, pi->sec_level); } read_unlock_bh(&l2cap_sk_list.lock); -- cgit v1.2.3 From 9f2c8a03fbb3048cf38b158f87aa0c3c09bca084 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 15 Jan 2009 21:58:40 +0100 Subject: Bluetooth: Replace RFCOMM link mode with security level Change the RFCOMM internals to use the new security levels and remove the link mode details. Signed-off-by: Marcel Holtmann --- net/bluetooth/rfcomm/core.c | 28 +++++------------ net/bluetooth/rfcomm/sock.c | 75 ++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 75 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 68f70c5270c6..db83f92d274c 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -223,21 +223,11 @@ static int rfcomm_l2sock_create(struct socket **sock) return err; } -static inline int rfcomm_check_link_mode(struct rfcomm_dlc *d) +static inline int rfcomm_check_security(struct rfcomm_dlc *d) { struct sock *sk = d->session->sock->sk; - struct l2cap_conn *conn = l2cap_pi(sk)->conn; - if (d->link_mode & RFCOMM_LM_SECURE) - return hci_conn_security(conn->hcon, BT_SECURITY_HIGH); - - if (d->link_mode & RFCOMM_LM_ENCRYPT) - return hci_conn_security(conn->hcon, BT_SECURITY_MEDIUM); - - if (d->link_mode & RFCOMM_LM_AUTH) - return hci_conn_security(conn->hcon, BT_SECURITY_LOW); - - return 1; + return hci_conn_security(l2cap_pi(sk)->conn->hcon, d->sec_level); } /* ---- RFCOMM DLCs ---- */ @@ -390,7 +380,7 @@ static int __rfcomm_dlc_open(struct rfcomm_dlc *d, bdaddr_t *src, bdaddr_t *dst, d->cfc = (s->cfc == RFCOMM_CFC_UNKNOWN) ? 0 : s->cfc; if (s->state == BT_CONNECTED) { - if (rfcomm_check_link_mode(d)) + if (rfcomm_check_security(d)) rfcomm_send_pn(s, 1, d); else set_bit(RFCOMM_AUTH_PENDING, &d->flags); @@ -1192,7 +1182,7 @@ void rfcomm_dlc_accept(struct rfcomm_dlc *d) d->state_change(d, 0); rfcomm_dlc_unlock(d); - if (d->link_mode & RFCOMM_LM_MASTER) + if (d->role_switch) hci_conn_switch_role(l2cap_pi(sk)->conn->hcon, 0x00); rfcomm_send_msc(d->session, 1, d->dlci, d->v24_sig); @@ -1200,7 +1190,7 @@ void rfcomm_dlc_accept(struct rfcomm_dlc *d) static void rfcomm_check_accept(struct rfcomm_dlc *d) { - if (rfcomm_check_link_mode(d)) { + if (rfcomm_check_security(d)) { if (d->defer_setup) { set_bit(RFCOMM_DEFER_SETUP, &d->flags); rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT); @@ -1660,7 +1650,7 @@ static void rfcomm_process_connect(struct rfcomm_session *s) d = list_entry(p, struct rfcomm_dlc, list); if (d->state == BT_CONFIG) { d->mtu = s->mtu; - if (rfcomm_check_link_mode(d)) { + if (rfcomm_check_security(d)) { rfcomm_send_pn(s, 1, d); } else { set_bit(RFCOMM_AUTH_PENDING, &d->flags); @@ -1748,10 +1738,6 @@ static inline void rfcomm_process_dlcs(struct rfcomm_session *s) } else rfcomm_dlc_accept(d); } - if (d->link_mode & RFCOMM_LM_SECURE) { - struct sock *sk = s->sock->sk; - hci_conn_change_link_key(l2cap_pi(sk)->conn->hcon); - } continue; } else if (test_and_clear_bit(RFCOMM_AUTH_REJECT, &d->flags)) { rfcomm_dlc_clear_timer(d); @@ -1994,7 +1980,7 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) d = list_entry(p, struct rfcomm_dlc, list); if (!status && encrypt == 0x00 && - (d->link_mode & RFCOMM_LM_ENCRYPT) && + d->sec_level == BT_SECURITY_HIGH && (d->state == BT_CONNECTED || d->state == BT_CONFIG)) { __rfcomm_dlc_close(d, ECONNREFUSED); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index d37a829a81e4..9986ef35c890 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -261,14 +261,19 @@ static void rfcomm_sock_init(struct sock *sk, struct sock *parent) if (parent) { sk->sk_type = parent->sk_type; - pi->link_mode = rfcomm_pi(parent)->link_mode; pi->dlc->defer_setup = bt_sk(parent)->defer_setup; + + pi->sec_level = rfcomm_pi(parent)->sec_level; + pi->role_switch = rfcomm_pi(parent)->role_switch; } else { - pi->link_mode = 0; pi->dlc->defer_setup = 0; + + pi->sec_level = BT_SECURITY_LOW; + pi->role_switch = 0; } - pi->dlc->link_mode = pi->link_mode; + pi->dlc->sec_level = pi->sec_level; + pi->dlc->role_switch = pi->role_switch; } static struct proto rfcomm_proto = { @@ -408,7 +413,8 @@ static int rfcomm_sock_connect(struct socket *sock, struct sockaddr *addr, int a bacpy(&bt_sk(sk)->dst, &sa->rc_bdaddr); rfcomm_pi(sk)->channel = sa->rc_channel; - d->link_mode = rfcomm_pi(sk)->link_mode; + d->sec_level = rfcomm_pi(sk)->sec_level; + d->role_switch = rfcomm_pi(sk)->role_switch; err = rfcomm_dlc_open(d, &bt_sk(sk)->src, &sa->rc_bdaddr, sa->rc_channel); if (!err) @@ -741,7 +747,14 @@ static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname, char __u break; } - rfcomm_pi(sk)->link_mode = opt; + if (opt & RFCOMM_LM_AUTH) + rfcomm_pi(sk)->sec_level = BT_SECURITY_LOW; + if (opt & RFCOMM_LM_ENCRYPT) + rfcomm_pi(sk)->sec_level = BT_SECURITY_MEDIUM; + if (opt & RFCOMM_LM_SECURE) + rfcomm_pi(sk)->sec_level = BT_SECURITY_HIGH; + + rfcomm_pi(sk)->role_switch = (opt & RFCOMM_LM_MASTER); break; default: @@ -756,7 +769,8 @@ static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname, char __u static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen) { struct sock *sk = sock->sk; - int err = 0; + struct bt_security sec; + int len, err = 0; u32 opt; BT_DBG("sk %p", sk); @@ -767,6 +781,23 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, c lock_sock(sk); switch (optname) { + case BT_SECURITY: + sec.level = BT_SECURITY_LOW; + + len = min_t(unsigned int, sizeof(sec), optlen); + if (copy_from_user((char *) &sec, optval, len)) { + err = -EFAULT; + break; + } + + if (sec.level > BT_SECURITY_HIGH) { + err = -EINVAL; + break; + } + + rfcomm_pi(sk)->sec_level = sec.level; + break; + case BT_DEFER_SETUP: if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) { err = -EINVAL; @@ -796,6 +827,7 @@ static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __u struct sock *l2cap_sk; struct rfcomm_conninfo cinfo; int len, err = 0; + u32 opt; BT_DBG("sk %p", sk); @@ -806,7 +838,26 @@ static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __u switch (optname) { case RFCOMM_LM: - if (put_user(rfcomm_pi(sk)->link_mode, (u32 __user *) optval)) + switch (rfcomm_pi(sk)->sec_level) { + case BT_SECURITY_LOW: + opt = RFCOMM_LM_AUTH; + break; + case BT_SECURITY_MEDIUM: + opt = RFCOMM_LM_AUTH | RFCOMM_LM_ENCRYPT; + break; + case BT_SECURITY_HIGH: + opt = RFCOMM_LM_AUTH | RFCOMM_LM_ENCRYPT | + RFCOMM_LM_SECURE; + break; + default: + opt = 0; + break; + } + + if (rfcomm_pi(sk)->role_switch) + opt |= RFCOMM_LM_MASTER; + + if (put_user(opt, (u32 __user *) optval)) err = -EFAULT; break; @@ -840,6 +891,7 @@ static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __u static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; + struct bt_security sec; int len, err = 0; BT_DBG("sk %p", sk); @@ -853,6 +905,15 @@ static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, c lock_sock(sk); switch (optname) { + case BT_SECURITY: + sec.level = rfcomm_pi(sk)->sec_level; + + len = min_t(unsigned int, len, sizeof(sec)); + if (copy_to_user(optval, (char *) &sec, len)) + err = -EFAULT; + + break; + case BT_DEFER_SETUP: if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) { err = -EINVAL; -- cgit v1.2.3 From 8c84b83076b5062f59b6167cdda90d9e5124aa71 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Fri, 16 Jan 2009 08:17:51 +0100 Subject: Bluetooth: Pause RFCOMM TX when encryption drops A role switch with devices following the Bluetooth pre-2.1 standards or without Encryption Pause and Resume support is not possible if encryption is enabled. Most newer headsets require the role switch, but also require that the connection is encrypted. For connections with a high security mode setting, the link will be immediately dropped. When the connection uses medium security mode setting, then a grace period is introduced where the TX is halted and the remote device gets a change to re-enable encryption after the role switch. If not re-enabled the link will be dropped. Based on initial work by Ville Tervo Signed-off-by: Marcel Holtmann --- net/bluetooth/rfcomm/core.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index db83f92d274c..dafaee91cdfb 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -1979,12 +1979,23 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) list_for_each_safe(p, n, &s->dlcs) { d = list_entry(p, struct rfcomm_dlc, list); - if (!status && encrypt == 0x00 && - d->sec_level == BT_SECURITY_HIGH && - (d->state == BT_CONNECTED || - d->state == BT_CONFIG)) { - __rfcomm_dlc_close(d, ECONNREFUSED); - continue; + if (test_and_clear_bit(RFCOMM_SEC_PENDING, &d->flags)) { + rfcomm_dlc_clear_timer(d); + if (status || encrypt == 0x00) { + __rfcomm_dlc_close(d, ECONNREFUSED); + continue; + } + } + + if (d->state == BT_CONNECTED && !status && encrypt == 0x00) { + if (d->sec_level == BT_SECURITY_MEDIUM) { + set_bit(RFCOMM_SEC_PENDING, &d->flags); + rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT); + continue; + } else if (d->sec_level == BT_SECURITY_HIGH) { + __rfcomm_dlc_close(d, ECONNREFUSED); + continue; + } } if (!test_and_clear_bit(RFCOMM_AUTH_PENDING, &d->flags)) -- cgit v1.2.3 From f62e4323ab43c59e7cd7f72c1eb392d7c767ce5a Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 15 Jan 2009 21:58:44 +0100 Subject: Bluetooth: Disconnect L2CAP connections without encryption For L2CAP connections with high security setting, the link will be immediately dropped when the encryption gets disabled. For L2CAP connections with medium security there will be grace period where the remote device has the chance to re-enable encryption. If it doesn't happen then the link will also be disconnected. The requirement for the grace period with medium security comes from Bluetooth 2.0 and earlier devices that require role switching. Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index e899a9371c00..b2d279c245cf 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -77,7 +77,9 @@ static void l2cap_sock_timeout(unsigned long arg) bh_lock_sock(sk); - if (sk->sk_state == BT_CONNECT && + if (sk->sk_state == BT_CONNECTED || sk->sk_state == BT_CONFIG) + reason = ECONNREFUSED; + else if (sk->sk_state == BT_CONNECT && l2cap_pi(sk)->sec_level != BT_SECURITY_SDP) reason = ECONNREFUSED; else @@ -2400,6 +2402,20 @@ static int l2cap_disconn_ind(struct hci_conn *hcon, u8 reason) return 0; } +static inline void l2cap_check_encryption(struct sock *sk, u8 encrypt) +{ + if (encrypt == 0x00) { + if (l2cap_pi(sk)->sec_level == BT_SECURITY_MEDIUM) { + l2cap_sock_clear_timer(sk); + l2cap_sock_set_timer(sk, HZ * 5); + } else if (l2cap_pi(sk)->sec_level == BT_SECURITY_HIGH) + __l2cap_sock_close(sk, ECONNREFUSED); + } else { + if (l2cap_pi(sk)->sec_level == BT_SECURITY_MEDIUM) + l2cap_sock_clear_timer(sk); + } +} + static int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) { struct l2cap_chan_list *l; @@ -2416,15 +2432,11 @@ static int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) read_lock(&l->lock); for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { - struct l2cap_pinfo *pi = l2cap_pi(sk); - bh_lock_sock(sk); - if (!status && encrypt == 0x00 && - pi->sec_level == BT_SECURITY_HIGH && - (sk->sk_state == BT_CONNECTED || + if (!status && (sk->sk_state == BT_CONNECTED || sk->sk_state == BT_CONFIG)) { - __l2cap_sock_close(sk, ECONNREFUSED); + l2cap_check_encryption(sk, encrypt); bh_unlock_sock(sk); continue; } -- cgit v1.2.3 From 0588d94fd7e414367a7ae517569d2222441c255f Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Fri, 16 Jan 2009 10:06:13 +0100 Subject: Bluetooth: Restrict application of socket options The new socket options should only be evaluated for SOL_BLUETOOTH level and not for every other level. Previously this causes some minor issues when detecting if a kernel with certain features is available. Also restrict BT_SECURITY to SOCK_SEQPACKET for L2CAP and SOCK_STREAM for the RFCOMM protocol. Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 16 ++++++++++++++++ net/bluetooth/rfcomm/sock.c | 16 ++++++++++++++++ 2 files changed, 32 insertions(+) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index b2d279c245cf..82a9e692baed 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1248,10 +1248,18 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch if (level == SOL_L2CAP) return l2cap_sock_setsockopt_old(sock, optname, optval, optlen); + if (level != SOL_BLUETOOTH) + return -ENOPROTOOPT; + lock_sock(sk); switch (optname) { case BT_SECURITY: + if (sk->sk_type != SOCK_SEQPACKET) { + err = -EINVAL; + break; + } + sec.level = BT_SECURITY_LOW; len = min_t(unsigned int, sizeof(sec), optlen); @@ -1384,6 +1392,9 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, ch if (level == SOL_L2CAP) return l2cap_sock_getsockopt_old(sock, optname, optval, optlen); + if (level != SOL_BLUETOOTH) + return -ENOPROTOOPT; + if (get_user(len, optlen)) return -EFAULT; @@ -1391,6 +1402,11 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, ch switch (optname) { case BT_SECURITY: + if (sk->sk_type != SOCK_SEQPACKET) { + err = -EINVAL; + break; + } + sec.level = l2cap_pi(sk)->sec_level; len = min_t(unsigned int, len, sizeof(sec)); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 9986ef35c890..7f482784e9f7 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -778,10 +778,18 @@ static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, c if (level == SOL_RFCOMM) return rfcomm_sock_setsockopt_old(sock, optname, optval, optlen); + if (level != SOL_BLUETOOTH) + return -ENOPROTOOPT; + lock_sock(sk); switch (optname) { case BT_SECURITY: + if (sk->sk_type != SOCK_STREAM) { + err = -EINVAL; + break; + } + sec.level = BT_SECURITY_LOW; len = min_t(unsigned int, sizeof(sec), optlen); @@ -899,6 +907,9 @@ static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, c if (level == SOL_RFCOMM) return rfcomm_sock_getsockopt_old(sock, optname, optval, optlen); + if (level != SOL_BLUETOOTH) + return -ENOPROTOOPT; + if (get_user(len, optlen)) return -EFAULT; @@ -906,6 +917,11 @@ static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, c switch (optname) { case BT_SECURITY: + if (sk->sk_type != SOCK_STREAM) { + err = -EINVAL; + break; + } + sec.level = rfcomm_pi(sk)->sec_level; len = min_t(unsigned int, len, sizeof(sec)); -- cgit v1.2.3 From 5f9018af004fa8635bbbe3ab2dc61e8a686edfaa Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Fri, 16 Jan 2009 10:09:50 +0100 Subject: Bluetooth: Update version numbers With the support for the enhanced security model and the support for deferring connection setup, it is a good idea to increase various version numbers. This is purely cosmetic and has no effect on the behavior, but can be really helpful when debugging problems in different kernel versions. Signed-off-by: Marcel Holtmann --- net/bluetooth/af_bluetooth.c | 2 +- net/bluetooth/l2cap.c | 2 +- net/bluetooth/rfcomm/core.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 7c0031ff8cfb..0d1b94da978e 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -41,7 +41,7 @@ #include -#define VERSION "2.14" +#define VERSION "2.15" /* Bluetooth sockets */ #define BT_MAX_PROTO 8 diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 82a9e692baed..71a064fa0285 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -50,7 +50,7 @@ #include #include -#define VERSION "2.11" +#define VERSION "2.12" static u32 l2cap_feat_mask = 0x0000; diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index dafaee91cdfb..ad0e25a21832 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -46,7 +46,7 @@ #include #include -#define VERSION "1.10" +#define VERSION "1.11" static int disable_cfc = 0; static int channel_mtu = -1; -- cgit v1.2.3 From dd2efd03b49d56ae795c71335bc7358022514c32 Mon Sep 17 00:00:00 2001 From: Dave Young Date: Sat, 31 Jan 2009 13:51:15 +0800 Subject: Bluetooth: Remove CONFIG_DEBUG_LOCK_ALLOC ifdefs Due to lockdep changes, the CONFIG_DEBUG_LOCK_ALLOC ifdef is not needed now. So just remove it here. The following commit fixed the !lockdep build warnings: commit e8f6fbf62de37cbc2e179176ac7010d5f4396b67 Author: Ingo Molnar Date: Wed Nov 12 01:38:36 2008 +0000 lockdep: include/linux/lockdep.h - fix warning in net/bluetooth/af_bluetooth.c Signed-off-by: Dave Young Signed-off-by: Marcel Holtmann --- net/bluetooth/af_bluetooth.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 0d1b94da978e..ded57974390e 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -48,7 +48,6 @@ static struct net_proto_family *bt_proto[BT_MAX_PROTO]; static DEFINE_RWLOCK(bt_proto_lock); -#ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key bt_lock_key[BT_MAX_PROTO]; static const char *bt_key_strings[BT_MAX_PROTO] = { "sk_lock-AF_BLUETOOTH-BTPROTO_L2CAP", @@ -86,11 +85,6 @@ static inline void bt_sock_reclassify_lock(struct socket *sock, int proto) bt_slock_key_strings[proto], &bt_slock_key[proto], bt_key_strings[proto], &bt_lock_key[proto]); } -#else -static inline void bt_sock_reclassify_lock(struct socket *sock, int proto) -{ -} -#endif int bt_sock_register(int proto, struct net_proto_family *ops) { -- cgit v1.2.3 From 6e1031a40029492c10509e8c3dcac9b611438ccb Mon Sep 17 00:00:00 2001 From: Jaikumar Ganesh Date: Mon, 2 Feb 2009 18:03:57 -0800 Subject: Bluetooth: When encryption is dropped, do not send RFCOMM packets During a role change with pre-Bluetooth 2.1 devices, the remote side drops the encryption of the RFCOMM connection. We allow a grace period for the encryption to be re-established, before dropping the connection. During this grace period, the RFCOMM_SEC_PENDING flag is set. Check this flag before sending RFCOMM packets. Signed-off-by: Jaikumar Ganesh Signed-off-by: Marcel Holtmann --- net/bluetooth/rfcomm/core.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index ad0e25a21832..3717c25ba33a 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -1749,6 +1749,9 @@ static inline void rfcomm_process_dlcs(struct rfcomm_session *s) continue; } + if (test_bit(RFCOMM_SEC_PENDING, &d->flags)) + continue; + if (test_bit(RFCOMM_TX_THROTTLED, &s->flags)) continue; -- cgit v1.2.3 From 255c76014af74165428e7aa16414b857e2bdccf2 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 4 Feb 2009 21:07:19 +0100 Subject: Bluetooth: Don't check encryption for L2CAP raw sockets For L2CAP sockets with medium and high security requirement a missing encryption will enforce the closing of the link. For the L2CAP raw sockets this is not needed, so skip that check. This fixes a crash when pairing Bluetooth 2.0 (and earlier) devices since the L2CAP state machine got confused and then locked up the whole system. Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 71a064fa0285..b677af671f31 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -2420,6 +2420,9 @@ static int l2cap_disconn_ind(struct hci_conn *hcon, u8 reason) static inline void l2cap_check_encryption(struct sock *sk, u8 encrypt) { + if (sk->sk_type != SOCK_SEQPACKET) + return; + if (encrypt == 0x00) { if (l2cap_pi(sk)->sec_level == BT_SECURITY_MEDIUM) { l2cap_sock_clear_timer(sk); -- cgit v1.2.3 From efc7688b557dd1be10eead7399b315efcb1dbc74 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Fri, 6 Feb 2009 09:13:37 +0100 Subject: Bluetooth: Add SCO fallback for eSCO connection attempts When attempting to setup eSCO connections it can happen that some link manager implementations fail to properly negotiate the eSCO parameters and thus fail the eSCO setup. Normally the link manager is responsible for the negotiation of the parameters and actually fallback to SCO if no agreement can be reached. In cases where the link manager is just too stupid, then at least try to establish a SCO link if eSCO fails. For the Bluetooth devices with EDR support this includes handling packet types of EDR basebands. This is particular tricky since for the EDR the logic of enabling/disabling one specific packet type is turned around. This fix contains an extra bitmask to disable eSCO EDR packet when trying to fallback to a SCO connection. Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 9 +++++++-- net/bluetooth/hci_event.c | 16 ++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 98f97a1e9bbb..2435e830ba60 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -123,6 +123,8 @@ void hci_add_sco(struct hci_conn *conn, __u16 handle) conn->state = BT_CONNECT; conn->out = 1; + conn->attempt++; + cp.handle = cpu_to_le16(handle); cp.pkt_type = cpu_to_le16(conn->pkt_type); @@ -139,6 +141,8 @@ void hci_setup_sync(struct hci_conn *conn, __u16 handle) conn->state = BT_CONNECT; conn->out = 1; + conn->attempt++; + cp.handle = cpu_to_le16(handle); cp.pkt_type = cpu_to_le16(conn->pkt_type); @@ -216,12 +220,13 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) break; case SCO_LINK: if (lmp_esco_capable(hdev)) - conn->pkt_type = hdev->esco_type & SCO_ESCO_MASK; + conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) | + (hdev->esco_type & EDR_ESCO_MASK); else conn->pkt_type = hdev->pkt_type & SCO_PTYPE_MASK; break; case ESCO_LINK: - conn->pkt_type = hdev->esco_type; + conn->pkt_type = hdev->esco_type & ~EDR_ESCO_MASK; break; } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 014fc8b320ba..899b8991a466 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -484,6 +484,15 @@ static void hci_cc_read_local_features(struct hci_dev *hdev, struct sk_buff *skb if (hdev->features[4] & LMP_EV5) hdev->esco_type |= (ESCO_EV5); + if (hdev->features[5] & LMP_EDR_ESCO_2M) + hdev->esco_type |= (ESCO_2EV3); + + if (hdev->features[5] & LMP_EDR_ESCO_3M) + hdev->esco_type |= (ESCO_3EV3); + + if (hdev->features[5] & LMP_EDR_3S_ESCO) + hdev->esco_type |= (ESCO_2EV5 | ESCO_3EV5); + BT_DBG("%s features 0x%.2x%.2x%.2x%.2x%.2x%.2x%.2x%.2x", hdev->name, hdev->features[0], hdev->features[1], hdev->features[2], hdev->features[3], @@ -1639,6 +1648,13 @@ static inline void hci_sync_conn_complete_evt(struct hci_dev *hdev, struct sk_bu conn->type = SCO_LINK; } + if (conn->out && ev->status == 0x1c && conn->attempt < 2) { + conn->pkt_type = (hdev->esco_type & SCO_ESCO_MASK) | + (hdev->esco_type & EDR_ESCO_MASK); + hci_setup_sync(conn, conn->link->handle); + goto unlock; + } + if (!ev->status) { conn->handle = __le16_to_cpu(ev->handle); conn->state = BT_CONNECTED; -- cgit v1.2.3 From 0684e5f9fb9e3f7e168ab831dfca693bcb44805b Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 9 Feb 2009 02:48:38 +0100 Subject: Bluetooth: Use general bonding whenever possible When receiving incoming connection to specific services, always use general bonding. This ensures that the link key gets stored and can be used for further authentications. Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 23 +++++++++-------------- net/bluetooth/l2cap.c | 16 +++++++++++++++- net/bluetooth/rfcomm/core.c | 16 +++++++++++++++- 3 files changed, 39 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 2435e830ba60..7fc4c048b57b 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -391,19 +391,14 @@ int hci_conn_check_link_mode(struct hci_conn *conn) EXPORT_SYMBOL(hci_conn_check_link_mode); /* Authenticate remote device */ -static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level) +static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) { BT_DBG("conn %p", conn); - if (sec_level > conn->sec_level) - conn->link_mode &= ~HCI_LM_AUTH; - - conn->sec_level = sec_level; - - if (sec_level == BT_SECURITY_HIGH) - conn->auth_type |= 0x01; - - if (conn->link_mode & HCI_LM_AUTH) + if (sec_level > conn->sec_level) { + conn->sec_level = sec_level; + conn->auth_type = auth_type; + } else if (conn->link_mode & HCI_LM_AUTH) return 1; if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) { @@ -417,7 +412,7 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level) } /* Enable security */ -int hci_conn_security(struct hci_conn *conn, __u8 sec_level) +int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) { BT_DBG("conn %p", conn); @@ -426,18 +421,18 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level) if (sec_level == BT_SECURITY_LOW) { if (conn->ssp_mode > 0 && conn->hdev->ssp_mode > 0) - return hci_conn_auth(conn, sec_level); + return hci_conn_auth(conn, sec_level, auth_type); else return 1; } if (conn->link_mode & HCI_LM_ENCRYPT) - return hci_conn_auth(conn, sec_level); + return hci_conn_auth(conn, sec_level, auth_type); if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) return 0; - if (hci_conn_auth(conn, sec_level)) { + if (hci_conn_auth(conn, sec_level, auth_type)) { struct hci_cp_set_conn_encrypt cp; cp.handle = cpu_to_le16(conn->handle); cp.encrypt = 1; diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index b677af671f31..8a93dde4095b 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -263,8 +263,22 @@ static void l2cap_chan_del(struct sock *sk, int err) static inline int l2cap_check_security(struct sock *sk) { struct l2cap_conn *conn = l2cap_pi(sk)->conn; + __u8 auth_type; + + switch (l2cap_pi(sk)->sec_level) { + case BT_SECURITY_HIGH: + auth_type = HCI_AT_GENERAL_BONDING_MITM; + break; + case BT_SECURITY_MEDIUM: + auth_type = HCI_AT_GENERAL_BONDING; + break; + default: + auth_type = HCI_AT_NO_BONDING; + break; + } - return hci_conn_security(conn->hcon, l2cap_pi(sk)->sec_level); + return hci_conn_security(conn->hcon, l2cap_pi(sk)->sec_level, + auth_type); } static inline u8 l2cap_get_ident(struct l2cap_conn *conn) diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 3717c25ba33a..1828ec06ad1c 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -226,8 +226,22 @@ static int rfcomm_l2sock_create(struct socket **sock) static inline int rfcomm_check_security(struct rfcomm_dlc *d) { struct sock *sk = d->session->sock->sk; + __u8 auth_type; - return hci_conn_security(l2cap_pi(sk)->conn->hcon, d->sec_level); + switch (d->sec_level) { + case BT_SECURITY_HIGH: + auth_type = HCI_AT_GENERAL_BONDING_MITM; + break; + case BT_SECURITY_MEDIUM: + auth_type = HCI_AT_GENERAL_BONDING; + break; + default: + auth_type = HCI_AT_NO_BONDING; + break; + } + + return hci_conn_security(l2cap_pi(sk)->conn->hcon, d->sec_level, + auth_type); } /* ---- RFCOMM DLCs ---- */ -- cgit v1.2.3 From 657e17b03c80bec817975984d221bef716f83558 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Fri, 6 Feb 2009 19:45:36 +0100 Subject: Bluetooth: Set authentication requirements if not available When no authentication requirements are selected, but an outgoing or incoming connection has requested any kind of security enforcement, then set these authentication requirements. This ensures that the userspace always gets informed about the authentication requirements (if available). Only when no security enforcement has happened, the kernel will signal invalid requirements. Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 7fc4c048b57b..dcdaa4be7847 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -416,6 +416,9 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) { BT_DBG("conn %p", conn); + if (conn->auth_type == 0xff) + conn->auth_type = auth_type; + if (sec_level == BT_SECURITY_SDP) return 1; -- cgit v1.2.3 From 984947dc64f82bc6cafa4d84ba1a139718f634a8 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Fri, 6 Feb 2009 23:35:19 +0100 Subject: Bluetooth: Fix race condition with L2CAP information request When two L2CAP connections are requested quickly after the ACL link has been established there exists a window for a race condition where a connection request is sent before the information response has been received. Any connection request should only be sent after an exchange of the extended features mask has been finished. Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 8a93dde4095b..07fdbc7dd54d 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -320,6 +320,9 @@ static void l2cap_do_start(struct sock *sk) struct l2cap_conn *conn = l2cap_pi(sk)->conn; if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) { + if (!(conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE)) + return; + if (l2cap_check_security(sk)) { struct l2cap_conn_req req; req.scid = cpu_to_le16(l2cap_pi(sk)->scid); @@ -455,6 +458,8 @@ static void l2cap_info_timeout(unsigned long arg) conn->info_ident = 0; + conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; + l2cap_conn_start(conn); } @@ -1787,6 +1792,9 @@ static inline int l2cap_command_rej(struct l2cap_conn *conn, struct l2cap_cmd_hd cmd->ident == conn->info_ident) { conn->info_ident = 0; del_timer(&conn->info_timer); + + conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; + l2cap_conn_start(conn); } @@ -1857,7 +1865,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd l2cap_pi(sk)->ident = cmd->ident; - if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) { + if (conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_DONE) { if (l2cap_check_security(sk)) { if (bt_sk(sk)->defer_setup) { sk->sk_state = BT_CONNECT2; @@ -2176,10 +2184,13 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_cm del_timer(&conn->info_timer); - if (type == L2CAP_IT_FEAT_MASK) + if (type == L2CAP_IT_FEAT_MASK) { conn->feat_mask = get_unaligned_le32(rsp->data); - l2cap_conn_start(conn); + conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; + + l2cap_conn_start(conn); + } return 0; } -- cgit v1.2.3 From 6a8d3010b313d99adbb28f1826fac0234395bb26 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Fri, 6 Feb 2009 23:56:36 +0100 Subject: Bluetooth: Fix double L2CAP connection request If the remote L2CAP server uses authentication pending stage and encryption is enabled it can happen that a L2CAP connection request is sent twice due to a race condition in the connection state machine. When the remote side indicates any kind of connection pending, then track this state and skip sending of L2CAP commands for this period. Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 07fdbc7dd54d..01f750142d55 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1946,11 +1946,14 @@ static inline int l2cap_connect_rsp(struct l2cap_conn *conn, struct l2cap_cmd_hd l2cap_pi(sk)->dcid = dcid; l2cap_pi(sk)->conf_state |= L2CAP_CONF_REQ_SENT; + l2cap_pi(sk)->conf_state &= ~L2CAP_CONF_CONNECT_PEND; + l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, l2cap_build_conf_req(sk, req), req); break; case L2CAP_CR_PEND: + l2cap_pi(sk)->conf_state |= L2CAP_CONF_CONNECT_PEND; break; default: @@ -2478,6 +2481,11 @@ static int l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) for (sk = l->head; sk; sk = l2cap_pi(sk)->next_c) { bh_lock_sock(sk); + if (l2cap_pi(sk)->conf_state & L2CAP_CONF_CONNECT_PEND) { + bh_unlock_sock(sk); + continue; + } + if (!status && (sk->sk_state == BT_CONNECTED || sk->sk_state == BT_CONFIG)) { l2cap_check_encryption(sk, encrypt); -- cgit v1.2.3 From 435fef20acfc48f46476abad55b0cd3aa47b8365 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 9 Feb 2009 03:55:28 +0100 Subject: Bluetooth: Don't enforce authentication for L2CAP PSM 1 and 3 The recommendation for the L2CAP PSM 1 (SDP) is to not use any kind of authentication or encryption. So don't trigger authentication for incoming and outgoing SDP connections. For L2CAP PSM 3 (RFCOMM) there is no clear requirement, but with Bluetooth 2.1 the initiator is required to enable authentication and encryption first and this gets enforced. So there is no need to trigger an additional authentication step. The RFCOMM service security will make sure that a secure enough link key is present. When the encryption gets enabled after the SDP connection setup, then switch the security level from SDP to low security. Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 01f750142d55..88340d24d11d 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -805,7 +805,7 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_ l2cap_pi(sk)->sport = la->l2_psm; sk->sk_state = BT_BOUND; - if (btohs(la->l2_psm) == 0x0001) + if (btohs(la->l2_psm) == 0x0001 || btohs(la->l2_psm) == 0x0003) l2cap_pi(sk)->sec_level = BT_SECURITY_SDP; } @@ -852,6 +852,9 @@ static int l2cap_do_connect(struct sock *sk) auth_type = HCI_AT_NO_BONDING_MITM; else auth_type = HCI_AT_NO_BONDING; + + if (l2cap_pi(sk)->sec_level == BT_SECURITY_LOW) + l2cap_pi(sk)->sec_level = BT_SECURITY_SDP; } else { switch (l2cap_pi(sk)->sec_level) { case BT_SECURITY_HIGH: -- cgit v1.2.3 From e1027a7c69700301d14db03d2e049ee60c4f92df Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 9 Feb 2009 09:18:02 +0100 Subject: Bluetooth: Request L2CAP fixed channel list if available If the extended features mask indicates support for fixed channels, request the list of available fixed channels. This also enables the fixed channel features bit so remote implementations can request information about it. Currently only the signal channel will be listed. Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 40 +++++++++++++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 88340d24d11d..985366c36f48 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -50,9 +50,10 @@ #include #include -#define VERSION "2.12" +#define VERSION "2.13" -static u32 l2cap_feat_mask = 0x0000; +static u32 l2cap_feat_mask = 0x0080; +static u8 l2cap_fixed_chan[8] = { 0x02, }; static const struct proto_ops l2cap_sock_ops; @@ -456,9 +457,8 @@ static void l2cap_info_timeout(unsigned long arg) { struct l2cap_conn *conn = (void *) arg; - conn->info_ident = 0; - conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; + conn->info_ident = 0; l2cap_conn_start(conn); } @@ -1793,10 +1793,10 @@ static inline int l2cap_command_rej(struct l2cap_conn *conn, struct l2cap_cmd_hd if ((conn->info_state & L2CAP_INFO_FEAT_MASK_REQ_SENT) && cmd->ident == conn->info_ident) { - conn->info_ident = 0; del_timer(&conn->info_timer); conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; + conn->info_ident = 0; l2cap_conn_start(conn); } @@ -2165,6 +2165,14 @@ static inline int l2cap_information_req(struct l2cap_conn *conn, struct l2cap_cm put_unaligned(cpu_to_le32(l2cap_feat_mask), (__le32 *) rsp->data); l2cap_send_cmd(conn, cmd->ident, L2CAP_INFO_RSP, sizeof(buf), buf); + } else if (type == L2CAP_IT_FIXED_CHAN) { + u8 buf[12]; + struct l2cap_info_rsp *rsp = (struct l2cap_info_rsp *) buf; + rsp->type = cpu_to_le16(L2CAP_IT_FIXED_CHAN); + rsp->result = cpu_to_le16(L2CAP_IR_SUCCESS); + memcpy(buf + 4, l2cap_fixed_chan, 8); + l2cap_send_cmd(conn, cmd->ident, + L2CAP_INFO_RSP, sizeof(buf), buf); } else { struct l2cap_info_rsp rsp; rsp.type = cpu_to_le16(type); @@ -2186,14 +2194,28 @@ static inline int l2cap_information_rsp(struct l2cap_conn *conn, struct l2cap_cm BT_DBG("type 0x%4.4x result 0x%2.2x", type, result); - conn->info_ident = 0; - del_timer(&conn->info_timer); if (type == L2CAP_IT_FEAT_MASK) { conn->feat_mask = get_unaligned_le32(rsp->data); + if (conn->feat_mask & 0x0080) { + struct l2cap_info_req req; + req.type = cpu_to_le16(L2CAP_IT_FIXED_CHAN); + + conn->info_ident = l2cap_get_ident(conn); + + l2cap_send_cmd(conn, conn->info_ident, + L2CAP_INFO_REQ, sizeof(req), &req); + } else { + conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; + conn->info_ident = 0; + + l2cap_conn_start(conn); + } + } else if (type == L2CAP_IT_FIXED_CHAN) { conn->info_state |= L2CAP_INFO_FEAT_MASK_REQ_DONE; + conn->info_ident = 0; l2cap_conn_start(conn); } @@ -2589,7 +2611,7 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl goto drop; skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len), - skb->len); + skb->len); conn->rx_len = len - skb->len; } else { BT_DBG("Cont: frag len %d (expecting %d)", skb->len, conn->rx_len); @@ -2611,7 +2633,7 @@ static int l2cap_recv_acldata(struct hci_conn *hcon, struct sk_buff *skb, u16 fl } skb_copy_from_linear_data(skb, skb_put(conn->rx_skb, skb->len), - skb->len); + skb->len); conn->rx_len -= skb->len; if (!conn->rx_len) { -- cgit v1.2.3 From f29972de8e7476706ab3c01304a505e7c95d9040 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 12 Feb 2009 05:07:45 +0100 Subject: Bluetooth: Add CID field to L2CAP socket address structure In preparation for L2CAP fixed channel support, the CID value of a L2CAP connection needs to be accessible via the socket interface. The CID is the connection identifier and exists as source and destination value. So extend the L2CAP socket address structure with this field and change getsockname() and getpeername() to fill it in. The bind() and connect() functions have been modified to handle L2CAP socket address structures of variable sizes. This makes them future proof if additional fields need to be added. Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 55 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 985366c36f48..7bba469b6828 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -770,17 +770,21 @@ static int l2cap_sock_create(struct net *net, struct socket *sock, int protocol) return 0; } -static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) +static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) { - struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr; struct sock *sk = sock->sk; - int err = 0; + struct sockaddr_l2 la; + int len, err = 0; - BT_DBG("sk %p, %s %d", sk, batostr(&la->l2_bdaddr), la->l2_psm); + BT_DBG("sk %p", sk); if (!addr || addr->sa_family != AF_BLUETOOTH) return -EINVAL; + memset(&la, 0, sizeof(la)); + len = min_t(unsigned int, sizeof(la), alen); + memcpy(&la, addr, len); + lock_sock(sk); if (sk->sk_state != BT_OPEN) { @@ -788,7 +792,7 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_ goto done; } - if (la->l2_psm && btohs(la->l2_psm) < 0x1001 && + if (la.l2_psm && btohs(la.l2_psm) < 0x1001 && !capable(CAP_NET_BIND_SERVICE)) { err = -EACCES; goto done; @@ -796,16 +800,16 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_ write_lock_bh(&l2cap_sk_list.lock); - if (la->l2_psm && __l2cap_get_sock_by_addr(la->l2_psm, &la->l2_bdaddr)) { + if (la.l2_psm && __l2cap_get_sock_by_addr(la.l2_psm, &la.l2_bdaddr)) { err = -EADDRINUSE; } else { /* Save source address */ - bacpy(&bt_sk(sk)->src, &la->l2_bdaddr); - l2cap_pi(sk)->psm = la->l2_psm; - l2cap_pi(sk)->sport = la->l2_psm; + bacpy(&bt_sk(sk)->src, &la.l2_bdaddr); + l2cap_pi(sk)->psm = la.l2_psm; + l2cap_pi(sk)->sport = la.l2_psm; sk->sk_state = BT_BOUND; - if (btohs(la->l2_psm) == 0x0001 || btohs(la->l2_psm) == 0x0003) + if (btohs(la.l2_psm) == 0x0001 || btohs(la.l2_psm) == 0x0003) l2cap_pi(sk)->sec_level = BT_SECURITY_SDP; } @@ -826,7 +830,8 @@ static int l2cap_do_connect(struct sock *sk) __u8 auth_type; int err = 0; - BT_DBG("%s -> %s psm 0x%2.2x", batostr(src), batostr(dst), l2cap_pi(sk)->psm); + BT_DBG("%s -> %s psm 0x%2.2x", batostr(src), batostr(dst), + l2cap_pi(sk)->psm); if (!(hdev = hci_get_route(dst, src))) return -EHOSTUNREACH; @@ -906,20 +911,24 @@ done: static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags) { - struct sockaddr_l2 *la = (struct sockaddr_l2 *) addr; struct sock *sk = sock->sk; - int err = 0; + struct sockaddr_l2 la; + int len, err = 0; lock_sock(sk); BT_DBG("sk %p", sk); - if (addr->sa_family != AF_BLUETOOTH || alen < sizeof(struct sockaddr_l2)) { + if (!addr || addr->sa_family != AF_BLUETOOTH) { err = -EINVAL; goto done; } - if (sk->sk_type == SOCK_SEQPACKET && !la->l2_psm) { + memset(&la, 0, sizeof(la)); + len = min_t(unsigned int, sizeof(la), alen); + memcpy(&la, addr, len); + + if (sk->sk_type == SOCK_SEQPACKET && !la.l2_psm) { err = -EINVAL; goto done; } @@ -946,8 +955,8 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int al } /* Set destination address and psm */ - bacpy(&bt_sk(sk)->dst, &la->l2_bdaddr); - l2cap_pi(sk)->psm = la->l2_psm; + bacpy(&bt_sk(sk)->dst, &la.l2_bdaddr); + l2cap_pi(sk)->psm = la.l2_psm; if ((err = l2cap_do_connect(sk))) goto done; @@ -1071,12 +1080,16 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr, int *l addr->sa_family = AF_BLUETOOTH; *len = sizeof(struct sockaddr_l2); - if (peer) + if (peer) { + la->l2_psm = l2cap_pi(sk)->psm; bacpy(&la->l2_bdaddr, &bt_sk(sk)->dst); - else + la->l2_cid = htobs(l2cap_pi(sk)->dcid); + } else { + la->l2_psm = l2cap_pi(sk)->sport; bacpy(&la->l2_bdaddr, &bt_sk(sk)->src); + la->l2_cid = htobs(l2cap_pi(sk)->scid); + } - la->l2_psm = l2cap_pi(sk)->psm; return 0; } @@ -1208,7 +1221,7 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname, char __us { struct sock *sk = sock->sk; struct l2cap_options opts; - int err = 0, len; + int len, err = 0; u32 opt; BT_DBG("sk %p", sk); -- cgit v1.2.3 From 2950f21acb0f6b8fcd964485c2ebf1e06545ac20 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 12 Feb 2009 14:02:50 +0100 Subject: Bluetooth: Ask upper layers for HCI disconnect reason Some of the qualification tests demand that in case of failures in L2CAP the HCI disconnect should indicate a reason why L2CAP fails. This is a bluntly layer violation since multiple L2CAP connections could be using the same ACL and thus forcing a disconnect reason is not a good idea. To comply with the Bluetooth test specification, the disconnect reason is now stored in the L2CAP connection structure and every time a new L2CAP channel is added it will set back to its default. So only in the case where the L2CAP channel with the disconnect reason is really the last one, it will propagated to the HCI layer. The HCI layer has been extended with a disconnect indication that allows it to ask upper layers for a disconnect reason. The upper layer must not support this callback and in that case it will nicely default to the existing behavior. If an upper layer like L2CAP can provide a disconnect reason that one will be used to disconnect the ACL or SCO link. No modification to the ACL disconnect timeout have been made. So in case of Linux to Linux connection the initiator will disconnect the ACL link before the acceptor side can signal the specific disconnect reason. That is perfectly fine since Linux doesn't make use of this value anyway. The L2CAP layer has a perfect valid error code for rejecting connection due to a security violation. It is unclear why the Bluetooth specification insists on having specific HCI disconnect reason. Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 6 ++++-- net/bluetooth/hci_event.c | 2 +- net/bluetooth/l2cap.c | 20 +++++++++++++++++++- net/bluetooth/sco.c | 4 ++-- 4 files changed, 26 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index dcdaa4be7847..96281a11a186 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -159,6 +159,7 @@ static void hci_conn_timeout(unsigned long arg) { struct hci_conn *conn = (void *) arg; struct hci_dev *hdev = conn->hdev; + __u8 reason; BT_DBG("conn %p state %d", conn, conn->state); @@ -177,7 +178,8 @@ static void hci_conn_timeout(unsigned long arg) break; case BT_CONFIG: case BT_CONNECTED: - hci_acl_disconn(conn, 0x13); + reason = hci_proto_disconn_ind(conn); + hci_acl_disconn(conn, reason); break; default: conn->state = BT_CLOSED; @@ -562,7 +564,7 @@ void hci_conn_hash_flush(struct hci_dev *hdev) hci_conn_del_sysfs(c); - hci_proto_disconn_ind(c, 0x16); + hci_proto_disconn_cfm(c, 0x16); hci_conn_del(c); } } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 899b8991a466..c396542c2b82 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1021,7 +1021,7 @@ static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff hci_conn_del_sysfs(conn); - hci_proto_disconn_ind(conn, ev->reason); + hci_proto_disconn_cfm(conn, ev->reason); hci_conn_del(conn); } diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 7bba469b6828..d563f2ebcbb3 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -206,6 +206,8 @@ static void __l2cap_chan_add(struct l2cap_conn *conn, struct sock *sk, struct so BT_DBG("conn %p, psm 0x%2.2x, dcid 0x%4.4x", conn, l2cap_pi(sk)->psm, l2cap_pi(sk)->dcid); + conn->disc_reason = 0x13; + l2cap_pi(sk)->conn = conn; if (sk->sk_type == SOCK_SEQPACKET) { @@ -491,6 +493,8 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon, u8 status) spin_lock_init(&conn->lock); rwlock_init(&conn->chan_list.lock); + conn->disc_reason = 0x13; + return conn; } @@ -1840,6 +1844,7 @@ static inline int l2cap_connect_req(struct l2cap_conn *conn, struct l2cap_cmd_hd /* Check if the ACL is secure enough (if not SDP) */ if (psm != cpu_to_le16(0x0001) && !hci_conn_check_link_mode(conn->hcon)) { + conn->disc_reason = 0x05; result = L2CAP_CR_SEC_BLOCK; goto response; } @@ -2472,7 +2477,19 @@ static int l2cap_connect_cfm(struct hci_conn *hcon, u8 status) return 0; } -static int l2cap_disconn_ind(struct hci_conn *hcon, u8 reason) +static int l2cap_disconn_ind(struct hci_conn *hcon) +{ + struct l2cap_conn *conn = hcon->l2cap_data; + + BT_DBG("hcon %p", hcon); + + if (hcon->type != ACL_LINK || !conn) + return 0x13; + + return conn->disc_reason; +} + +static int l2cap_disconn_cfm(struct hci_conn *hcon, u8 reason) { BT_DBG("hcon %p reason %d", hcon, reason); @@ -2717,6 +2734,7 @@ static struct hci_proto l2cap_hci_proto = { .connect_ind = l2cap_connect_ind, .connect_cfm = l2cap_connect_cfm, .disconn_ind = l2cap_disconn_ind, + .disconn_cfm = l2cap_disconn_cfm, .security_cfm = l2cap_security_cfm, .recv_acldata = l2cap_recv_acldata }; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 7f10f97cd697..51ae0c3e470a 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -902,7 +902,7 @@ static int sco_connect_cfm(struct hci_conn *hcon, __u8 status) return 0; } -static int sco_disconn_ind(struct hci_conn *hcon, __u8 reason) +static int sco_disconn_cfm(struct hci_conn *hcon, __u8 reason) { BT_DBG("hcon %p reason %d", hcon, reason); @@ -985,7 +985,7 @@ static struct hci_proto sco_hci_proto = { .id = HCI_PROTO_SCO, .connect_ind = sco_connect_ind, .connect_cfm = sco_connect_cfm, - .disconn_ind = sco_disconn_ind, + .disconn_cfm = sco_disconn_cfm, .recv_scodata = sco_recv_scodata }; -- cgit v1.2.3 From 00ae4af91d8c5b6814e2bb3bfaaf743845f989eb Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 12 Feb 2009 16:19:45 +0100 Subject: Bluetooth: Fix authentication requirements for L2CAP security check The L2CAP layer can trigger the authentication via an ACL connection or later on to increase the security level. When increasing the security level it didn't use the same authentication requirements when triggering a new ACL connection. Make sure that exactly the same authentication requirements are used. The only exception here are the L2CAP raw sockets which are only used for dedicated bonding. Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index d563f2ebcbb3..79a4325a1388 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -268,16 +268,26 @@ static inline int l2cap_check_security(struct sock *sk) struct l2cap_conn *conn = l2cap_pi(sk)->conn; __u8 auth_type; - switch (l2cap_pi(sk)->sec_level) { - case BT_SECURITY_HIGH: - auth_type = HCI_AT_GENERAL_BONDING_MITM; - break; - case BT_SECURITY_MEDIUM: - auth_type = HCI_AT_GENERAL_BONDING; - break; - default: - auth_type = HCI_AT_NO_BONDING; - break; + if (l2cap_pi(sk)->psm == cpu_to_le16(0x0001)) { + if (l2cap_pi(sk)->sec_level == BT_SECURITY_HIGH) + auth_type = HCI_AT_NO_BONDING_MITM; + else + auth_type = HCI_AT_NO_BONDING; + + if (l2cap_pi(sk)->sec_level == BT_SECURITY_LOW) + l2cap_pi(sk)->sec_level = BT_SECURITY_SDP; + } else { + switch (l2cap_pi(sk)->sec_level) { + case BT_SECURITY_HIGH: + auth_type = HCI_AT_GENERAL_BONDING_MITM; + break; + case BT_SECURITY_MEDIUM: + auth_type = HCI_AT_GENERAL_BONDING; + break; + default: + auth_type = HCI_AT_NO_BONDING; + break; + } } return hci_conn_security(conn->hcon, l2cap_pi(sk)->sec_level, -- cgit v1.2.3 From 96a3183322cba1a2846771b067c99b9d6f481263 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 12 Feb 2009 16:23:03 +0100 Subject: Bluetooth: Set authentication requirement before requesting it The authentication requirement got only updated when the security level increased. This is a wrong behavior. The authentication requirement is read by the Bluetooth daemon to make proper decisions when handling the IO capabilities exchange. So set the value that is currently expected by the higher layers like L2CAP and RFCOMM. Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 96281a11a186..efd5c926cc1b 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -397,12 +397,13 @@ static int hci_conn_auth(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) { BT_DBG("conn %p", conn); - if (sec_level > conn->sec_level) { + if (sec_level > conn->sec_level) conn->sec_level = sec_level; - conn->auth_type = auth_type; - } else if (conn->link_mode & HCI_LM_AUTH) + else if (conn->link_mode & HCI_LM_AUTH) return 1; + conn->auth_type = auth_type; + if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->pend)) { struct hci_cp_auth_requested cp; cp.handle = cpu_to_le16(conn->handle); @@ -418,9 +419,6 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) { BT_DBG("conn %p", conn); - if (conn->auth_type == 0xff) - conn->auth_type = auth_type; - if (sec_level == BT_SECURITY_SDP) return 1; -- cgit v1.2.3 From d5f2d2be68876f65dd051b978a7b66265fde9ffd Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 16 Feb 2009 02:57:30 +0100 Subject: Bluetooth: Fix poll() misbehavior when using BT_DEFER_SETUP When BT_DEFER_SETUP has been enabled on a Bluetooth socket it keeps signaling POLLIN all the time. This is a wrong behavior. The POLLIN should only be signaled if the client socket is in BT_CONNECT2 state and the parent has been BT_DEFER_SETUP enabled. Signed-off-by: Marcel Holtmann --- net/bluetooth/af_bluetooth.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index ded57974390e..02b9baa1930b 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -270,12 +270,11 @@ static inline unsigned int bt_accept_poll(struct sock *parent) struct list_head *p, *n; struct sock *sk; - if (bt_sk(parent)->defer_setup) - return POLLIN | POLLRDNORM; - list_for_each_safe(p, n, &bt_sk(parent)->accept_q) { sk = (struct sock *) list_entry(p, struct bt_sock, accept_q); - if (sk->sk_state == BT_CONNECTED) + if (sk->sk_state == BT_CONNECTED || + (bt_sk(parent)->defer_setup && + sk->sk_state == BT_CONNECT2)) return POLLIN | POLLRDNORM; } -- cgit v1.2.3 From 8bf4794174659b06d43cc5e290cd384757374613 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 16 Feb 2009 02:59:49 +0100 Subject: Bluetooth: Change RFCOMM to use BT_CONNECT2 for BT_DEFER_SETUP When BT_DEFER_SETUP is enabled on a RFCOMM socket, then switch its current state from BT_OPEN to BT_CONNECT2. This gives the Bluetooth core a unified way to handle L2CAP and RFCOMM sockets. The BT_CONNECT2 state is designated for incoming connections. Signed-off-by: Marcel Holtmann --- net/bluetooth/rfcomm/core.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'net') diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 1828ec06ad1c..5576c8191507 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -448,6 +448,7 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err) break; case BT_OPEN: + case BT_CONNECT2: if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) { set_bit(RFCOMM_AUTH_REJECT, &d->flags); rfcomm_schedule(RFCOMM_SCHED_AUTH); @@ -1208,6 +1209,11 @@ static void rfcomm_check_accept(struct rfcomm_dlc *d) if (d->defer_setup) { set_bit(RFCOMM_DEFER_SETUP, &d->flags); rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT); + + rfcomm_dlc_lock(d); + d->state = BT_CONNECT2; + d->state_change(d, 0); + rfcomm_dlc_unlock(d); } else rfcomm_dlc_accept(d); } else { @@ -1749,6 +1755,11 @@ static inline void rfcomm_process_dlcs(struct rfcomm_session *s) if (d->defer_setup) { set_bit(RFCOMM_DEFER_SETUP, &d->flags); rfcomm_dlc_set_timer(d, RFCOMM_AUTH_TIMEOUT); + + rfcomm_dlc_lock(d); + d->state = BT_CONNECT2; + d->state_change(d, 0); + rfcomm_dlc_unlock(d); } else rfcomm_dlc_accept(d); } -- cgit v1.2.3 From 2a517ca687232adc8f14893730644da712010ffc Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 16 Feb 2009 03:20:31 +0100 Subject: Bluetooth: Disallow usage of L2CAP CID setting for now In the future the L2CAP layer will have full support for fixed channels and right now it already can export the channel assignment, but for the functions bind() and connect() the usage of only CID 0 is allowed. This allows an easy detection if the kernel supports fixed channels or not, because otherwise it would impossible for application to tell. Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 79a4325a1388..7c6768c2a530 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -799,6 +799,9 @@ static int l2cap_sock_bind(struct socket *sock, struct sockaddr *addr, int alen) len = min_t(unsigned int, sizeof(la), alen); memcpy(&la, addr, len); + if (la.l2_cid) + return -EINVAL; + lock_sock(sk); if (sk->sk_state != BT_OPEN) { @@ -929,19 +932,20 @@ static int l2cap_sock_connect(struct socket *sock, struct sockaddr *addr, int al struct sockaddr_l2 la; int len, err = 0; - lock_sock(sk); - BT_DBG("sk %p", sk); - if (!addr || addr->sa_family != AF_BLUETOOTH) { - err = -EINVAL; - goto done; - } + if (!addr || addr->sa_family != AF_BLUETOOTH) + return -EINVAL; memset(&la, 0, sizeof(la)); len = min_t(unsigned int, sizeof(la), alen); memcpy(&la, addr, len); + if (la.l2_cid) + return -EINVAL; + + lock_sock(sk); + if (sk->sk_type == SOCK_SEQPACKET && !la.l2_psm) { err = -EINVAL; goto done; -- cgit v1.2.3 From 37e62f5516cfb210e64fe53457932df4341b0ad1 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 17 Feb 2009 21:49:33 +0100 Subject: Bluetooth: Fix RFCOMM usage of in-kernel L2CAP sockets The CID value of L2CAP sockets need to be set to zero. All userspace applications do this via memset() on the sockaddr_l2 structure. The RFCOMM implementation uses in-kernel L2CAP sockets and so it has to make sure that l2_cid is set to zero. Signed-off-by: Marcel Holtmann --- net/bluetooth/rfcomm/core.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 5576c8191507..1d0fb0f23c63 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -658,6 +658,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst bacpy(&addr.l2_bdaddr, src); addr.l2_family = AF_BLUETOOTH; addr.l2_psm = 0; + addr.l2_cid = 0; *err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr)); if (*err < 0) goto failed; @@ -679,6 +680,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bdaddr_t *dst bacpy(&addr.l2_bdaddr, dst); addr.l2_family = AF_BLUETOOTH; addr.l2_psm = htobs(RFCOMM_PSM); + addr.l2_cid = 0; *err = kernel_connect(sock, (struct sockaddr *) &addr, sizeof(addr), O_NONBLOCK); if (*err == 0 || *err == -EINPROGRESS) return s; @@ -1919,6 +1921,7 @@ static int rfcomm_add_listener(bdaddr_t *ba) bacpy(&addr.l2_bdaddr, ba); addr.l2_family = AF_BLUETOOTH; addr.l2_psm = htobs(RFCOMM_PSM); + addr.l2_cid = 0; err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr)); if (err < 0) { BT_ERR("Bind failed %d", err); -- cgit v1.2.3 From 2526d3d8b2f671a7d36cc486af984052cd5a690f Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Fri, 20 Feb 2009 20:54:06 +0100 Subject: Bluetooth: Permit BT_SECURITY also for L2CAP raw sockets Userspace pairing code can be simplified if it doesn't have to fall back to using L2CAP_LM in the case of L2CAP raw sockets. This patch allows the BT_SECURITY socket option to be used for these sockets. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- net/bluetooth/l2cap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index 7c6768c2a530..db6fbf129be0 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -1308,7 +1308,7 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname, ch switch (optname) { case BT_SECURITY: - if (sk->sk_type != SOCK_SEQPACKET) { + if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_RAW) { err = -EINVAL; break; } @@ -1455,7 +1455,7 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname, ch switch (optname) { case BT_SECURITY: - if (sk->sk_type != SOCK_SEQPACKET) { + if (sk->sk_type != SOCK_SEQPACKET && sk->sk_type != SOCK_RAW) { err = -EINVAL; break; } -- cgit v1.2.3 From 2ae9a6be5f476f3512839a4d11a8f432bfd2914c Mon Sep 17 00:00:00 2001 From: Dave Young Date: Sat, 21 Feb 2009 16:13:34 +0800 Subject: Bluetooth: Move hci_conn_del_sysfs() back to avoid device destruct too early The following commit introduce a regression: commit 7d0db0a373195385a2e0b19d1f5e4b186fdcffac Author: Marcel Holtmann Date: Mon Jul 14 20:13:51 2008 +0200 [Bluetooth] Use a more unique bus name for connections I get panic as following (by netconsole): [ 2709.344034] usb 5-1: new full speed USB device using uhci_hcd and address 4 [ 2709.505776] usb 5-1: configuration #1 chosen from 1 choice [ 2709.569207] Bluetooth: Generic Bluetooth USB driver ver 0.4 [ 2709.570169] usbcore: registered new interface driver btusb [ 2845.742781] BUG: unable to handle kernel paging request at 6b6b6c2f [ 2845.742958] IP: [] __lock_acquire+0x6c/0xa80 [ 2845.743087] *pde = 00000000 [ 2845.743206] Oops: 0002 [#1] SMP [ 2845.743377] last sysfs file: /sys/class/bluetooth/hci0/hci0:6/type [ 2845.743742] Modules linked in: btusb netconsole snd_seq_dummy snd_seq_oss snd_seq_midi_event snd_seq snd_seq_device snd_pcm_oss snd_mixer_oss rfcomm l2cap bluetooth vfat fuse snd_hda_codec_idt snd_hda_intel snd_hda_codec snd_hwdep snd_pcm pl2303 snd_timer psmouse usbserial snd 3c59x e100 serio_raw soundcore i2c_i801 intel_agp mii agpgart snd_page_alloc rtc_cmos rtc_core thermal processor rtc_lib button thermal_sys sg evdev [ 2845.743742] [ 2845.743742] Pid: 0, comm: swapper Not tainted (2.6.29-rc5-smp #54) Dell DM051 [ 2845.743742] EIP: 0060:[] EFLAGS: 00010002 CPU: 0 [ 2845.743742] EIP is at __lock_acquire+0x6c/0xa80 [ 2845.743742] EAX: 00000046 EBX: 00000046 ECX: 6b6b6b6b EDX: 00000002 [ 2845.743742] ESI: 6b6b6b6b EDI: 00000000 EBP: c064fd14 ESP: c064fcc8 [ 2845.743742] DS: 007b ES: 007b FS: 00d8 GS: 0000 SS: 0068 [ 2845.743742] Process swapper (pid: 0, ti=c064e000 task=c05d1400 task.ti=c064e000) [ 2845.743742] Stack: [ 2845.743742] c05d1400 00000002 c05d1400 00000001 00000002 00000000 f65388dc c05d1400 [ 2845.743742] 6b6b6b6b 00000292 c064fd0c c0153732 00000000 00000000 00000001 f700fa50 [ 2845.743742] 00000046 00000000 00000000 c064fd40 c0155be6 00000000 00000002 00000001 [ 2845.743742] Call Trace: [ 2845.743742] [] ? trace_hardirqs_on_caller+0x72/0x1c0 [ 2845.743742] [] ? lock_acquire+0x76/0xa0 [ 2845.743742] [] ? skb_dequeue+0x1d/0x70 [ 2845.743742] [] ? _spin_lock_irqsave+0x45/0x80 [ 2845.743742] [] ? skb_dequeue+0x1d/0x70 [ 2845.743742] [] ? skb_dequeue+0x1d/0x70 [ 2845.743742] [] ? skb_queue_purge+0x14/0x20 [ 2845.743742] [] ? hci_conn_del+0x10a/0x1c0 [bluetooth] [ 2845.743742] [] ? l2cap_disconn_ind+0x59/0xb0 [l2cap] [ 2845.743742] [] ? hci_conn_del_sysfs+0x8e/0xd0 [bluetooth] [ 2845.743742] [] ? hci_event_packet+0x5f8/0x31c0 [bluetooth] [ 2845.743742] [] ? sock_def_readable+0x59/0x80 [ 2845.743742] [] ? _read_unlock+0x1d/0x20 [ 2845.743742] [] ? hci_send_to_sock+0xe9/0x1d0 [bluetooth] [ 2845.743742] [] ? trace_hardirqs_on+0xb/0x10 [ 2845.743742] [] ? hci_rx_task+0x2ba/0x490 [bluetooth] [ 2845.743742] [] ? tasklet_action+0x31/0xc0 [ 2845.743742] [] ? tasklet_action+0x4c/0xc0 [ 2845.743742] [] ? __do_softirq+0xa7/0x170 [ 2845.743742] [] ? ack_apic_level+0x5c/0x1c0 [ 2845.743742] [] ? do_softirq+0x57/0x60 [ 2845.743742] [] ? irq_exit+0x7c/0x90 [ 2845.743742] [] ? do_IRQ+0x4b/0x90 [ 2845.743742] [] ? irq_exit+0x75/0x90 [ 2845.743742] [] ? common_interrupt+0x2c/0x34 [ 2845.743742] [] ? mwait_idle+0x4f/0x70 [ 2845.743742] [] ? cpu_idle+0x65/0xb0 [ 2845.743742] [] ? rest_init+0x4e/0x60 [ 2845.743742] Code: 0f 84 69 02 00 00 83 ff 07 0f 87 1e 06 00 00 85 ff 0f 85 08 05 00 00 8b 4d cc 8b 49 04 85 c9 89 4d d4 0f 84 f7 04 00 00 8b 75 d4 ff 86 c4 00 00 00 89 f0 e8 56 a9 ff ff 85 c0 0f 85 6e 03 00 [ 2845.743742] EIP: [] __lock_acquire+0x6c/0xa80 SS:ESP 0068:c064fcc8 [ 2845.743742] ---[ end trace 4c985b38f022279f ]--- [ 2845.743742] Kernel panic - not syncing: Fatal exception in interrupt [ 2845.743742] ------------[ cut here ]------------ [ 2845.743742] WARNING: at kernel/smp.c:329 smp_call_function_many+0x151/0x200() [ 2845.743742] Hardware name: Dell DM051 [ 2845.743742] Modules linked in: btusb netconsole snd_seq_dummy snd_seq_oss snd_seq_midi_event snd_seq snd_seq_device snd_pcm_oss snd_mixer_oss rfcomm l2cap bluetooth vfat fuse snd_hda_codec_idt snd_hda_intel snd_hda_codec snd_hwdep snd_pcm pl2303 snd_timer psmouse usbserial snd 3c59x e100 serio_raw soundcore i2c_i801 intel_agp mii agpgart snd_page_alloc rtc_cmos rtc_core thermal processor rtc_lib button thermal_sys sg evdev [ 2845.743742] Pid: 0, comm: swapper Tainted: G D 2.6.29-rc5-smp #54 [ 2845.743742] Call Trace: [ 2845.743742] [] warn_slowpath+0x86/0xa0 [ 2845.743742] [] ? trace_hardirqs_off+0xb/0x10 [ 2845.743742] [] ? up+0x14/0x40 [ 2845.743742] [] ? release_console_sem+0x31/0x1e0 [ 2845.743742] [] ? _spin_lock_irqsave+0x6b/0x80 [ 2845.743742] [] ? trace_hardirqs_off+0xb/0x10 [ 2845.743742] [] ? _read_lock_irqsave+0x40/0x80 [ 2845.743742] [] ? release_console_sem+0x1c2/0x1e0 [ 2845.743742] [] ? up+0x14/0x40 [ 2845.743742] [] ? trace_hardirqs_off+0xb/0x10 [ 2845.743742] [] ? __mutex_unlock_slowpath+0x97/0x160 [ 2845.743742] [] ? mutex_trylock+0xb3/0x180 [ 2845.743742] [] ? mutex_unlock+0x8/0x10 [ 2845.743742] [] smp_call_function_many+0x151/0x200 [ 2845.743742] [] ? stop_this_cpu+0x0/0x40 [ 2845.743742] [] smp_call_function+0x21/0x30 [ 2845.743742] [] native_smp_send_stop+0x1e/0x50 [ 2845.743742] [] panic+0x55/0x110 [ 2845.743742] [] oops_end+0xb8/0xc0 [ 2845.743742] [] die+0x4f/0x70 [ 2845.743742] [] do_page_fault+0x269/0x610 [ 2845.743742] [] ? do_page_fault+0x0/0x610 [ 2845.743742] [] error_code+0x77/0x7c [ 2845.743742] [] ? __lock_acquire+0x6c/0xa80 [ 2845.743742] [] ? trace_hardirqs_on_caller+0x72/0x1c0 [ 2845.743742] [] lock_acquire+0x76/0xa0 [ 2845.743742] [] ? skb_dequeue+0x1d/0x70 [ 2845.743742] [] _spin_lock_irqsave+0x45/0x80 [ 2845.743742] [] ? skb_dequeue+0x1d/0x70 [ 2845.743742] [] skb_dequeue+0x1d/0x70 [ 2845.743742] [] skb_queue_purge+0x14/0x20 [ 2845.743742] [] hci_conn_del+0x10a/0x1c0 [bluetooth] [ 2845.743742] [] ? l2cap_disconn_ind+0x59/0xb0 [l2cap] [ 2845.743742] [] ? hci_conn_del_sysfs+0x8e/0xd0 [bluetooth] [ 2845.743742] [] hci_event_packet+0x5f8/0x31c0 [bluetooth] [ 2845.743742] [] ? sock_def_readable+0x59/0x80 [ 2845.743742] [] ? _read_unlock+0x1d/0x20 [ 2845.743742] [] ? hci_send_to_sock+0xe9/0x1d0 [bluetooth] [ 2845.743742] [] ? trace_hardirqs_on+0xb/0x10 [ 2845.743742] [] hci_rx_task+0x2ba/0x490 [bluetooth] [ 2845.743742] [] ? tasklet_action+0x31/0xc0 [ 2845.743742] [] tasklet_action+0x4c/0xc0 [ 2845.743742] [] __do_softirq+0xa7/0x170 [ 2845.743742] [] ? ack_apic_level+0x5c/0x1c0 [ 2845.743742] [] do_softirq+0x57/0x60 [ 2845.743742] [] irq_exit+0x7c/0x90 [ 2845.743742] [] do_IRQ+0x4b/0x90 [ 2845.743742] [] ? irq_exit+0x75/0x90 [ 2845.743742] [] common_interrupt+0x2c/0x34 [ 2845.743742] [] ? mwait_idle+0x4f/0x70 [ 2845.743742] [] cpu_idle+0x65/0xb0 [ 2845.743742] [] rest_init+0x4e/0x60 [ 2845.743742] ---[ end trace 4c985b38f02227a0 ]--- [ 2845.743742] ------------[ cut here ]------------ [ 2845.743742] WARNING: at kernel/smp.c:226 smp_call_function_single+0x8e/0x110() [ 2845.743742] Hardware name: Dell DM051 [ 2845.743742] Modules linked in: btusb netconsole snd_seq_dummy snd_seq_oss snd_seq_midi_event snd_seq snd_seq_device snd_pcm_oss snd_mixer_oss rfcomm l2cap bluetooth vfat fuse snd_hda_codec_idt snd_hda_intel snd_hda_codec snd_hwdep snd_pcm pl2303 snd_timer psmouse usbserial snd 3c59x e100 serio_raw soundcore i2c_i801 intel_agp mii agpgart snd_page_alloc rtc_cmos rtc_core thermal processor rtc_lib button thermal_sys sg evdev [ 2845.743742] Pid: 0, comm: swapper Tainted: G D W 2.6.29-rc5-smp #54 [ 2845.743742] Call Trace: [ 2845.743742] [] warn_slowpath+0x86/0xa0 [ 2845.743742] [] ? warn_slowpath+0x10/0xa0 [ 2845.743742] [] ? trace_hardirqs_off+0xb/0x10 [ 2845.743742] [] ? up+0x14/0x40 [ 2845.743742] [] ? release_console_sem+0x31/0x1e0 [ 2845.743742] [] ? _spin_lock_irqsave+0x6b/0x80 [ 2845.743742] [] ? trace_hardirqs_off+0xb/0x10 [ 2845.743742] [] ? _read_lock_irqsave+0x40/0x80 [ 2845.743742] [] ? release_console_sem+0x1c2/0x1e0 [ 2845.743742] [] ? up+0x14/0x40 [ 2845.743742] [] smp_call_function_single+0x8e/0x110 [ 2845.743742] [] ? stop_this_cpu+0x0/0x40 [ 2845.743742] [] ? cpumask_next_and+0x1f/0x40 [ 2845.743742] [] smp_call_function_many+0x11a/0x200 [ 2845.743742] [] ? stop_this_cpu+0x0/0x40 [ 2845.743742] [] smp_call_function+0x21/0x30 [ 2845.743742] [] native_smp_send_stop+0x1e/0x50 [ 2845.743742] [] panic+0x55/0x110 [ 2845.743742] [] oops_end+0xb8/0xc0 [ 2845.743742] [] die+0x4f/0x70 [ 2845.743742] [] do_page_fault+0x269/0x610 [ 2845.743742] [] ? do_page_fault+0x0/0x610 [ 2845.743742] [] error_code+0x77/0x7c [ 2845.743742] [] ? __lock_acquire+0x6c/0xa80 [ 2845.743742] [] ? trace_hardirqs_on_caller+0x72/0x1c0 [ 2845.743742] [] lock_acquire+0x76/0xa0 [ 2845.743742] [] ? skb_dequeue+0x1d/0x70 [ 2845.743742] [] _spin_lock_irqsave+0x45/0x80 [ 2845.743742] [] ? skb_dequeue+0x1d/0x70 [ 2845.743742] [] skb_dequeue+0x1d/0x70 [ 2845.743742] [] skb_queue_purge+0x14/0x20 [ 2845.743742] [] hci_conn_del+0x10a/0x1c0 [bluetooth] [ 2845.743742] [] ? l2cap_disconn_ind+0x59/0xb0 [l2cap] [ 2845.743742] [] ? hci_conn_del_sysfs+0x8e/0xd0 [bluetooth] [ 2845.743742] [] hci_event_packet+0x5f8/0x31c0 [bluetooth] [ 2845.743742] [] ? sock_def_readable+0x59/0x80 [ 2845.743742] [] ? _read_unlock+0x1d/0x20 [ 2845.743742] [] ? hci_send_to_sock+0xe9/0x1d0 [bluetooth] [ 2845.743742] [] ? trace_hardirqs_on+0xb/0x10 [ 2845.743742] [] hci_rx_task+0x2ba/0x490 [bluetooth] [ 2845.743742] [] ? tasklet_action+0x31/0xc0 [ 2845.743742] [] tasklet_action+0x4c/0xc0 [ 2845.743742] [] __do_softirq+0xa7/0x170 [ 2845.743742] [] ? ack_apic_level+0x5c/0x1c0 [ 2845.743742] [] do_softirq+0x57/0x60 [ 2845.743742] [] irq_exit+0x7c/0x90 [ 2845.743742] [] do_IRQ+0x4b/0x90 [ 2845.743742] [] ? irq_exit+0x75/0x90 [ 2845.743742] [] common_interrupt+0x2c/0x34 [ 2845.743742] [] ? mwait_idle+0x4f/0x70 [ 2845.743742] [] cpu_idle+0x65/0xb0 [ 2845.743742] [] rest_init+0x4e/0x60 [ 2845.743742] ---[ end trace 4c985b38f02227a1 ]--- [ 2845.743742] Rebooting in 3 seconds.. My logitec bluetooth mouse trying connect to pc, but pc side reject the connection again and again. then panic happens. The reason is due to hci_conn_del_sysfs now called in hci_event_packet, the del work is done in a workqueue, so it's possible done before skb_queue_purge called. I move the hci_conn_del_sysfs after skb_queue_purge just as that before marcel's commit. Remove the hci_conn_del_sysfs in hci_conn_hash_flush as well due to hci_conn_del will deal with the work. Signed-off-by: Dave Young Signed-off-by: Marcel Holtmann --- net/bluetooth/hci_conn.c | 4 ++-- net/bluetooth/hci_event.c | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index efd5c926cc1b..1181db08d9de 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -287,6 +287,8 @@ int hci_conn_del(struct hci_conn *conn) skb_queue_purge(&conn->data_q); + hci_conn_del_sysfs(conn); + return 0; } @@ -560,8 +562,6 @@ void hci_conn_hash_flush(struct hci_dev *hdev) c->state = BT_CLOSED; - hci_conn_del_sysfs(c); - hci_proto_disconn_cfm(c, 0x16); hci_conn_del(c); } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index c396542c2b82..55534244c3a0 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1019,8 +1019,6 @@ static inline void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff if (conn) { conn->state = BT_CLOSED; - hci_conn_del_sysfs(conn); - hci_proto_disconn_cfm(conn, ev->reason); hci_conn_del(conn); } -- cgit v1.2.3 From 7585b97a48180f754ebdade1be94092e36bef365 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 25 Feb 2009 18:29:52 +0800 Subject: Bluetooth: Remove some pointless conditionals before kfree_skb() Remove some pointless conditionals before kfree_skb(). Signed-off-by: Wei Yongjun Signed-off-by: Marcel Holtmann --- net/bluetooth/cmtp/core.c | 3 +-- net/bluetooth/hci_core.c | 3 +-- net/bluetooth/l2cap.c | 3 +-- 3 files changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/cmtp/core.c b/net/bluetooth/cmtp/core.c index c9cac7719efe..0073ec8495da 100644 --- a/net/bluetooth/cmtp/core.c +++ b/net/bluetooth/cmtp/core.c @@ -126,8 +126,7 @@ static inline void cmtp_add_msgpart(struct cmtp_session *session, int id, const session->reassembly[id] = nskb; - if (skb) - kfree_skb(skb); + kfree_skb(skb); } static inline int cmtp_recv_frame(struct cmtp_session *session, struct sk_buff *skb) diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ba78cc1eb8d9..cd061510b6bd 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1565,8 +1565,7 @@ static void hci_cmd_task(unsigned long arg) /* Send queued commands */ if (atomic_read(&hdev->cmd_cnt) && (skb = skb_dequeue(&hdev->cmd_q))) { - if (hdev->sent_cmd) - kfree_skb(hdev->sent_cmd); + kfree_skb(hdev->sent_cmd); if ((hdev->sent_cmd = skb_clone(skb, GFP_ATOMIC))) { atomic_dec(&hdev->cmd_cnt); diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c index db6fbf129be0..ca4d3b40d5ce 100644 --- a/net/bluetooth/l2cap.c +++ b/net/bluetooth/l2cap.c @@ -518,8 +518,7 @@ static void l2cap_conn_del(struct hci_conn *hcon, int err) BT_DBG("hcon %p conn %p, err %d", hcon, conn, err); - if (conn->rx_skb) - kfree_skb(conn->rx_skb); + kfree_skb(conn->rx_skb); /* Kill channels */ while ((sk = conn->chan_list.head)) { -- cgit v1.2.3 From 6f961068671698c242f1828960485fef1392916f Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 25 Feb 2009 00:31:04 +0000 Subject: af_key: remove some pointless conditionals before kfree_skb() Remove some pointless conditionals before kfree_skb(). Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/key/af_key.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/key/af_key.c b/net/key/af_key.c index 7dcbde3ea7d9..643c1be2d02e 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -313,8 +313,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, if (one_sk != NULL) err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk); - if (skb2) - kfree_skb(skb2); + kfree_skb(skb2); kfree_skb(skb); return err; } @@ -3573,8 +3572,7 @@ static int pfkey_sendmsg(struct kiocb *kiocb, out: if (err && hdr && pfkey_error(hdr, err, sk) == 0) err = 0; - if (skb) - kfree_skb(skb); + kfree_skb(skb); return err ? : len; } -- cgit v1.2.3 From 86dc1ad2be17a7436ee8c6799f6b55e5a5b930f4 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 25 Feb 2009 00:31:54 +0000 Subject: pktgen: remove some pointless conditionals before kfree_skb() Remove some pointless conditionals before kfree_skb(). Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/core/pktgen.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 65498483325a..32d419f5ac98 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3275,8 +3275,7 @@ static void pktgen_stop(struct pktgen_thread *t) list_for_each_entry(pkt_dev, &t->if_list, list) { pktgen_stop_device(pkt_dev); - if (pkt_dev->skb) - kfree_skb(pkt_dev->skb); + kfree_skb(pkt_dev->skb); pkt_dev->skb = NULL; } @@ -3303,8 +3302,7 @@ static void pktgen_rem_one_if(struct pktgen_thread *t) if (!cur->removal_mark) continue; - if (cur->skb) - kfree_skb(cur->skb); + kfree_skb(cur->skb); cur->skb = NULL; pktgen_remove_device(t, cur); @@ -3328,8 +3326,7 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t) list_for_each_safe(q, n, &t->if_list) { cur = list_entry(q, struct pktgen_dev, list); - if (cur->skb) - kfree_skb(cur->skb); + kfree_skb(cur->skb); cur->skb = NULL; pktgen_remove_device(t, cur); @@ -3393,8 +3390,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) if (!netif_running(odev)) { pktgen_stop_device(pkt_dev); - if (pkt_dev->skb) - kfree_skb(pkt_dev->skb); + kfree_skb(pkt_dev->skb); pkt_dev->skb = NULL; goto out; } @@ -3415,8 +3411,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) if ((++pkt_dev->clone_count >= pkt_dev->clone_skb) || (!pkt_dev->skb)) { /* build a new pkt */ - if (pkt_dev->skb) - kfree_skb(pkt_dev->skb); + kfree_skb(pkt_dev->skb); pkt_dev->skb = fill_packet(odev, pkt_dev); if (pkt_dev->skb == NULL) { @@ -3498,8 +3493,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) /* Done with this */ pktgen_stop_device(pkt_dev); - if (pkt_dev->skb) - kfree_skb(pkt_dev->skb); + kfree_skb(pkt_dev->skb); pkt_dev->skb = NULL; } out:; -- cgit v1.2.3 From 40d44446cf10d9d118e8f0132c94e1f25ea3be97 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 25 Feb 2009 00:32:45 +0000 Subject: unix: remove some pointless conditionals before kfree_skb() Remove some pointless conditionals before kfree_skb(). Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/unix/af_unix.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index d1b89820ab4f..baac91049b0e 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1178,8 +1178,7 @@ out_unlock: unix_state_unlock(other); out: - if (skb) - kfree_skb(skb); + kfree_skb(skb); if (newsk) unix_release_sock(newsk, 0); if (other) -- cgit v1.2.3 From 91744f6559393697e13bf0f9f3f35f884e2520f9 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 25 Feb 2009 00:34:41 +0000 Subject: netlink: remove some pointless conditionals before kfree_skb() Remove some pointless conditionals before kfree_skb(). Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2760b62dc2c1..e57d700bf6d9 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1049,8 +1049,7 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, netlink_unlock_table(); - if (info.skb2) - kfree_skb(info.skb2); + kfree_skb(info.skb2); if (info.delivery_failure) return -ENOBUFS; @@ -1542,8 +1541,7 @@ EXPORT_SYMBOL(netlink_set_nonroot); static void netlink_destroy_callback(struct netlink_callback *cb) { - if (cb->skb) - kfree_skb(cb->skb); + kfree_skb(cb->skb); kfree(cb); } -- cgit v1.2.3 From ce030edfb4ba6734248b8d9f98d0a6f1dcd142e7 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 25 Feb 2009 00:35:44 +0000 Subject: can: remove some pointless conditionals before kfree_skb() Remove some pointless conditionals before kfree_skb(). Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/can/af_can.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/can/af_can.c b/net/can/af_can.c index d90e8dd975fc..547bafc79e28 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -273,8 +273,7 @@ int can_send(struct sk_buff *skb, int loop) err = net_xmit_errno(err); if (err) { - if (newskb) - kfree_skb(newskb); + kfree_skb(newskb); return err; } -- cgit v1.2.3 From acb5d75b9bcff73d32d3471a9b3e9a4189223e48 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 25 Feb 2009 00:36:42 +0000 Subject: packet: remove some pointless conditionals before kfree_skb() Remove some pointless conditionals before kfree_skb(). Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/packet/af_packet.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 1fc4a7885c41..d8cc006fac45 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -756,8 +756,7 @@ ring_is_full: spin_unlock(&sk->sk_receive_queue.lock); sk->sk_data_ready(sk, 0); - if (copy_skb) - kfree_skb(copy_skb); + kfree_skb(copy_skb); goto drop_n_restore; } -- cgit v1.2.3 From f3fbbe0f6f6cbac4c2aa3d71d95e49cf148286d6 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 25 Feb 2009 00:37:32 +0000 Subject: core: remove some pointless conditionals before kfree_skb() Remove some pointless conditionals before kfree_skb(). Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/core/neighbour.c | 6 ++---- net/core/skbuff.c | 3 +-- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index e1144cb94b99..417b6d739fb7 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -871,8 +871,7 @@ static void neigh_timer_handler(unsigned long arg) write_unlock(&neigh->lock); neigh->ops->solicit(neigh, skb); atomic_inc(&neigh->probes); - if (skb) - kfree_skb(skb); + kfree_skb(skb); } else { out: write_unlock(&neigh->lock); @@ -908,8 +907,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) neigh->updated = jiffies; write_unlock_bh(&neigh->lock); - if (skb) - kfree_skb(skb); + kfree_skb(skb); return 1; } } else if (neigh->nud_state & NUD_STALE) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 33640d99c8ed..e5e2111a397d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1208,8 +1208,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta) insp = list; } if (!pskb_pull(list, eat)) { - if (clone) - kfree_skb(clone); + kfree_skb(clone); return NULL; } break; -- cgit v1.2.3 From db849df63cdc95e0345b1f4bcf8bbfa19ef592b6 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 25 Feb 2009 00:39:01 +0000 Subject: decnet: remove some pointless conditionals before kfree_skb() Remove some pointless conditionals before kfree_skb(). Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/decnet/af_decnet.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 12bf7d4c16c6..963da86d4ecf 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -2071,8 +2071,7 @@ static int dn_sendmsg(struct kiocb *iocb, struct socket *sock, } out: - if (skb) - kfree_skb(skb); + kfree_skb(skb); release_sock(sk); -- cgit v1.2.3 From 47a30b26e58ab7e56e5654766fd678a4b90010e3 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 25 Feb 2009 00:41:03 +0000 Subject: iucv: remove some pointless conditionals before kfree_skb() Remove some pointless conditionals before kfree_skb(). Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/iucv/af_iucv.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index eb8a2a0b6eb7..49e786535dc8 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1171,8 +1171,7 @@ static void iucv_callback_txdone(struct iucv_path *path, spin_unlock_irqrestore(&list->lock, flags); - if (this) - kfree_skb(this); + kfree_skb(this); } BUG_ON(!this); -- cgit v1.2.3 From c3431ea71ee2ec9d892b5d7a83eb0afcf8c79263 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Wed, 25 Feb 2009 00:42:22 +0000 Subject: llc: remove some pointless conditionals before kfree_skb() Remove some pointless conditionals before kfree_skb(). Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/llc/llc_conn.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 5c6d89c6d51d..3477624a4906 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -332,8 +332,7 @@ int llc_conn_remove_acked_pdus(struct sock *sk, u8 nr, u16 *how_many_unacked) for (i = 0; i < pdu_pos && i < q_len; i++) { skb = skb_dequeue(&llc->pdu_unack_q); - if (skb) - kfree_skb(skb); + kfree_skb(skb); nbr_acked++; } out: -- cgit v1.2.3 From 81c553299ffc4ce3512a6ea3eaf5070c2238c960 Mon Sep 17 00:00:00 2001 From: Hannes Eder Date: Wed, 25 Feb 2009 10:31:24 +0000 Subject: net/802: fix sparse warnings: context imbalance Impact: Attribute function with __acquires(...) resp. __releases(...). Fix this sparse warnings: net/802/tr.c:492:21: warning: context imbalance in 'rif_seq_start' - wrong count at exit net/802/tr.c:519:13: warning: context imbalance in 'rif_seq_stop' - unexpected unlock Signed-off-by: Hannes Eder Signed-off-by: David S. Miller --- net/802/tr.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/802/tr.c b/net/802/tr.c index 158150fee462..dab6334b6258 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -486,6 +486,7 @@ static struct rif_cache *rif_get_idx(loff_t pos) } static void *rif_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(&rif_lock) { spin_lock_irq(&rif_lock); @@ -517,6 +518,7 @@ static void *rif_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void rif_seq_stop(struct seq_file *seq, void *v) + __releases(&rif_lock) { spin_unlock_irq(&rif_lock); } -- cgit v1.2.3 From e3db6cb42123f6dbde36122a4c81a06282370e1e Mon Sep 17 00:00:00 2001 From: Hannes Eder Date: Wed, 25 Feb 2009 10:31:37 +0000 Subject: 9p: fix sparse warning: cast adds address space Impact: Trust in the comment and add '__force' to the cast. Fix this sparse warning: net/9p/trans_fd.c:420:34: warning: cast adds address space to expression () Signed-off-by: Hannes Eder Signed-off-by: David S. Miller --- net/9p/trans_fd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 1df0356f242b..c613ed08a5ee 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -417,7 +417,7 @@ static int p9_fd_write(struct p9_client *client, void *v, int len) oldfs = get_fs(); set_fs(get_ds()); /* The cast to a user pointer is valid due to the set_fs() */ - ret = vfs_write(ts->wr, (void __user *)v, len, &ts->wr->f_pos); + ret = vfs_write(ts->wr, (__force void __user *)v, len, &ts->wr->f_pos); set_fs(oldfs); if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) -- cgit v1.2.3 From 2db096086e8ff6c2a42881966e441157c686603b Mon Sep 17 00:00:00 2001 From: Hannes Eder Date: Wed, 25 Feb 2009 10:31:59 +0000 Subject: appletalk: fix warning: format not a string literal and no ... Impact: Use 'static const char[]' instead of 'static char[]', and since the data is const now it can be placed in __initconst. Fix this warning: net/appletalk/ddp.c: In function 'atalk_init': net/appletalk/ddp.c:1894: warning: format not a string literal and no format arguments Signed-off-by: Hannes Eder Signed-off-by: David S. Miller --- net/appletalk/ddp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 510a6782da8f..cf05c43cba52 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1877,7 +1877,7 @@ EXPORT_SYMBOL(aarp_send_ddp); EXPORT_SYMBOL(atrtr_get_dev); EXPORT_SYMBOL(atalk_find_dev_addr); -static char atalk_err_snap[] __initdata = +static const char atalk_err_snap[] __initconst = KERN_CRIT "Unable to register DDP with SNAP.\n"; /* Called by proto.c on kernel start up */ -- cgit v1.2.3 From 63d819caebcc3ca260ca0214d29044293969aa2f Mon Sep 17 00:00:00 2001 From: Hannes Eder Date: Wed, 25 Feb 2009 10:32:14 +0000 Subject: sysctl: fix sparse warning: Should it be static? Impact: Include header file. Fix this sparse warning: net/core/sysctl_net_core.c:123:32: warning: symbol 'net_core_path' was not declared. Should it be static? Signed-off-by: Hannes Eder Signed-off-by: David S. Miller --- net/core/sysctl_net_core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 83d3398559ea..7db1de0497c6 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -11,6 +11,7 @@ #include #include #include +#include #include static struct ctl_table net_core_table[] = { -- cgit v1.2.3 From 8521c27ee79533db444dcac291992b6bf8d61fc8 Mon Sep 17 00:00:00 2001 From: Hannes Eder Date: Wed, 25 Feb 2009 10:32:25 +0000 Subject: decnet: fix sparse warnings: context imbalance Impact: Attribute functions with __acquires(...) resp. __releases(...). Fix this sparse warnings: net/decnet/dn_dev.c:1324:13: warning: context imbalance in 'dn_dev_seq_start' - wrong count at exit net/decnet/dn_dev.c:1366:13: warning: context imbalance in 'dn_dev_seq_stop' - unexpected unlock Signed-off-by: Hannes Eder Signed-off-by: David S. Miller --- net/decnet/dn_dev.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index e457769bf7a7..b6c80f3b8185 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -1323,6 +1323,7 @@ static inline int is_dn_dev(struct net_device *dev) } static void *dn_dev_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(&dev_base_lock) { int i; struct net_device *dev; @@ -1365,6 +1366,7 @@ static void *dn_dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void dn_dev_seq_stop(struct seq_file *seq, void *v) + __releases(&dev_base_lock) { read_unlock(&dev_base_lock); } -- cgit v1.2.3 From e57c624be8f99e56560fd2f4f485fe29f28dd74f Mon Sep 17 00:00:00 2001 From: Hannes Eder Date: Wed, 25 Feb 2009 10:32:39 +0000 Subject: decnet: fix sparse warnings: symbol shadows an earlier one Impact: Remove redundant variable declarations, resp. rename inner scope variable. Fix this sparse warnings: net/decnet/af_decnet.c:1252:40: warning: symbol 'skb' shadows an earlier one net/decnet/af_decnet.c:1223:24: originally declared here net/decnet/af_decnet.c:1582:29: warning: symbol 'val' shadows an earlier one net/decnet/af_decnet.c:1527:22: originally declared here net/decnet/dn_dev.c:687:21: warning: symbol 'err' shadows an earlier one net/decnet/dn_dev.c:670:13: originally declared here net/decnet/sysctl_net_decnet.c:182:21: warning: symbol 'len' shadows an earlier one net/decnet/sysctl_net_decnet.c:173:16: originally declared here Signed-off-by: Hannes Eder Signed-off-by: David S. Miller --- net/decnet/af_decnet.c | 17 +++++++++-------- net/decnet/dn_dev.c | 1 - net/decnet/sysctl_net_decnet.c | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 963da86d4ecf..ec233b64f853 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -1246,11 +1246,12 @@ static int dn_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case TIOCINQ: lock_sock(sk); - if ((skb = skb_peek(&scp->other_receive_queue)) != NULL) { + skb = skb_peek(&scp->other_receive_queue); + if (skb) { amount = skb->len; } else { - struct sk_buff *skb = sk->sk_receive_queue.next; - for(;;) { + skb = sk->sk_receive_queue.next; + for (;;) { if (skb == (struct sk_buff *)&sk->sk_receive_queue) break; @@ -1579,16 +1580,16 @@ static int __dn_getsockopt(struct socket *sock, int level,int optname, char __us default: #ifdef CONFIG_NETFILTER { - int val, len; + int ret, len; if(get_user(len, optlen)) return -EFAULT; - val = nf_getsockopt(sk, PF_DECnet, optname, + ret = nf_getsockopt(sk, PF_DECnet, optname, optval, &len); - if (val >= 0) - val = put_user(len, optlen); - return val; + if (ret >= 0) + ret = put_user(len, optlen); + return ret; } #endif case DSO_STREAM: diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index b6c80f3b8185..1c6a5bb6f0c8 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -684,7 +684,6 @@ static int dn_nl_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return -ENODEV; if ((dn_db = dev->dn_ptr) == NULL) { - int err; dn_db = dn_dev_create(dev, &err); if (!dn_db) return err; diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index 965397af9a80..5bcd592ae6dd 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -179,7 +179,7 @@ static int dn_node_address_handler(ctl_table *table, int write, } if (write) { - int len = (*lenp < DN_ASCBUF_LEN) ? *lenp : (DN_ASCBUF_LEN-1); + len = (*lenp < DN_ASCBUF_LEN) ? *lenp : (DN_ASCBUF_LEN-1); if (copy_from_user(addr, buffer, len)) return -EFAULT; -- cgit v1.2.3 From 56bca31ff1989aa8b60f717e984b0e624f06324e Mon Sep 17 00:00:00 2001 From: Hannes Eder Date: Wed, 25 Feb 2009 10:32:52 +0000 Subject: inet fragments: fix sparse warning: context imbalance Impact: Attribute function with __releases(...) Fix this sparse warning: net/ipv4/inet_fragment.c:276:35: warning: context imbalance in 'inet_frag_find' - unexpected unlock Signed-off-by: Hannes Eder Signed-off-by: David S. Miller --- net/ipv4/inet_fragment.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 6c52e08f786e..eaf3e2c8646a 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -267,6 +267,7 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf, struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, struct inet_frags *f, void *key, unsigned int hash) + __releases(&f->lock) { struct inet_frag_queue *q; struct hlist_node *n; -- cgit v1.2.3 From 9ee62630fddda0142cebc45e3bbea20a8009be97 Mon Sep 17 00:00:00 2001 From: Hannes Eder Date: Wed, 25 Feb 2009 10:33:06 +0000 Subject: wanrouter: fix sparse warnings: context imbalance Impact: Attribute functions with __acquires(...) resp. __releases(...). Fix this sparse warnings: net/wanrouter/wanproc.c:82:13: warning: context imbalance in 'r_start' - wrong count at exit net/wanrouter/wanproc.c:103:13: warning: context imbalance in 'r_stop' - unexpected unlock net/wanrouter/wanmain.c:765:13: warning: context imbalance in 'lock_adapter_irq' - wrong count at exit net/wanrouter/wanmain.c:771:13: warning: context imbalance in 'unlock_adapter_irq' - unexpected unlock Signed-off-by: Hannes Eder Signed-off-by: David S. Miller --- net/wanrouter/wanmain.c | 8 ++++++-- net/wanrouter/wanproc.c | 2 ++ 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wanrouter/wanmain.c b/net/wanrouter/wanmain.c index 39701dec1dba..466e2d22d256 100644 --- a/net/wanrouter/wanmain.c +++ b/net/wanrouter/wanmain.c @@ -86,8 +86,10 @@ static int wanrouter_device_del_if(struct wan_device *wandev, static struct wan_device *wanrouter_find_device(char *name); static int wanrouter_delete_interface(struct wan_device *wandev, char *name); -static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags); -static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags); +static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) + __acquires(lock); +static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) + __releases(lock); @@ -763,12 +765,14 @@ static int wanrouter_delete_interface(struct wan_device *wandev, char *name) } static void lock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) + __acquires(lock) { spin_lock_irqsave(lock, *smp_flags); } static void unlock_adapter_irq(spinlock_t *lock, unsigned long *smp_flags) + __releases(lock) { spin_unlock_irqrestore(lock, *smp_flags); } diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c index 267f7ff49827..c44d96b3a437 100644 --- a/net/wanrouter/wanproc.c +++ b/net/wanrouter/wanproc.c @@ -80,6 +80,7 @@ static struct proc_dir_entry *proc_router; * Iterator */ static void *r_start(struct seq_file *m, loff_t *pos) + __acquires(kernel_lock) { struct wan_device *wandev; loff_t l = *pos; @@ -101,6 +102,7 @@ static void *r_next(struct seq_file *m, void *v, loff_t *pos) } static void r_stop(struct seq_file *m, void *v) + __releases(kernel_lock) { unlock_kernel(); } -- cgit v1.2.3 From 639b321b4d8f4e412bfbb2a4a19bfebc1e68ace4 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 24 Feb 2009 15:30:18 +0000 Subject: RDS: Socket interface Implement the RDS (Reliable Datagram Sockets) interface. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/af_rds.c | 586 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ net/rds/bind.c | 199 +++++++++++++++++++ 2 files changed, 785 insertions(+) create mode 100644 net/rds/af_rds.c create mode 100644 net/rds/bind.c (limited to 'net') diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c new file mode 100644 index 000000000000..20cf16fc572f --- /dev/null +++ b/net/rds/af_rds.c @@ -0,0 +1,586 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include +#include +#include +#include +#include + +#include "rds.h" +#include "rdma.h" +#include "rdma_transport.h" + +/* this is just used for stats gathering :/ */ +static DEFINE_SPINLOCK(rds_sock_lock); +static unsigned long rds_sock_count; +static LIST_HEAD(rds_sock_list); +DECLARE_WAIT_QUEUE_HEAD(rds_poll_waitq); + +/* + * This is called as the final descriptor referencing this socket is closed. + * We have to unbind the socket so that another socket can be bound to the + * address it was using. + * + * We have to be careful about racing with the incoming path. sock_orphan() + * sets SOCK_DEAD and we use that as an indicator to the rx path that new + * messages shouldn't be queued. + */ +static int rds_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + struct rds_sock *rs; + unsigned long flags; + + if (sk == NULL) + goto out; + + rs = rds_sk_to_rs(sk); + + sock_orphan(sk); + /* Note - rds_clear_recv_queue grabs rs_recv_lock, so + * that ensures the recv path has completed messing + * with the socket. */ + rds_clear_recv_queue(rs); + rds_cong_remove_socket(rs); + rds_remove_bound(rs); + rds_send_drop_to(rs, NULL); + rds_rdma_drop_keys(rs); + rds_notify_queue_get(rs, NULL); + + spin_lock_irqsave(&rds_sock_lock, flags); + list_del_init(&rs->rs_item); + rds_sock_count--; + spin_unlock_irqrestore(&rds_sock_lock, flags); + + sock->sk = NULL; + sock_put(sk); +out: + return 0; +} + +/* + * Careful not to race with rds_release -> sock_orphan which clears sk_sleep. + * _bh() isn't OK here, we're called from interrupt handlers. It's probably OK + * to wake the waitqueue after sk_sleep is clear as we hold a sock ref, but + * this seems more conservative. + * NB - normally, one would use sk_callback_lock for this, but we can + * get here from interrupts, whereas the network code grabs sk_callback_lock + * with _lock_bh only - so relying on sk_callback_lock introduces livelocks. + */ +void rds_wake_sk_sleep(struct rds_sock *rs) +{ + unsigned long flags; + + read_lock_irqsave(&rs->rs_recv_lock, flags); + __rds_wake_sk_sleep(rds_rs_to_sk(rs)); + read_unlock_irqrestore(&rs->rs_recv_lock, flags); +} + +static int rds_getname(struct socket *sock, struct sockaddr *uaddr, + int *uaddr_len, int peer) +{ + struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; + struct rds_sock *rs = rds_sk_to_rs(sock->sk); + + memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + + /* racey, don't care */ + if (peer) { + if (!rs->rs_conn_addr) + return -ENOTCONN; + + sin->sin_port = rs->rs_conn_port; + sin->sin_addr.s_addr = rs->rs_conn_addr; + } else { + sin->sin_port = rs->rs_bound_port; + sin->sin_addr.s_addr = rs->rs_bound_addr; + } + + sin->sin_family = AF_INET; + + *uaddr_len = sizeof(*sin); + return 0; +} + +/* + * RDS' poll is without a doubt the least intuitive part of the interface, + * as POLLIN and POLLOUT do not behave entirely as you would expect from + * a network protocol. + * + * POLLIN is asserted if + * - there is data on the receive queue. + * - to signal that a previously congested destination may have become + * uncongested + * - A notification has been queued to the socket (this can be a congestion + * update, or a RDMA completion). + * + * POLLOUT is asserted if there is room on the send queue. This does not mean + * however, that the next sendmsg() call will succeed. If the application tries + * to send to a congested destination, the system call may still fail (and + * return ENOBUFS). + */ +static unsigned int rds_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + struct sock *sk = sock->sk; + struct rds_sock *rs = rds_sk_to_rs(sk); + unsigned int mask = 0; + unsigned long flags; + + poll_wait(file, sk->sk_sleep, wait); + + poll_wait(file, &rds_poll_waitq, wait); + + read_lock_irqsave(&rs->rs_recv_lock, flags); + if (!rs->rs_cong_monitor) { + /* When a congestion map was updated, we signal POLLIN for + * "historical" reasons. Applications can also poll for + * WRBAND instead. */ + if (rds_cong_updated_since(&rs->rs_cong_track)) + mask |= (POLLIN | POLLRDNORM | POLLWRBAND); + } else { + spin_lock(&rs->rs_lock); + if (rs->rs_cong_notify) + mask |= (POLLIN | POLLRDNORM); + spin_unlock(&rs->rs_lock); + } + if (!list_empty(&rs->rs_recv_queue) + || !list_empty(&rs->rs_notify_queue)) + mask |= (POLLIN | POLLRDNORM); + if (rs->rs_snd_bytes < rds_sk_sndbuf(rs)) + mask |= (POLLOUT | POLLWRNORM); + read_unlock_irqrestore(&rs->rs_recv_lock, flags); + + return mask; +} + +static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) +{ + return -ENOIOCTLCMD; +} + +static int rds_cancel_sent_to(struct rds_sock *rs, char __user *optval, + int len) +{ + struct sockaddr_in sin; + int ret = 0; + + /* racing with another thread binding seems ok here */ + if (rs->rs_bound_addr == 0) { + ret = -ENOTCONN; /* XXX not a great errno */ + goto out; + } + + if (len < sizeof(struct sockaddr_in)) { + ret = -EINVAL; + goto out; + } + + if (copy_from_user(&sin, optval, sizeof(sin))) { + ret = -EFAULT; + goto out; + } + + rds_send_drop_to(rs, &sin); +out: + return ret; +} + +static int rds_set_bool_option(unsigned char *optvar, char __user *optval, + int optlen) +{ + int value; + + if (optlen < sizeof(int)) + return -EINVAL; + if (get_user(value, (int __user *) optval)) + return -EFAULT; + *optvar = !!value; + return 0; +} + +static int rds_cong_monitor(struct rds_sock *rs, char __user *optval, + int optlen) +{ + int ret; + + ret = rds_set_bool_option(&rs->rs_cong_monitor, optval, optlen); + if (ret == 0) { + if (rs->rs_cong_monitor) { + rds_cong_add_socket(rs); + } else { + rds_cong_remove_socket(rs); + rs->rs_cong_mask = 0; + rs->rs_cong_notify = 0; + } + } + return ret; +} + +static int rds_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, int optlen) +{ + struct rds_sock *rs = rds_sk_to_rs(sock->sk); + int ret; + + if (level != SOL_RDS) { + ret = -ENOPROTOOPT; + goto out; + } + + switch (optname) { + case RDS_CANCEL_SENT_TO: + ret = rds_cancel_sent_to(rs, optval, optlen); + break; + case RDS_GET_MR: + ret = rds_get_mr(rs, optval, optlen); + break; + case RDS_FREE_MR: + ret = rds_free_mr(rs, optval, optlen); + break; + case RDS_RECVERR: + ret = rds_set_bool_option(&rs->rs_recverr, optval, optlen); + break; + case RDS_CONG_MONITOR: + ret = rds_cong_monitor(rs, optval, optlen); + break; + default: + ret = -ENOPROTOOPT; + } +out: + return ret; +} + +static int rds_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct rds_sock *rs = rds_sk_to_rs(sock->sk); + int ret = -ENOPROTOOPT, len; + + if (level != SOL_RDS) + goto out; + + if (get_user(len, optlen)) { + ret = -EFAULT; + goto out; + } + + switch (optname) { + case RDS_INFO_FIRST ... RDS_INFO_LAST: + ret = rds_info_getsockopt(sock, optname, optval, + optlen); + break; + + case RDS_RECVERR: + if (len < sizeof(int)) + ret = -EINVAL; + else + if (put_user(rs->rs_recverr, (int __user *) optval) + || put_user(sizeof(int), optlen)) + ret = -EFAULT; + else + ret = 0; + break; + default: + break; + } + +out: + return ret; + +} + +static int rds_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags) +{ + struct sock *sk = sock->sk; + struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; + struct rds_sock *rs = rds_sk_to_rs(sk); + int ret = 0; + + lock_sock(sk); + + if (addr_len != sizeof(struct sockaddr_in)) { + ret = -EINVAL; + goto out; + } + + if (sin->sin_family != AF_INET) { + ret = -EAFNOSUPPORT; + goto out; + } + + if (sin->sin_addr.s_addr == htonl(INADDR_ANY)) { + ret = -EDESTADDRREQ; + goto out; + } + + rs->rs_conn_addr = sin->sin_addr.s_addr; + rs->rs_conn_port = sin->sin_port; + +out: + release_sock(sk); + return ret; +} + +static struct proto rds_proto = { + .name = "RDS", + .owner = THIS_MODULE, + .obj_size = sizeof(struct rds_sock), +}; + +static struct proto_ops rds_proto_ops = { + .family = AF_RDS, + .owner = THIS_MODULE, + .release = rds_release, + .bind = rds_bind, + .connect = rds_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = rds_getname, + .poll = rds_poll, + .ioctl = rds_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = rds_setsockopt, + .getsockopt = rds_getsockopt, + .sendmsg = rds_sendmsg, + .recvmsg = rds_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +static int __rds_create(struct socket *sock, struct sock *sk, int protocol) +{ + unsigned long flags; + struct rds_sock *rs; + + sock_init_data(sock, sk); + sock->ops = &rds_proto_ops; + sk->sk_protocol = protocol; + + rs = rds_sk_to_rs(sk); + spin_lock_init(&rs->rs_lock); + rwlock_init(&rs->rs_recv_lock); + INIT_LIST_HEAD(&rs->rs_send_queue); + INIT_LIST_HEAD(&rs->rs_recv_queue); + INIT_LIST_HEAD(&rs->rs_notify_queue); + INIT_LIST_HEAD(&rs->rs_cong_list); + spin_lock_init(&rs->rs_rdma_lock); + rs->rs_rdma_keys = RB_ROOT; + + spin_lock_irqsave(&rds_sock_lock, flags); + list_add_tail(&rs->rs_item, &rds_sock_list); + rds_sock_count++; + spin_unlock_irqrestore(&rds_sock_lock, flags); + + return 0; +} + +static int rds_create(struct net *net, struct socket *sock, int protocol) +{ + struct sock *sk; + + if (sock->type != SOCK_SEQPACKET || protocol) + return -ESOCKTNOSUPPORT; + + sk = sk_alloc(net, AF_RDS, GFP_ATOMIC, &rds_proto); + if (!sk) + return -ENOMEM; + + return __rds_create(sock, sk, protocol); +} + +void rds_sock_addref(struct rds_sock *rs) +{ + sock_hold(rds_rs_to_sk(rs)); +} + +void rds_sock_put(struct rds_sock *rs) +{ + sock_put(rds_rs_to_sk(rs)); +} + +static struct net_proto_family rds_family_ops = { + .family = AF_RDS, + .create = rds_create, + .owner = THIS_MODULE, +}; + +static void rds_sock_inc_info(struct socket *sock, unsigned int len, + struct rds_info_iterator *iter, + struct rds_info_lengths *lens) +{ + struct rds_sock *rs; + struct sock *sk; + struct rds_incoming *inc; + unsigned long flags; + unsigned int total = 0; + + len /= sizeof(struct rds_info_message); + + spin_lock_irqsave(&rds_sock_lock, flags); + + list_for_each_entry(rs, &rds_sock_list, rs_item) { + sk = rds_rs_to_sk(rs); + read_lock(&rs->rs_recv_lock); + + /* XXX too lazy to maintain counts.. */ + list_for_each_entry(inc, &rs->rs_recv_queue, i_item) { + total++; + if (total <= len) + rds_inc_info_copy(inc, iter, inc->i_saddr, + rs->rs_bound_addr, 1); + } + + read_unlock(&rs->rs_recv_lock); + } + + spin_unlock_irqrestore(&rds_sock_lock, flags); + + lens->nr = total; + lens->each = sizeof(struct rds_info_message); +} + +static void rds_sock_info(struct socket *sock, unsigned int len, + struct rds_info_iterator *iter, + struct rds_info_lengths *lens) +{ + struct rds_info_socket sinfo; + struct rds_sock *rs; + unsigned long flags; + + len /= sizeof(struct rds_info_socket); + + spin_lock_irqsave(&rds_sock_lock, flags); + + if (len < rds_sock_count) + goto out; + + list_for_each_entry(rs, &rds_sock_list, rs_item) { + sinfo.sndbuf = rds_sk_sndbuf(rs); + sinfo.rcvbuf = rds_sk_rcvbuf(rs); + sinfo.bound_addr = rs->rs_bound_addr; + sinfo.connected_addr = rs->rs_conn_addr; + sinfo.bound_port = rs->rs_bound_port; + sinfo.connected_port = rs->rs_conn_port; + sinfo.inum = sock_i_ino(rds_rs_to_sk(rs)); + + rds_info_copy(iter, &sinfo, sizeof(sinfo)); + } + +out: + lens->nr = rds_sock_count; + lens->each = sizeof(struct rds_info_socket); + + spin_unlock_irqrestore(&rds_sock_lock, flags); +} + +static void __exit rds_exit(void) +{ + rds_rdma_exit(); + sock_unregister(rds_family_ops.family); + proto_unregister(&rds_proto); + rds_conn_exit(); + rds_cong_exit(); + rds_sysctl_exit(); + rds_threads_exit(); + rds_stats_exit(); + rds_page_exit(); + rds_info_deregister_func(RDS_INFO_SOCKETS, rds_sock_info); + rds_info_deregister_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info); +} +module_exit(rds_exit); + +static int __init rds_init(void) +{ + int ret; + + ret = rds_conn_init(); + if (ret) + goto out; + ret = rds_threads_init(); + if (ret) + goto out_conn; + ret = rds_sysctl_init(); + if (ret) + goto out_threads; + ret = rds_stats_init(); + if (ret) + goto out_sysctl; + ret = proto_register(&rds_proto, 1); + if (ret) + goto out_stats; + ret = sock_register(&rds_family_ops); + if (ret) + goto out_proto; + + rds_info_register_func(RDS_INFO_SOCKETS, rds_sock_info); + rds_info_register_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info); + + /* ib/iwarp transports currently compiled-in */ + ret = rds_rdma_init(); + if (ret) + goto out_sock; + goto out; + +out_sock: + sock_unregister(rds_family_ops.family); +out_proto: + proto_unregister(&rds_proto); +out_stats: + rds_stats_exit(); +out_sysctl: + rds_sysctl_exit(); +out_threads: + rds_threads_exit(); +out_conn: + rds_conn_exit(); + rds_cong_exit(); + rds_page_exit(); +out: + return ret; +} +module_init(rds_init); + +#define DRV_VERSION "4.0" +#define DRV_RELDATE "Feb 12, 2009" + +MODULE_AUTHOR("Oracle Corporation "); +MODULE_DESCRIPTION("RDS: Reliable Datagram Sockets" + " v" DRV_VERSION " (" DRV_RELDATE ")"); +MODULE_VERSION(DRV_VERSION); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_ALIAS_NETPROTO(PF_RDS); diff --git a/net/rds/bind.c b/net/rds/bind.c new file mode 100644 index 000000000000..c17cc39160ce --- /dev/null +++ b/net/rds/bind.c @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include +#include +#include "rds.h" + +/* + * XXX this probably still needs more work.. no INADDR_ANY, and rbtrees aren't + * particularly zippy. + * + * This is now called for every incoming frame so we arguably care much more + * about it than we used to. + */ +static DEFINE_SPINLOCK(rds_bind_lock); +static struct rb_root rds_bind_tree = RB_ROOT; + +static struct rds_sock *rds_bind_tree_walk(__be32 addr, __be16 port, + struct rds_sock *insert) +{ + struct rb_node **p = &rds_bind_tree.rb_node; + struct rb_node *parent = NULL; + struct rds_sock *rs; + u64 cmp; + u64 needle = ((u64)be32_to_cpu(addr) << 32) | be16_to_cpu(port); + + while (*p) { + parent = *p; + rs = rb_entry(parent, struct rds_sock, rs_bound_node); + + cmp = ((u64)be32_to_cpu(rs->rs_bound_addr) << 32) | + be16_to_cpu(rs->rs_bound_port); + + if (needle < cmp) + p = &(*p)->rb_left; + else if (needle > cmp) + p = &(*p)->rb_right; + else + return rs; + } + + if (insert) { + rb_link_node(&insert->rs_bound_node, parent, p); + rb_insert_color(&insert->rs_bound_node, &rds_bind_tree); + } + return NULL; +} + +/* + * Return the rds_sock bound at the given local address. + * + * The rx path can race with rds_release. We notice if rds_release() has + * marked this socket and don't return a rs ref to the rx path. + */ +struct rds_sock *rds_find_bound(__be32 addr, __be16 port) +{ + struct rds_sock *rs; + unsigned long flags; + + spin_lock_irqsave(&rds_bind_lock, flags); + rs = rds_bind_tree_walk(addr, port, NULL); + if (rs && !sock_flag(rds_rs_to_sk(rs), SOCK_DEAD)) + rds_sock_addref(rs); + else + rs = NULL; + spin_unlock_irqrestore(&rds_bind_lock, flags); + + rdsdebug("returning rs %p for %pI4:%u\n", rs, &addr, + ntohs(port)); + return rs; +} + +/* returns -ve errno or +ve port */ +static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port) +{ + unsigned long flags; + int ret = -EADDRINUSE; + u16 rover, last; + + if (*port != 0) { + rover = be16_to_cpu(*port); + last = rover; + } else { + rover = max_t(u16, net_random(), 2); + last = rover - 1; + } + + spin_lock_irqsave(&rds_bind_lock, flags); + + do { + if (rover == 0) + rover++; + if (rds_bind_tree_walk(addr, cpu_to_be16(rover), rs) == NULL) { + *port = cpu_to_be16(rover); + ret = 0; + break; + } + } while (rover++ != last); + + if (ret == 0) { + rs->rs_bound_addr = addr; + rs->rs_bound_port = *port; + rds_sock_addref(rs); + + rdsdebug("rs %p binding to %pI4:%d\n", + rs, &addr, (int)ntohs(*port)); + } + + spin_unlock_irqrestore(&rds_bind_lock, flags); + + return ret; +} + +void rds_remove_bound(struct rds_sock *rs) +{ + unsigned long flags; + + spin_lock_irqsave(&rds_bind_lock, flags); + + if (rs->rs_bound_addr) { + rdsdebug("rs %p unbinding from %pI4:%d\n", + rs, &rs->rs_bound_addr, + ntohs(rs->rs_bound_port)); + + rb_erase(&rs->rs_bound_node, &rds_bind_tree); + rds_sock_put(rs); + rs->rs_bound_addr = 0; + } + + spin_unlock_irqrestore(&rds_bind_lock, flags); +} + +int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) +{ + struct sock *sk = sock->sk; + struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; + struct rds_sock *rs = rds_sk_to_rs(sk); + struct rds_transport *trans; + int ret = 0; + + lock_sock(sk); + + if (addr_len != sizeof(struct sockaddr_in) || + sin->sin_family != AF_INET || + rs->rs_bound_addr || + sin->sin_addr.s_addr == htonl(INADDR_ANY)) { + ret = -EINVAL; + goto out; + } + + ret = rds_add_bound(rs, sin->sin_addr.s_addr, &sin->sin_port); + if (ret) + goto out; + + trans = rds_trans_get_preferred(sin->sin_addr.s_addr); + if (trans == NULL) { + ret = -EADDRNOTAVAIL; + rds_remove_bound(rs); + goto out; + } + + rs->rs_transport = trans; + ret = 0; + +out: + release_sock(sk); + return ret; +} -- cgit v1.2.3 From 39de8281791c4a01abcb0d32879530ffa5863c01 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 24 Feb 2009 15:30:19 +0000 Subject: RDS: Main header file RDS's main data structure definitions and exported functions. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/rds.h | 686 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 686 insertions(+) create mode 100644 net/rds/rds.h (limited to 'net') diff --git a/net/rds/rds.h b/net/rds/rds.h new file mode 100644 index 000000000000..060400704979 --- /dev/null +++ b/net/rds/rds.h @@ -0,0 +1,686 @@ +#ifndef _RDS_RDS_H +#define _RDS_RDS_H + +#include +#include +#include +#include +#include +#include + +#include "info.h" + +/* + * RDS Network protocol version + */ +#define RDS_PROTOCOL_3_0 0x0300 +#define RDS_PROTOCOL_3_1 0x0301 +#define RDS_PROTOCOL_VERSION RDS_PROTOCOL_3_1 +#define RDS_PROTOCOL_MAJOR(v) ((v) >> 8) +#define RDS_PROTOCOL_MINOR(v) ((v) & 255) +#define RDS_PROTOCOL(maj, min) (((maj) << 8) | min) + +/* + * XXX randomly chosen, but at least seems to be unused: + * # 18464-18768 Unassigned + * We should do better. We want a reserved port to discourage unpriv'ed + * userspace from listening. + */ +#define RDS_PORT 18634 + +#ifdef DEBUG +#define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args) +#else +/* sigh, pr_debug() causes unused variable warnings */ +static inline void __attribute__ ((format (printf, 1, 2))) +rdsdebug(char *fmt, ...) +{ +} +#endif + +/* XXX is there one of these somewhere? */ +#define ceil(x, y) \ + ({ unsigned long __x = (x), __y = (y); (__x + __y - 1) / __y; }) + +#define RDS_FRAG_SHIFT 12 +#define RDS_FRAG_SIZE ((unsigned int)(1 << RDS_FRAG_SHIFT)) + +#define RDS_CONG_MAP_BYTES (65536 / 8) +#define RDS_CONG_MAP_LONGS (RDS_CONG_MAP_BYTES / sizeof(unsigned long)) +#define RDS_CONG_MAP_PAGES (PAGE_ALIGN(RDS_CONG_MAP_BYTES) / PAGE_SIZE) +#define RDS_CONG_MAP_PAGE_BITS (PAGE_SIZE * 8) + +struct rds_cong_map { + struct rb_node m_rb_node; + __be32 m_addr; + wait_queue_head_t m_waitq; + struct list_head m_conn_list; + unsigned long m_page_addrs[RDS_CONG_MAP_PAGES]; +}; + + +/* + * This is how we will track the connection state: + * A connection is always in one of the following + * states. Updates to the state are atomic and imply + * a memory barrier. + */ +enum { + RDS_CONN_DOWN = 0, + RDS_CONN_CONNECTING, + RDS_CONN_DISCONNECTING, + RDS_CONN_UP, + RDS_CONN_ERROR, +}; + +/* Bits for c_flags */ +#define RDS_LL_SEND_FULL 0 +#define RDS_RECONNECT_PENDING 1 + +struct rds_connection { + struct hlist_node c_hash_node; + __be32 c_laddr; + __be32 c_faddr; + unsigned int c_loopback:1; + struct rds_connection *c_passive; + + struct rds_cong_map *c_lcong; + struct rds_cong_map *c_fcong; + + struct mutex c_send_lock; /* protect send ring */ + struct rds_message *c_xmit_rm; + unsigned long c_xmit_sg; + unsigned int c_xmit_hdr_off; + unsigned int c_xmit_data_off; + unsigned int c_xmit_rdma_sent; + + spinlock_t c_lock; /* protect msg queues */ + u64 c_next_tx_seq; + struct list_head c_send_queue; + struct list_head c_retrans; + + u64 c_next_rx_seq; + + struct rds_transport *c_trans; + void *c_transport_data; + + atomic_t c_state; + unsigned long c_flags; + unsigned long c_reconnect_jiffies; + struct delayed_work c_send_w; + struct delayed_work c_recv_w; + struct delayed_work c_conn_w; + struct work_struct c_down_w; + struct mutex c_cm_lock; /* protect conn state & cm */ + + struct list_head c_map_item; + unsigned long c_map_queued; + unsigned long c_map_offset; + unsigned long c_map_bytes; + + unsigned int c_unacked_packets; + unsigned int c_unacked_bytes; + + /* Protocol version */ + unsigned int c_version; +}; + +#define RDS_FLAG_CONG_BITMAP 0x01 +#define RDS_FLAG_ACK_REQUIRED 0x02 +#define RDS_FLAG_RETRANSMITTED 0x04 +#define RDS_MAX_ADV_CREDIT 127 + +/* + * Maximum space available for extension headers. + */ +#define RDS_HEADER_EXT_SPACE 16 + +struct rds_header { + __be64 h_sequence; + __be64 h_ack; + __be32 h_len; + __be16 h_sport; + __be16 h_dport; + u8 h_flags; + u8 h_credit; + u8 h_padding[4]; + __sum16 h_csum; + + u8 h_exthdr[RDS_HEADER_EXT_SPACE]; +}; + +/* + * Reserved - indicates end of extensions + */ +#define RDS_EXTHDR_NONE 0 + +/* + * This extension header is included in the very + * first message that is sent on a new connection, + * and identifies the protocol level. This will help + * rolling updates if a future change requires breaking + * the protocol. + * NB: This is no longer true for IB, where we do a version + * negotiation during the connection setup phase (protocol + * version information is included in the RDMA CM private data). + */ +#define RDS_EXTHDR_VERSION 1 +struct rds_ext_header_version { + __be32 h_version; +}; + +/* + * This extension header is included in the RDS message + * chasing an RDMA operation. + */ +#define RDS_EXTHDR_RDMA 2 +struct rds_ext_header_rdma { + __be32 h_rdma_rkey; +}; + +/* + * This extension header tells the peer about the + * destination of the requested RDMA + * operation. + */ +#define RDS_EXTHDR_RDMA_DEST 3 +struct rds_ext_header_rdma_dest { + __be32 h_rdma_rkey; + __be32 h_rdma_offset; +}; + +#define __RDS_EXTHDR_MAX 16 /* for now */ + +struct rds_incoming { + atomic_t i_refcount; + struct list_head i_item; + struct rds_connection *i_conn; + struct rds_header i_hdr; + unsigned long i_rx_jiffies; + __be32 i_saddr; + + rds_rdma_cookie_t i_rdma_cookie; +}; + +/* + * m_sock_item and m_conn_item are on lists that are serialized under + * conn->c_lock. m_sock_item has additional meaning in that once it is empty + * the message will not be put back on the retransmit list after being sent. + * messages that are canceled while being sent rely on this. + * + * m_inc is used by loopback so that it can pass an incoming message straight + * back up into the rx path. It embeds a wire header which is also used by + * the send path, which is kind of awkward. + * + * m_sock_item indicates the message's presence on a socket's send or receive + * queue. m_rs will point to that socket. + * + * m_daddr is used by cancellation to prune messages to a given destination. + * + * The RDS_MSG_ON_SOCK and RDS_MSG_ON_CONN flags are used to avoid lock + * nesting. As paths iterate over messages on a sock, or conn, they must + * also lock the conn, or sock, to remove the message from those lists too. + * Testing the flag to determine if the message is still on the lists lets + * us avoid testing the list_head directly. That means each path can use + * the message's list_head to keep it on a local list while juggling locks + * without confusing the other path. + * + * m_ack_seq is an optional field set by transports who need a different + * sequence number range to invalidate. They can use this in a callback + * that they pass to rds_send_drop_acked() to see if each message has been + * acked. The HAS_ACK_SEQ flag can be used to detect messages which haven't + * had ack_seq set yet. + */ +#define RDS_MSG_ON_SOCK 1 +#define RDS_MSG_ON_CONN 2 +#define RDS_MSG_HAS_ACK_SEQ 3 +#define RDS_MSG_ACK_REQUIRED 4 +#define RDS_MSG_RETRANSMITTED 5 +#define RDS_MSG_MAPPED 6 +#define RDS_MSG_PAGEVEC 7 + +struct rds_message { + atomic_t m_refcount; + struct list_head m_sock_item; + struct list_head m_conn_item; + struct rds_incoming m_inc; + u64 m_ack_seq; + __be32 m_daddr; + unsigned long m_flags; + + /* Never access m_rs without holding m_rs_lock. + * Lock nesting is + * rm->m_rs_lock + * -> rs->rs_lock + */ + spinlock_t m_rs_lock; + struct rds_sock *m_rs; + struct rds_rdma_op *m_rdma_op; + rds_rdma_cookie_t m_rdma_cookie; + struct rds_mr *m_rdma_mr; + unsigned int m_nents; + unsigned int m_count; + struct scatterlist m_sg[0]; +}; + +/* + * The RDS notifier is used (optionally) to tell the application about + * completed RDMA operations. Rather than keeping the whole rds message + * around on the queue, we allocate a small notifier that is put on the + * socket's notifier_list. Notifications are delivered to the application + * through control messages. + */ +struct rds_notifier { + struct list_head n_list; + uint64_t n_user_token; + int n_status; +}; + +/** + * struct rds_transport - transport specific behavioural hooks + * + * @xmit: .xmit is called by rds_send_xmit() to tell the transport to send + * part of a message. The caller serializes on the send_sem so this + * doesn't need to be reentrant for a given conn. The header must be + * sent before the data payload. .xmit must be prepared to send a + * message with no data payload. .xmit should return the number of + * bytes that were sent down the connection, including header bytes. + * Returning 0 tells the caller that it doesn't need to perform any + * additional work now. This is usually the case when the transport has + * filled the sending queue for its connection and will handle + * triggering the rds thread to continue the send when space becomes + * available. Returning -EAGAIN tells the caller to retry the send + * immediately. Returning -ENOMEM tells the caller to retry the send at + * some point in the future. + * + * @conn_shutdown: conn_shutdown stops traffic on the given connection. Once + * it returns the connection can not call rds_recv_incoming(). + * This will only be called once after conn_connect returns + * non-zero success and will The caller serializes this with + * the send and connecting paths (xmit_* and conn_*). The + * transport is responsible for other serialization, including + * rds_recv_incoming(). This is called in process context but + * should try hard not to block. + * + * @xmit_cong_map: This asks the transport to send the local bitmap down the + * given connection. XXX get a better story about the bitmap + * flag and header. + */ + +struct rds_transport { + char t_name[TRANSNAMSIZ]; + struct list_head t_item; + struct module *t_owner; + unsigned int t_prefer_loopback:1; + + int (*laddr_check)(__be32 addr); + int (*conn_alloc)(struct rds_connection *conn, gfp_t gfp); + void (*conn_free)(void *data); + int (*conn_connect)(struct rds_connection *conn); + void (*conn_shutdown)(struct rds_connection *conn); + void (*xmit_prepare)(struct rds_connection *conn); + void (*xmit_complete)(struct rds_connection *conn); + int (*xmit)(struct rds_connection *conn, struct rds_message *rm, + unsigned int hdr_off, unsigned int sg, unsigned int off); + int (*xmit_cong_map)(struct rds_connection *conn, + struct rds_cong_map *map, unsigned long offset); + int (*xmit_rdma)(struct rds_connection *conn, struct rds_rdma_op *op); + int (*recv)(struct rds_connection *conn); + int (*inc_copy_to_user)(struct rds_incoming *inc, struct iovec *iov, + size_t size); + void (*inc_purge)(struct rds_incoming *inc); + void (*inc_free)(struct rds_incoming *inc); + + int (*cm_handle_connect)(struct rdma_cm_id *cm_id, + struct rdma_cm_event *event); + int (*cm_initiate_connect)(struct rdma_cm_id *cm_id); + void (*cm_connect_complete)(struct rds_connection *conn, + struct rdma_cm_event *event); + + unsigned int (*stats_info_copy)(struct rds_info_iterator *iter, + unsigned int avail); + void (*exit)(void); + void *(*get_mr)(struct scatterlist *sg, unsigned long nr_sg, + struct rds_sock *rs, u32 *key_ret); + void (*sync_mr)(void *trans_private, int direction); + void (*free_mr)(void *trans_private, int invalidate); + void (*flush_mrs)(void); +}; + +struct rds_sock { + struct sock rs_sk; + + u64 rs_user_addr; + u64 rs_user_bytes; + + /* + * bound_addr used for both incoming and outgoing, no INADDR_ANY + * support. + */ + struct rb_node rs_bound_node; + __be32 rs_bound_addr; + __be32 rs_conn_addr; + __be16 rs_bound_port; + __be16 rs_conn_port; + + /* + * This is only used to communicate the transport between bind and + * initiating connections. All other trans use is referenced through + * the connection. + */ + struct rds_transport *rs_transport; + + /* + * rds_sendmsg caches the conn it used the last time around. + * This helps avoid costly lookups. + */ + struct rds_connection *rs_conn; + + /* flag indicating we were congested or not */ + int rs_congested; + + /* rs_lock protects all these adjacent members before the newline */ + spinlock_t rs_lock; + struct list_head rs_send_queue; + u32 rs_snd_bytes; + int rs_rcv_bytes; + struct list_head rs_notify_queue; /* currently used for failed RDMAs */ + + /* Congestion wake_up. If rs_cong_monitor is set, we use cong_mask + * to decide whether the application should be woken up. + * If not set, we use rs_cong_track to find out whether a cong map + * update arrived. + */ + uint64_t rs_cong_mask; + uint64_t rs_cong_notify; + struct list_head rs_cong_list; + unsigned long rs_cong_track; + + /* + * rs_recv_lock protects the receive queue, and is + * used to serialize with rds_release. + */ + rwlock_t rs_recv_lock; + struct list_head rs_recv_queue; + + /* just for stats reporting */ + struct list_head rs_item; + + /* these have their own lock */ + spinlock_t rs_rdma_lock; + struct rb_root rs_rdma_keys; + + /* Socket options - in case there will be more */ + unsigned char rs_recverr, + rs_cong_monitor; +}; + +static inline struct rds_sock *rds_sk_to_rs(const struct sock *sk) +{ + return container_of(sk, struct rds_sock, rs_sk); +} +static inline struct sock *rds_rs_to_sk(struct rds_sock *rs) +{ + return &rs->rs_sk; +} + +/* + * The stack assigns sk_sndbuf and sk_rcvbuf to twice the specified value + * to account for overhead. We don't account for overhead, we just apply + * the number of payload bytes to the specified value. + */ +static inline int rds_sk_sndbuf(struct rds_sock *rs) +{ + return rds_rs_to_sk(rs)->sk_sndbuf / 2; +} +static inline int rds_sk_rcvbuf(struct rds_sock *rs) +{ + return rds_rs_to_sk(rs)->sk_rcvbuf / 2; +} + +struct rds_statistics { + uint64_t s_conn_reset; + uint64_t s_recv_drop_bad_checksum; + uint64_t s_recv_drop_old_seq; + uint64_t s_recv_drop_no_sock; + uint64_t s_recv_drop_dead_sock; + uint64_t s_recv_deliver_raced; + uint64_t s_recv_delivered; + uint64_t s_recv_queued; + uint64_t s_recv_immediate_retry; + uint64_t s_recv_delayed_retry; + uint64_t s_recv_ack_required; + uint64_t s_recv_rdma_bytes; + uint64_t s_recv_ping; + uint64_t s_send_queue_empty; + uint64_t s_send_queue_full; + uint64_t s_send_sem_contention; + uint64_t s_send_sem_queue_raced; + uint64_t s_send_immediate_retry; + uint64_t s_send_delayed_retry; + uint64_t s_send_drop_acked; + uint64_t s_send_ack_required; + uint64_t s_send_queued; + uint64_t s_send_rdma; + uint64_t s_send_rdma_bytes; + uint64_t s_send_pong; + uint64_t s_page_remainder_hit; + uint64_t s_page_remainder_miss; + uint64_t s_copy_to_user; + uint64_t s_copy_from_user; + uint64_t s_cong_update_queued; + uint64_t s_cong_update_received; + uint64_t s_cong_send_error; + uint64_t s_cong_send_blocked; +}; + +/* af_rds.c */ +void rds_sock_addref(struct rds_sock *rs); +void rds_sock_put(struct rds_sock *rs); +void rds_wake_sk_sleep(struct rds_sock *rs); +static inline void __rds_wake_sk_sleep(struct sock *sk) +{ + wait_queue_head_t *waitq = sk->sk_sleep; + + if (!sock_flag(sk, SOCK_DEAD) && waitq) + wake_up(waitq); +} +extern wait_queue_head_t rds_poll_waitq; + + +/* bind.c */ +int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); +void rds_remove_bound(struct rds_sock *rs); +struct rds_sock *rds_find_bound(__be32 addr, __be16 port); + +/* cong.c */ +int rds_cong_get_maps(struct rds_connection *conn); +void rds_cong_add_conn(struct rds_connection *conn); +void rds_cong_remove_conn(struct rds_connection *conn); +void rds_cong_set_bit(struct rds_cong_map *map, __be16 port); +void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port); +int rds_cong_wait(struct rds_cong_map *map, __be16 port, int nonblock, struct rds_sock *rs); +void rds_cong_queue_updates(struct rds_cong_map *map); +void rds_cong_map_updated(struct rds_cong_map *map, uint64_t); +int rds_cong_updated_since(unsigned long *recent); +void rds_cong_add_socket(struct rds_sock *); +void rds_cong_remove_socket(struct rds_sock *); +void rds_cong_exit(void); +struct rds_message *rds_cong_update_alloc(struct rds_connection *conn); + +/* conn.c */ +int __init rds_conn_init(void); +void rds_conn_exit(void); +struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr, + struct rds_transport *trans, gfp_t gfp); +struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr, + struct rds_transport *trans, gfp_t gfp); +void rds_conn_destroy(struct rds_connection *conn); +void rds_conn_reset(struct rds_connection *conn); +void rds_conn_drop(struct rds_connection *conn); +void rds_for_each_conn_info(struct socket *sock, unsigned int len, + struct rds_info_iterator *iter, + struct rds_info_lengths *lens, + int (*visitor)(struct rds_connection *, void *), + size_t item_len); +void __rds_conn_error(struct rds_connection *conn, const char *, ...) + __attribute__ ((format (printf, 2, 3))); +#define rds_conn_error(conn, fmt...) \ + __rds_conn_error(conn, KERN_WARNING "RDS: " fmt) + +static inline int +rds_conn_transition(struct rds_connection *conn, int old, int new) +{ + return atomic_cmpxchg(&conn->c_state, old, new) == old; +} + +static inline int +rds_conn_state(struct rds_connection *conn) +{ + return atomic_read(&conn->c_state); +} + +static inline int +rds_conn_up(struct rds_connection *conn) +{ + return atomic_read(&conn->c_state) == RDS_CONN_UP; +} + +static inline int +rds_conn_connecting(struct rds_connection *conn) +{ + return atomic_read(&conn->c_state) == RDS_CONN_CONNECTING; +} + +/* message.c */ +struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp); +struct rds_message *rds_message_copy_from_user(struct iovec *first_iov, + size_t total_len); +struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len); +void rds_message_populate_header(struct rds_header *hdr, __be16 sport, + __be16 dport, u64 seq); +int rds_message_add_extension(struct rds_header *hdr, + unsigned int type, const void *data, unsigned int len); +int rds_message_next_extension(struct rds_header *hdr, + unsigned int *pos, void *buf, unsigned int *buflen); +int rds_message_add_version_extension(struct rds_header *hdr, unsigned int version); +int rds_message_get_version_extension(struct rds_header *hdr, unsigned int *version); +int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset); +int rds_message_inc_copy_to_user(struct rds_incoming *inc, + struct iovec *first_iov, size_t size); +void rds_message_inc_purge(struct rds_incoming *inc); +void rds_message_inc_free(struct rds_incoming *inc); +void rds_message_addref(struct rds_message *rm); +void rds_message_put(struct rds_message *rm); +void rds_message_wait(struct rds_message *rm); +void rds_message_unmapped(struct rds_message *rm); + +static inline void rds_message_make_checksum(struct rds_header *hdr) +{ + hdr->h_csum = 0; + hdr->h_csum = ip_fast_csum((void *) hdr, sizeof(*hdr) >> 2); +} + +static inline int rds_message_verify_checksum(const struct rds_header *hdr) +{ + return !hdr->h_csum || ip_fast_csum((void *) hdr, sizeof(*hdr) >> 2) == 0; +} + + +/* page.c */ +int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes, + gfp_t gfp); +int rds_page_copy_user(struct page *page, unsigned long offset, + void __user *ptr, unsigned long bytes, + int to_user); +#define rds_page_copy_to_user(page, offset, ptr, bytes) \ + rds_page_copy_user(page, offset, ptr, bytes, 1) +#define rds_page_copy_from_user(page, offset, ptr, bytes) \ + rds_page_copy_user(page, offset, ptr, bytes, 0) +void rds_page_exit(void); + +/* recv.c */ +void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, + __be32 saddr); +void rds_inc_addref(struct rds_incoming *inc); +void rds_inc_put(struct rds_incoming *inc); +void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr, + struct rds_incoming *inc, gfp_t gfp, enum km_type km); +int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, + size_t size, int msg_flags); +void rds_clear_recv_queue(struct rds_sock *rs); +int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msg); +void rds_inc_info_copy(struct rds_incoming *inc, + struct rds_info_iterator *iter, + __be32 saddr, __be32 daddr, int flip); + +/* send.c */ +int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, + size_t payload_len); +void rds_send_reset(struct rds_connection *conn); +int rds_send_xmit(struct rds_connection *conn); +struct sockaddr_in; +void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest); +typedef int (*is_acked_func)(struct rds_message *rm, uint64_t ack); +void rds_send_drop_acked(struct rds_connection *conn, u64 ack, + is_acked_func is_acked); +int rds_send_acked_before(struct rds_connection *conn, u64 seq); +void rds_send_remove_from_sock(struct list_head *messages, int status); +int rds_send_pong(struct rds_connection *conn, __be16 dport); +struct rds_message *rds_send_get_message(struct rds_connection *, + struct rds_rdma_op *); + +/* rdma.c */ +void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force); + +/* stats.c */ +DECLARE_PER_CPU(struct rds_statistics, rds_stats); +#define rds_stats_inc_which(which, member) do { \ + per_cpu(which, get_cpu()).member++; \ + put_cpu(); \ +} while (0) +#define rds_stats_inc(member) rds_stats_inc_which(rds_stats, member) +#define rds_stats_add_which(which, member, count) do { \ + per_cpu(which, get_cpu()).member += count; \ + put_cpu(); \ +} while (0) +#define rds_stats_add(member, count) rds_stats_add_which(rds_stats, member, count) +int __init rds_stats_init(void); +void rds_stats_exit(void); +void rds_stats_info_copy(struct rds_info_iterator *iter, + uint64_t *values, char **names, size_t nr); + +/* sysctl.c */ +int __init rds_sysctl_init(void); +void rds_sysctl_exit(void); +extern unsigned long rds_sysctl_sndbuf_min; +extern unsigned long rds_sysctl_sndbuf_default; +extern unsigned long rds_sysctl_sndbuf_max; +extern unsigned long rds_sysctl_reconnect_min_jiffies; +extern unsigned long rds_sysctl_reconnect_max_jiffies; +extern unsigned int rds_sysctl_max_unacked_packets; +extern unsigned int rds_sysctl_max_unacked_bytes; +extern unsigned int rds_sysctl_ping_enable; +extern unsigned long rds_sysctl_trace_flags; +extern unsigned int rds_sysctl_trace_level; + +/* threads.c */ +int __init rds_threads_init(void); +void rds_threads_exit(void); +extern struct workqueue_struct *rds_wq; +void rds_connect_worker(struct work_struct *); +void rds_shutdown_worker(struct work_struct *); +void rds_send_worker(struct work_struct *); +void rds_recv_worker(struct work_struct *); +void rds_connect_complete(struct rds_connection *conn); + +/* transport.c */ +int rds_trans_register(struct rds_transport *trans); +void rds_trans_unregister(struct rds_transport *trans); +struct rds_transport *rds_trans_get_preferred(__be32 addr); +unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter, + unsigned int avail); +int __init rds_trans_init(void); +void rds_trans_exit(void); + +#endif -- cgit v1.2.3 From 922cb17a5c812fcc9ebee249f4109db099896941 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 24 Feb 2009 15:30:20 +0000 Subject: RDS: Congestion-handling code RDS handles per-socket congestion by updating peers with a complete congestion map (8KB). This code keeps track of these maps for itself and ones received from peers. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/cong.c | 402 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 402 insertions(+) create mode 100644 net/rds/cong.c (limited to 'net') diff --git a/net/rds/cong.c b/net/rds/cong.c new file mode 100644 index 000000000000..90e6b31d8e8a --- /dev/null +++ b/net/rds/cong.c @@ -0,0 +1,402 @@ +/* + * Copyright (c) 2007 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include + +#include "rds.h" + +/* + * This file implements the receive side of the unconventional congestion + * management in RDS. + * + * Messages waiting in the receive queue on the receiving socket are accounted + * against the sockets SO_RCVBUF option value. Only the payload bytes in the + * message are accounted for. If the number of bytes queued equals or exceeds + * rcvbuf then the socket is congested. All sends attempted to this socket's + * address should return block or return -EWOULDBLOCK. + * + * Applications are expected to be reasonably tuned such that this situation + * very rarely occurs. An application encountering this "back-pressure" is + * considered a bug. + * + * This is implemented by having each node maintain bitmaps which indicate + * which ports on bound addresses are congested. As the bitmap changes it is + * sent through all the connections which terminate in the local address of the + * bitmap which changed. + * + * The bitmaps are allocated as connections are brought up. This avoids + * allocation in the interrupt handling path which queues messages on sockets. + * The dense bitmaps let transports send the entire bitmap on any bitmap change + * reasonably efficiently. This is much easier to implement than some + * finer-grained communication of per-port congestion. The sender does a very + * inexpensive bit test to test if the port it's about to send to is congested + * or not. + */ + +/* + * Interaction with poll is a tad tricky. We want all processes stuck in + * poll to wake up and check whether a congested destination became uncongested. + * The really sad thing is we have no idea which destinations the application + * wants to send to - we don't even know which rds_connections are involved. + * So until we implement a more flexible rds poll interface, we have to make + * do with this: + * We maintain a global counter that is incremented each time a congestion map + * update is received. Each rds socket tracks this value, and if rds_poll + * finds that the saved generation number is smaller than the global generation + * number, it wakes up the process. + */ +static atomic_t rds_cong_generation = ATOMIC_INIT(0); + +/* + * Congestion monitoring + */ +static LIST_HEAD(rds_cong_monitor); +static DEFINE_RWLOCK(rds_cong_monitor_lock); + +/* + * Yes, a global lock. It's used so infrequently that it's worth keeping it + * global to simplify the locking. It's only used in the following + * circumstances: + * + * - on connection buildup to associate a conn with its maps + * - on map changes to inform conns of a new map to send + * + * It's sadly ordered under the socket callback lock and the connection lock. + * Receive paths can mark ports congested from interrupt context so the + * lock masks interrupts. + */ +static DEFINE_SPINLOCK(rds_cong_lock); +static struct rb_root rds_cong_tree = RB_ROOT; + +static struct rds_cong_map *rds_cong_tree_walk(__be32 addr, + struct rds_cong_map *insert) +{ + struct rb_node **p = &rds_cong_tree.rb_node; + struct rb_node *parent = NULL; + struct rds_cong_map *map; + + while (*p) { + parent = *p; + map = rb_entry(parent, struct rds_cong_map, m_rb_node); + + if (addr < map->m_addr) + p = &(*p)->rb_left; + else if (addr > map->m_addr) + p = &(*p)->rb_right; + else + return map; + } + + if (insert) { + rb_link_node(&insert->m_rb_node, parent, p); + rb_insert_color(&insert->m_rb_node, &rds_cong_tree); + } + return NULL; +} + +/* + * There is only ever one bitmap for any address. Connections try and allocate + * these bitmaps in the process getting pointers to them. The bitmaps are only + * ever freed as the module is removed after all connections have been freed. + */ +static struct rds_cong_map *rds_cong_from_addr(__be32 addr) +{ + struct rds_cong_map *map; + struct rds_cong_map *ret = NULL; + unsigned long zp; + unsigned long i; + unsigned long flags; + + map = kzalloc(sizeof(struct rds_cong_map), GFP_KERNEL); + if (map == NULL) + return NULL; + + map->m_addr = addr; + init_waitqueue_head(&map->m_waitq); + INIT_LIST_HEAD(&map->m_conn_list); + + for (i = 0; i < RDS_CONG_MAP_PAGES; i++) { + zp = get_zeroed_page(GFP_KERNEL); + if (zp == 0) + goto out; + map->m_page_addrs[i] = zp; + } + + spin_lock_irqsave(&rds_cong_lock, flags); + ret = rds_cong_tree_walk(addr, map); + spin_unlock_irqrestore(&rds_cong_lock, flags); + + if (ret == NULL) { + ret = map; + map = NULL; + } + +out: + if (map) { + for (i = 0; i < RDS_CONG_MAP_PAGES && map->m_page_addrs[i]; i++) + free_page(map->m_page_addrs[i]); + kfree(map); + } + + rdsdebug("map %p for addr %x\n", ret, be32_to_cpu(addr)); + + return ret; +} + +/* + * Put the conn on its local map's list. This is called when the conn is + * really added to the hash. It's nested under the rds_conn_lock, sadly. + */ +void rds_cong_add_conn(struct rds_connection *conn) +{ + unsigned long flags; + + rdsdebug("conn %p now on map %p\n", conn, conn->c_lcong); + spin_lock_irqsave(&rds_cong_lock, flags); + list_add_tail(&conn->c_map_item, &conn->c_lcong->m_conn_list); + spin_unlock_irqrestore(&rds_cong_lock, flags); +} + +void rds_cong_remove_conn(struct rds_connection *conn) +{ + unsigned long flags; + + rdsdebug("removing conn %p from map %p\n", conn, conn->c_lcong); + spin_lock_irqsave(&rds_cong_lock, flags); + list_del_init(&conn->c_map_item); + spin_unlock_irqrestore(&rds_cong_lock, flags); +} + +int rds_cong_get_maps(struct rds_connection *conn) +{ + conn->c_lcong = rds_cong_from_addr(conn->c_laddr); + conn->c_fcong = rds_cong_from_addr(conn->c_faddr); + + if (conn->c_lcong == NULL || conn->c_fcong == NULL) + return -ENOMEM; + + return 0; +} + +void rds_cong_queue_updates(struct rds_cong_map *map) +{ + struct rds_connection *conn; + unsigned long flags; + + spin_lock_irqsave(&rds_cong_lock, flags); + + list_for_each_entry(conn, &map->m_conn_list, c_map_item) { + if (!test_and_set_bit(0, &conn->c_map_queued)) { + rds_stats_inc(s_cong_update_queued); + queue_delayed_work(rds_wq, &conn->c_send_w, 0); + } + } + + spin_unlock_irqrestore(&rds_cong_lock, flags); +} + +void rds_cong_map_updated(struct rds_cong_map *map, uint64_t portmask) +{ + rdsdebug("waking map %p for %pI4\n", + map, &map->m_addr); + rds_stats_inc(s_cong_update_received); + atomic_inc(&rds_cong_generation); + if (waitqueue_active(&map->m_waitq)) + wake_up(&map->m_waitq); + if (waitqueue_active(&rds_poll_waitq)) + wake_up_all(&rds_poll_waitq); + + if (portmask && !list_empty(&rds_cong_monitor)) { + unsigned long flags; + struct rds_sock *rs; + + read_lock_irqsave(&rds_cong_monitor_lock, flags); + list_for_each_entry(rs, &rds_cong_monitor, rs_cong_list) { + spin_lock(&rs->rs_lock); + rs->rs_cong_notify |= (rs->rs_cong_mask & portmask); + rs->rs_cong_mask &= ~portmask; + spin_unlock(&rs->rs_lock); + if (rs->rs_cong_notify) + rds_wake_sk_sleep(rs); + } + read_unlock_irqrestore(&rds_cong_monitor_lock, flags); + } +} + +int rds_cong_updated_since(unsigned long *recent) +{ + unsigned long gen = atomic_read(&rds_cong_generation); + + if (likely(*recent == gen)) + return 0; + *recent = gen; + return 1; +} + +/* + * We're called under the locking that protects the sockets receive buffer + * consumption. This makes it a lot easier for the caller to only call us + * when it knows that an existing set bit needs to be cleared, and vice versa. + * We can't block and we need to deal with concurrent sockets working against + * the same per-address map. + */ +void rds_cong_set_bit(struct rds_cong_map *map, __be16 port) +{ + unsigned long i; + unsigned long off; + + rdsdebug("setting congestion for %pI4:%u in map %p\n", + &map->m_addr, ntohs(port), map); + + i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS; + off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS; + + generic___set_le_bit(off, (void *)map->m_page_addrs[i]); +} + +void rds_cong_clear_bit(struct rds_cong_map *map, __be16 port) +{ + unsigned long i; + unsigned long off; + + rdsdebug("clearing congestion for %pI4:%u in map %p\n", + &map->m_addr, ntohs(port), map); + + i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS; + off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS; + + generic___clear_le_bit(off, (void *)map->m_page_addrs[i]); +} + +static int rds_cong_test_bit(struct rds_cong_map *map, __be16 port) +{ + unsigned long i; + unsigned long off; + + i = be16_to_cpu(port) / RDS_CONG_MAP_PAGE_BITS; + off = be16_to_cpu(port) % RDS_CONG_MAP_PAGE_BITS; + + return generic_test_le_bit(off, (void *)map->m_page_addrs[i]); +} + +void rds_cong_add_socket(struct rds_sock *rs) +{ + unsigned long flags; + + write_lock_irqsave(&rds_cong_monitor_lock, flags); + if (list_empty(&rs->rs_cong_list)) + list_add(&rs->rs_cong_list, &rds_cong_monitor); + write_unlock_irqrestore(&rds_cong_monitor_lock, flags); +} + +void rds_cong_remove_socket(struct rds_sock *rs) +{ + unsigned long flags; + struct rds_cong_map *map; + + write_lock_irqsave(&rds_cong_monitor_lock, flags); + list_del_init(&rs->rs_cong_list); + write_unlock_irqrestore(&rds_cong_monitor_lock, flags); + + /* update congestion map for now-closed port */ + spin_lock_irqsave(&rds_cong_lock, flags); + map = rds_cong_tree_walk(rs->rs_bound_addr, NULL); + spin_unlock_irqrestore(&rds_cong_lock, flags); + + if (map && rds_cong_test_bit(map, rs->rs_bound_port)) { + rds_cong_clear_bit(map, rs->rs_bound_port); + rds_cong_queue_updates(map); + } +} + +int rds_cong_wait(struct rds_cong_map *map, __be16 port, int nonblock, + struct rds_sock *rs) +{ + if (!rds_cong_test_bit(map, port)) + return 0; + if (nonblock) { + if (rs && rs->rs_cong_monitor) { + unsigned long flags; + + /* It would have been nice to have an atomic set_bit on + * a uint64_t. */ + spin_lock_irqsave(&rs->rs_lock, flags); + rs->rs_cong_mask |= RDS_CONG_MONITOR_MASK(ntohs(port)); + spin_unlock_irqrestore(&rs->rs_lock, flags); + + /* Test again - a congestion update may have arrived in + * the meantime. */ + if (!rds_cong_test_bit(map, port)) + return 0; + } + rds_stats_inc(s_cong_send_error); + return -ENOBUFS; + } + + rds_stats_inc(s_cong_send_blocked); + rdsdebug("waiting on map %p for port %u\n", map, be16_to_cpu(port)); + + return wait_event_interruptible(map->m_waitq, + !rds_cong_test_bit(map, port)); +} + +void rds_cong_exit(void) +{ + struct rb_node *node; + struct rds_cong_map *map; + unsigned long i; + + while ((node = rb_first(&rds_cong_tree))) { + map = rb_entry(node, struct rds_cong_map, m_rb_node); + rdsdebug("freeing map %p\n", map); + rb_erase(&map->m_rb_node, &rds_cong_tree); + for (i = 0; i < RDS_CONG_MAP_PAGES && map->m_page_addrs[i]; i++) + free_page(map->m_page_addrs[i]); + kfree(map); + } +} + +/* + * Allocate a RDS message containing a congestion update. + */ +struct rds_message *rds_cong_update_alloc(struct rds_connection *conn) +{ + struct rds_cong_map *map = conn->c_lcong; + struct rds_message *rm; + + rm = rds_message_map_pages(map->m_page_addrs, RDS_CONG_MAP_BYTES); + if (!IS_ERR(rm)) + rm->m_inc.i_hdr.h_flags = RDS_FLAG_CONG_BITMAP; + + return rm; +} -- cgit v1.2.3 From 0fbc78cbf51f3edee17c2ede42c7b3c13736158a Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 24 Feb 2009 15:30:21 +0000 Subject: RDS: Transport code RDS supports multiple transports. While this initial submission only supports Infiniband transport, this abstraction allows others to be added. We're working on an iWARP transport, and also see UDP over DCB as another possibility. This code handles transport registration. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/transport.c | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 net/rds/transport.c (limited to 'net') diff --git a/net/rds/transport.c b/net/rds/transport.c new file mode 100644 index 000000000000..767da61ad2f3 --- /dev/null +++ b/net/rds/transport.c @@ -0,0 +1,117 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include + +#include "rds.h" +#include "loop.h" + +static LIST_HEAD(rds_transports); +static DECLARE_RWSEM(rds_trans_sem); + +int rds_trans_register(struct rds_transport *trans) +{ + BUG_ON(strlen(trans->t_name) + 1 > TRANSNAMSIZ); + + down_write(&rds_trans_sem); + + list_add_tail(&trans->t_item, &rds_transports); + printk(KERN_INFO "Registered RDS/%s transport\n", trans->t_name); + + up_write(&rds_trans_sem); + + return 0; +} + +void rds_trans_unregister(struct rds_transport *trans) +{ + down_write(&rds_trans_sem); + + list_del_init(&trans->t_item); + printk(KERN_INFO "Unregistered RDS/%s transport\n", trans->t_name); + + up_write(&rds_trans_sem); +} + +struct rds_transport *rds_trans_get_preferred(__be32 addr) +{ + struct rds_transport *trans; + struct rds_transport *ret = NULL; + + if (IN_LOOPBACK(ntohl(addr))) + return &rds_loop_transport; + + down_read(&rds_trans_sem); + list_for_each_entry(trans, &rds_transports, t_item) { + if (trans->laddr_check(addr) == 0) { + ret = trans; + break; + } + } + up_read(&rds_trans_sem); + + return ret; +} + +/* + * This returns the number of stats entries in the snapshot and only + * copies them using the iter if there is enough space for them. The + * caller passes in the global stats so that we can size and copy while + * holding the lock. + */ +unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter, + unsigned int avail) + +{ + struct rds_transport *trans; + unsigned int total = 0; + unsigned int part; + + rds_info_iter_unmap(iter); + down_read(&rds_trans_sem); + + list_for_each_entry(trans, &rds_transports, t_item) { + if (trans->stats_info_copy == NULL) + continue; + + part = trans->stats_info_copy(iter, avail); + avail -= min(avail, part); + total += part; + } + + up_read(&rds_trans_sem); + + return total; +} + -- cgit v1.2.3 From a8c879a7ee988eb67a5e85e87fa2cc953e0ca749 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 24 Feb 2009 15:30:22 +0000 Subject: RDS: Info and stats RDS currently generates a lot of stats that are accessible via the rds-info utility. This code implements the support for this. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/info.c | 241 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ net/rds/info.h | 30 +++++++ net/rds/stats.c | 148 ++++++++++++++++++++++++++++++++++ 3 files changed, 419 insertions(+) create mode 100644 net/rds/info.c create mode 100644 net/rds/info.h create mode 100644 net/rds/stats.c (limited to 'net') diff --git a/net/rds/info.c b/net/rds/info.c new file mode 100644 index 000000000000..1d885535214d --- /dev/null +++ b/net/rds/info.c @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include + +#include "rds.h" + +/* + * This file implements a getsockopt() call which copies a set of fixed + * sized structs into a user-specified buffer as a means of providing + * read-only information about RDS. + * + * For a given information source there are a given number of fixed sized + * structs at a given time. The structs are only copied if the user-specified + * buffer is big enough. The destination pages that make up the buffer + * are pinned for the duration of the copy. + * + * This gives us the following benefits: + * + * - simple implementation, no copy "position" across multiple calls + * - consistent snapshot of an info source + * - atomic copy works well with whatever locking info source has + * - one portable tool to get rds info across implementations + * - long-lived tool can get info without allocating + * + * at the following costs: + * + * - info source copy must be pinned, may be "large" + */ + +struct rds_info_iterator { + struct page **pages; + void *addr; + unsigned long offset; +}; + +static DEFINE_SPINLOCK(rds_info_lock); +static rds_info_func rds_info_funcs[RDS_INFO_LAST - RDS_INFO_FIRST + 1]; + +void rds_info_register_func(int optname, rds_info_func func) +{ + int offset = optname - RDS_INFO_FIRST; + + BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST); + + spin_lock(&rds_info_lock); + BUG_ON(rds_info_funcs[offset] != NULL); + rds_info_funcs[offset] = func; + spin_unlock(&rds_info_lock); +} + +void rds_info_deregister_func(int optname, rds_info_func func) +{ + int offset = optname - RDS_INFO_FIRST; + + BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST); + + spin_lock(&rds_info_lock); + BUG_ON(rds_info_funcs[offset] != func); + rds_info_funcs[offset] = NULL; + spin_unlock(&rds_info_lock); +} + +/* + * Typically we hold an atomic kmap across multiple rds_info_copy() calls + * because the kmap is so expensive. This must be called before using blocking + * operations while holding the mapping and as the iterator is torn down. + */ +void rds_info_iter_unmap(struct rds_info_iterator *iter) +{ + if (iter->addr != NULL) { + kunmap_atomic(iter->addr, KM_USER0); + iter->addr = NULL; + } +} + +/* + * get_user_pages() called flush_dcache_page() on the pages for us. + */ +void rds_info_copy(struct rds_info_iterator *iter, void *data, + unsigned long bytes) +{ + unsigned long this; + + while (bytes) { + if (iter->addr == NULL) + iter->addr = kmap_atomic(*iter->pages, KM_USER0); + + this = min(bytes, PAGE_SIZE - iter->offset); + + rdsdebug("page %p addr %p offset %lu this %lu data %p " + "bytes %lu\n", *iter->pages, iter->addr, + iter->offset, this, data, bytes); + + memcpy(iter->addr + iter->offset, data, this); + + data += this; + bytes -= this; + iter->offset += this; + + if (iter->offset == PAGE_SIZE) { + kunmap_atomic(iter->addr, KM_USER0); + iter->addr = NULL; + iter->offset = 0; + iter->pages++; + } + } +} + +/* + * @optval points to the userspace buffer that the information snapshot + * will be copied into. + * + * @optlen on input is the size of the buffer in userspace. @optlen + * on output is the size of the requested snapshot in bytes. + * + * This function returns -errno if there is a failure, particularly -ENOSPC + * if the given userspace buffer was not large enough to fit the snapshot. + * On success it returns the positive number of bytes of each array element + * in the snapshot. + */ +int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval, + int __user *optlen) +{ + struct rds_info_iterator iter; + struct rds_info_lengths lens; + unsigned long nr_pages = 0; + unsigned long start; + unsigned long i; + rds_info_func func; + struct page **pages = NULL; + int ret; + int len; + int total; + + if (get_user(len, optlen)) { + ret = -EFAULT; + goto out; + } + + /* check for all kinds of wrapping and the like */ + start = (unsigned long)optval; + if (len < 0 || len + PAGE_SIZE - 1 < len || start + len < start) { + ret = -EINVAL; + goto out; + } + + /* a 0 len call is just trying to probe its length */ + if (len == 0) + goto call_func; + + nr_pages = (PAGE_ALIGN(start + len) - (start & PAGE_MASK)) + >> PAGE_SHIFT; + + pages = kmalloc(nr_pages * sizeof(struct page *), GFP_KERNEL); + if (pages == NULL) { + ret = -ENOMEM; + goto out; + } + down_read(¤t->mm->mmap_sem); + ret = get_user_pages(current, current->mm, start, nr_pages, 1, 0, + pages, NULL); + up_read(¤t->mm->mmap_sem); + if (ret != nr_pages) { + if (ret > 0) + nr_pages = ret; + else + nr_pages = 0; + ret = -EAGAIN; /* XXX ? */ + goto out; + } + + rdsdebug("len %d nr_pages %lu\n", len, nr_pages); + +call_func: + func = rds_info_funcs[optname - RDS_INFO_FIRST]; + if (func == NULL) { + ret = -ENOPROTOOPT; + goto out; + } + + iter.pages = pages; + iter.addr = NULL; + iter.offset = start & (PAGE_SIZE - 1); + + func(sock, len, &iter, &lens); + BUG_ON(lens.each == 0); + + total = lens.nr * lens.each; + + rds_info_iter_unmap(&iter); + + if (total > len) { + len = total; + ret = -ENOSPC; + } else { + len = total; + ret = lens.each; + } + + if (put_user(len, optlen)) + ret = -EFAULT; + +out: + for (i = 0; pages != NULL && i < nr_pages; i++) + put_page(pages[i]); + kfree(pages); + + return ret; +} diff --git a/net/rds/info.h b/net/rds/info.h new file mode 100644 index 000000000000..b6c052ca7d22 --- /dev/null +++ b/net/rds/info.h @@ -0,0 +1,30 @@ +#ifndef _RDS_INFO_H +#define _RDS_INFO_H + +struct rds_info_lengths { + unsigned int nr; + unsigned int each; +}; + +struct rds_info_iterator; + +/* + * These functions must fill in the fields of @lens to reflect the size + * of the available info source. If the snapshot fits in @len then it + * should be copied using @iter. The caller will deduce if it was copied + * or not by comparing the lengths. + */ +typedef void (*rds_info_func)(struct socket *sock, unsigned int len, + struct rds_info_iterator *iter, + struct rds_info_lengths *lens); + +void rds_info_register_func(int optname, rds_info_func func); +void rds_info_deregister_func(int optname, rds_info_func func); +int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval, + int __user *optlen); +void rds_info_copy(struct rds_info_iterator *iter, void *data, + unsigned long bytes); +void rds_info_iter_unmap(struct rds_info_iterator *iter); + + +#endif diff --git a/net/rds/stats.c b/net/rds/stats.c new file mode 100644 index 000000000000..637146893cf3 --- /dev/null +++ b/net/rds/stats.c @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include + +#include "rds.h" + +DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats); + +/* :.,$s/unsigned long\>.*\= sizeof(ctr.name)); + strncpy(ctr.name, names[i], sizeof(ctr.name) - 1); + ctr.value = values[i]; + + rds_info_copy(iter, &ctr, sizeof(ctr)); + } +} + +/* + * This gives global counters across all the transports. The strings + * are copied in so that the tool doesn't need knowledge of the specific + * stats that we're exporting. Some are pretty implementation dependent + * and may change over time. That doesn't stop them from being useful. + * + * This is the only function in the chain that knows about the byte granular + * length in userspace. It converts it to number of stat entries that the + * rest of the functions operate in. + */ +static void rds_stats_info(struct socket *sock, unsigned int len, + struct rds_info_iterator *iter, + struct rds_info_lengths *lens) +{ + struct rds_statistics stats = {0, }; + uint64_t *src; + uint64_t *sum; + size_t i; + int cpu; + unsigned int avail; + + avail = len / sizeof(struct rds_info_counter); + + if (avail < ARRAY_SIZE(rds_stat_names)) { + avail = 0; + goto trans; + } + + for_each_online_cpu(cpu) { + src = (uint64_t *)&(per_cpu(rds_stats, cpu)); + sum = (uint64_t *)&stats; + for (i = 0; i < sizeof(stats) / sizeof(uint64_t); i++) + *(sum++) += *(src++); + } + + rds_stats_info_copy(iter, (uint64_t *)&stats, rds_stat_names, + ARRAY_SIZE(rds_stat_names)); + avail -= ARRAY_SIZE(rds_stat_names); + +trans: + lens->each = sizeof(struct rds_info_counter); + lens->nr = rds_trans_stats_info_copy(iter, avail) + + ARRAY_SIZE(rds_stat_names); +} + +void rds_stats_exit(void) +{ + rds_info_deregister_func(RDS_INFO_COUNTERS, rds_stats_info); +} + +int __init rds_stats_init(void) +{ + rds_info_register_func(RDS_INFO_COUNTERS, rds_stats_info); + return 0; +} -- cgit v1.2.3 From 00e0f34c616603ba6500f41943cbf89eb4a8a5be Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 24 Feb 2009 15:30:23 +0000 Subject: RDS: Connection handling While arguably the fact that the underlying transport needs a connection to convey RDS's datagrame reliably is not important to rds proper, the transports implemented so far (IB and TCP) have both been connection-oriented, and so the connection state machine-related code is in the common rds code. This patch also includes several work items, to handle connecting, sending, receiving, and shutdown. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/connection.c | 487 +++++++++++++++++++++++++++++++++++++++++++++++++++ net/rds/threads.c | 265 ++++++++++++++++++++++++++++ 2 files changed, 752 insertions(+) create mode 100644 net/rds/connection.c create mode 100644 net/rds/threads.c (limited to 'net') diff --git a/net/rds/connection.c b/net/rds/connection.c new file mode 100644 index 000000000000..273f064930a8 --- /dev/null +++ b/net/rds/connection.c @@ -0,0 +1,487 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include + +#include "rds.h" +#include "loop.h" +#include "rdma.h" + +#define RDS_CONNECTION_HASH_BITS 12 +#define RDS_CONNECTION_HASH_ENTRIES (1 << RDS_CONNECTION_HASH_BITS) +#define RDS_CONNECTION_HASH_MASK (RDS_CONNECTION_HASH_ENTRIES - 1) + +/* converting this to RCU is a chore for another day.. */ +static DEFINE_SPINLOCK(rds_conn_lock); +static unsigned long rds_conn_count; +static struct hlist_head rds_conn_hash[RDS_CONNECTION_HASH_ENTRIES]; +static struct kmem_cache *rds_conn_slab; + +static struct hlist_head *rds_conn_bucket(__be32 laddr, __be32 faddr) +{ + /* Pass NULL, don't need struct net for hash */ + unsigned long hash = inet_ehashfn(NULL, + be32_to_cpu(laddr), 0, + be32_to_cpu(faddr), 0); + return &rds_conn_hash[hash & RDS_CONNECTION_HASH_MASK]; +} + +#define rds_conn_info_set(var, test, suffix) do { \ + if (test) \ + var |= RDS_INFO_CONNECTION_FLAG_##suffix; \ +} while (0) + +static inline int rds_conn_is_sending(struct rds_connection *conn) +{ + int ret = 0; + + if (!mutex_trylock(&conn->c_send_lock)) + ret = 1; + else + mutex_unlock(&conn->c_send_lock); + + return ret; +} + +static struct rds_connection *rds_conn_lookup(struct hlist_head *head, + __be32 laddr, __be32 faddr, + struct rds_transport *trans) +{ + struct rds_connection *conn, *ret = NULL; + struct hlist_node *pos; + + hlist_for_each_entry(conn, pos, head, c_hash_node) { + if (conn->c_faddr == faddr && conn->c_laddr == laddr && + conn->c_trans == trans) { + ret = conn; + break; + } + } + rdsdebug("returning conn %p for %pI4 -> %pI4\n", ret, + &laddr, &faddr); + return ret; +} + +/* + * This is called by transports as they're bringing down a connection. + * It clears partial message state so that the transport can start sending + * and receiving over this connection again in the future. It is up to + * the transport to have serialized this call with its send and recv. + */ +void rds_conn_reset(struct rds_connection *conn) +{ + rdsdebug("connection %pI4 to %pI4 reset\n", + &conn->c_laddr, &conn->c_faddr); + + rds_stats_inc(s_conn_reset); + rds_send_reset(conn); + conn->c_flags = 0; + + /* Do not clear next_rx_seq here, else we cannot distinguish + * retransmitted packets from new packets, and will hand all + * of them to the application. That is not consistent with the + * reliability guarantees of RDS. */ +} + +/* + * There is only every one 'conn' for a given pair of addresses in the + * system at a time. They contain messages to be retransmitted and so + * span the lifetime of the actual underlying transport connections. + * + * For now they are not garbage collected once they're created. They + * are torn down as the module is removed, if ever. + */ +static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, + struct rds_transport *trans, gfp_t gfp, + int is_outgoing) +{ + struct rds_connection *conn, *tmp, *parent = NULL; + struct hlist_head *head = rds_conn_bucket(laddr, faddr); + unsigned long flags; + int ret; + + spin_lock_irqsave(&rds_conn_lock, flags); + conn = rds_conn_lookup(head, laddr, faddr, trans); + if (conn + && conn->c_loopback + && conn->c_trans != &rds_loop_transport + && !is_outgoing) { + /* This is a looped back IB connection, and we're + * called by the code handling the incoming connect. + * We need a second connection object into which we + * can stick the other QP. */ + parent = conn; + conn = parent->c_passive; + } + spin_unlock_irqrestore(&rds_conn_lock, flags); + if (conn) + goto out; + + conn = kmem_cache_alloc(rds_conn_slab, gfp); + if (conn == NULL) { + conn = ERR_PTR(-ENOMEM); + goto out; + } + + memset(conn, 0, sizeof(*conn)); + + INIT_HLIST_NODE(&conn->c_hash_node); + conn->c_version = RDS_PROTOCOL_3_0; + conn->c_laddr = laddr; + conn->c_faddr = faddr; + spin_lock_init(&conn->c_lock); + conn->c_next_tx_seq = 1; + + mutex_init(&conn->c_send_lock); + INIT_LIST_HEAD(&conn->c_send_queue); + INIT_LIST_HEAD(&conn->c_retrans); + + ret = rds_cong_get_maps(conn); + if (ret) { + kmem_cache_free(rds_conn_slab, conn); + conn = ERR_PTR(ret); + goto out; + } + + /* + * This is where a connection becomes loopback. If *any* RDS sockets + * can bind to the destination address then we'd rather the messages + * flow through loopback rather than either transport. + */ + if (rds_trans_get_preferred(faddr)) { + conn->c_loopback = 1; + if (is_outgoing && trans->t_prefer_loopback) { + /* "outgoing" connection - and the transport + * says it wants the connection handled by the + * loopback transport. This is what TCP does. + */ + trans = &rds_loop_transport; + } + } + + conn->c_trans = trans; + + ret = trans->conn_alloc(conn, gfp); + if (ret) { + kmem_cache_free(rds_conn_slab, conn); + conn = ERR_PTR(ret); + goto out; + } + + atomic_set(&conn->c_state, RDS_CONN_DOWN); + conn->c_reconnect_jiffies = 0; + INIT_DELAYED_WORK(&conn->c_send_w, rds_send_worker); + INIT_DELAYED_WORK(&conn->c_recv_w, rds_recv_worker); + INIT_DELAYED_WORK(&conn->c_conn_w, rds_connect_worker); + INIT_WORK(&conn->c_down_w, rds_shutdown_worker); + mutex_init(&conn->c_cm_lock); + conn->c_flags = 0; + + rdsdebug("allocated conn %p for %pI4 -> %pI4 over %s %s\n", + conn, &laddr, &faddr, + trans->t_name ? trans->t_name : "[unknown]", + is_outgoing ? "(outgoing)" : ""); + + spin_lock_irqsave(&rds_conn_lock, flags); + if (parent == NULL) { + tmp = rds_conn_lookup(head, laddr, faddr, trans); + if (tmp == NULL) + hlist_add_head(&conn->c_hash_node, head); + } else { + tmp = parent->c_passive; + if (!tmp) + parent->c_passive = conn; + } + + if (tmp) { + trans->conn_free(conn->c_transport_data); + kmem_cache_free(rds_conn_slab, conn); + conn = tmp; + } else { + rds_cong_add_conn(conn); + rds_conn_count++; + } + + spin_unlock_irqrestore(&rds_conn_lock, flags); + +out: + return conn; +} + +struct rds_connection *rds_conn_create(__be32 laddr, __be32 faddr, + struct rds_transport *trans, gfp_t gfp) +{ + return __rds_conn_create(laddr, faddr, trans, gfp, 0); +} + +struct rds_connection *rds_conn_create_outgoing(__be32 laddr, __be32 faddr, + struct rds_transport *trans, gfp_t gfp) +{ + return __rds_conn_create(laddr, faddr, trans, gfp, 1); +} + +void rds_conn_destroy(struct rds_connection *conn) +{ + struct rds_message *rm, *rtmp; + + rdsdebug("freeing conn %p for %pI4 -> " + "%pI4\n", conn, &conn->c_laddr, + &conn->c_faddr); + + hlist_del_init(&conn->c_hash_node); + + /* wait for the rds thread to shut it down */ + atomic_set(&conn->c_state, RDS_CONN_ERROR); + cancel_delayed_work(&conn->c_conn_w); + queue_work(rds_wq, &conn->c_down_w); + flush_workqueue(rds_wq); + + /* tear down queued messages */ + list_for_each_entry_safe(rm, rtmp, + &conn->c_send_queue, + m_conn_item) { + list_del_init(&rm->m_conn_item); + BUG_ON(!list_empty(&rm->m_sock_item)); + rds_message_put(rm); + } + if (conn->c_xmit_rm) + rds_message_put(conn->c_xmit_rm); + + conn->c_trans->conn_free(conn->c_transport_data); + + /* + * The congestion maps aren't freed up here. They're + * freed by rds_cong_exit() after all the connections + * have been freed. + */ + rds_cong_remove_conn(conn); + + BUG_ON(!list_empty(&conn->c_retrans)); + kmem_cache_free(rds_conn_slab, conn); + + rds_conn_count--; +} + +static void rds_conn_message_info(struct socket *sock, unsigned int len, + struct rds_info_iterator *iter, + struct rds_info_lengths *lens, + int want_send) +{ + struct hlist_head *head; + struct hlist_node *pos; + struct list_head *list; + struct rds_connection *conn; + struct rds_message *rm; + unsigned long flags; + unsigned int total = 0; + size_t i; + + len /= sizeof(struct rds_info_message); + + spin_lock_irqsave(&rds_conn_lock, flags); + + for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); + i++, head++) { + hlist_for_each_entry(conn, pos, head, c_hash_node) { + if (want_send) + list = &conn->c_send_queue; + else + list = &conn->c_retrans; + + spin_lock(&conn->c_lock); + + /* XXX too lazy to maintain counts.. */ + list_for_each_entry(rm, list, m_conn_item) { + total++; + if (total <= len) + rds_inc_info_copy(&rm->m_inc, iter, + conn->c_laddr, + conn->c_faddr, 0); + } + + spin_unlock(&conn->c_lock); + } + } + + spin_unlock_irqrestore(&rds_conn_lock, flags); + + lens->nr = total; + lens->each = sizeof(struct rds_info_message); +} + +static void rds_conn_message_info_send(struct socket *sock, unsigned int len, + struct rds_info_iterator *iter, + struct rds_info_lengths *lens) +{ + rds_conn_message_info(sock, len, iter, lens, 1); +} + +static void rds_conn_message_info_retrans(struct socket *sock, + unsigned int len, + struct rds_info_iterator *iter, + struct rds_info_lengths *lens) +{ + rds_conn_message_info(sock, len, iter, lens, 0); +} + +void rds_for_each_conn_info(struct socket *sock, unsigned int len, + struct rds_info_iterator *iter, + struct rds_info_lengths *lens, + int (*visitor)(struct rds_connection *, void *), + size_t item_len) +{ + uint64_t buffer[(item_len + 7) / 8]; + struct hlist_head *head; + struct hlist_node *pos; + struct hlist_node *tmp; + struct rds_connection *conn; + unsigned long flags; + size_t i; + + spin_lock_irqsave(&rds_conn_lock, flags); + + lens->nr = 0; + lens->each = item_len; + + for (i = 0, head = rds_conn_hash; i < ARRAY_SIZE(rds_conn_hash); + i++, head++) { + hlist_for_each_entry_safe(conn, pos, tmp, head, c_hash_node) { + + /* XXX no c_lock usage.. */ + if (!visitor(conn, buffer)) + continue; + + /* We copy as much as we can fit in the buffer, + * but we count all items so that the caller + * can resize the buffer. */ + if (len >= item_len) { + rds_info_copy(iter, buffer, item_len); + len -= item_len; + } + lens->nr++; + } + } + + spin_unlock_irqrestore(&rds_conn_lock, flags); +} + +static int rds_conn_info_visitor(struct rds_connection *conn, + void *buffer) +{ + struct rds_info_connection *cinfo = buffer; + + cinfo->next_tx_seq = conn->c_next_tx_seq; + cinfo->next_rx_seq = conn->c_next_rx_seq; + cinfo->laddr = conn->c_laddr; + cinfo->faddr = conn->c_faddr; + strncpy(cinfo->transport, conn->c_trans->t_name, + sizeof(cinfo->transport)); + cinfo->flags = 0; + + rds_conn_info_set(cinfo->flags, + rds_conn_is_sending(conn), SENDING); + /* XXX Future: return the state rather than these funky bits */ + rds_conn_info_set(cinfo->flags, + atomic_read(&conn->c_state) == RDS_CONN_CONNECTING, + CONNECTING); + rds_conn_info_set(cinfo->flags, + atomic_read(&conn->c_state) == RDS_CONN_UP, + CONNECTED); + return 1; +} + +static void rds_conn_info(struct socket *sock, unsigned int len, + struct rds_info_iterator *iter, + struct rds_info_lengths *lens) +{ + rds_for_each_conn_info(sock, len, iter, lens, + rds_conn_info_visitor, + sizeof(struct rds_info_connection)); +} + +int __init rds_conn_init(void) +{ + rds_conn_slab = kmem_cache_create("rds_connection", + sizeof(struct rds_connection), + 0, 0, NULL); + if (rds_conn_slab == NULL) + return -ENOMEM; + + rds_info_register_func(RDS_INFO_CONNECTIONS, rds_conn_info); + rds_info_register_func(RDS_INFO_SEND_MESSAGES, + rds_conn_message_info_send); + rds_info_register_func(RDS_INFO_RETRANS_MESSAGES, + rds_conn_message_info_retrans); + + return 0; +} + +void rds_conn_exit(void) +{ + rds_loop_exit(); + + WARN_ON(!hlist_empty(rds_conn_hash)); + + kmem_cache_destroy(rds_conn_slab); + + rds_info_deregister_func(RDS_INFO_CONNECTIONS, rds_conn_info); + rds_info_deregister_func(RDS_INFO_SEND_MESSAGES, + rds_conn_message_info_send); + rds_info_deregister_func(RDS_INFO_RETRANS_MESSAGES, + rds_conn_message_info_retrans); +} + +/* + * Force a disconnect + */ +void rds_conn_drop(struct rds_connection *conn) +{ + atomic_set(&conn->c_state, RDS_CONN_ERROR); + queue_work(rds_wq, &conn->c_down_w); +} + +/* + * An error occurred on the connection + */ +void +__rds_conn_error(struct rds_connection *conn, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vprintk(fmt, ap); + va_end(ap); + + rds_conn_drop(conn); +} diff --git a/net/rds/threads.c b/net/rds/threads.c new file mode 100644 index 000000000000..828a1bf9ea92 --- /dev/null +++ b/net/rds/threads.c @@ -0,0 +1,265 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include + +#include "rds.h" + +/* + * All of connection management is simplified by serializing it through + * work queues that execute in a connection managing thread. + * + * TCP wants to send acks through sendpage() in response to data_ready(), + * but it needs a process context to do so. + * + * The receive paths need to allocate but can't drop packets (!) so we have + * a thread around to block allocating if the receive fast path sees an + * allocation failure. + */ + +/* Grand Unified Theory of connection life cycle: + * At any point in time, the connection can be in one of these states: + * DOWN, CONNECTING, UP, DISCONNECTING, ERROR + * + * The following transitions are possible: + * ANY -> ERROR + * UP -> DISCONNECTING + * ERROR -> DISCONNECTING + * DISCONNECTING -> DOWN + * DOWN -> CONNECTING + * CONNECTING -> UP + * + * Transition to state DISCONNECTING/DOWN: + * - Inside the shutdown worker; synchronizes with xmit path + * through c_send_lock, and with connection management callbacks + * via c_cm_lock. + * + * For receive callbacks, we rely on the underlying transport + * (TCP, IB/RDMA) to provide the necessary synchronisation. + */ +struct workqueue_struct *rds_wq; + +void rds_connect_complete(struct rds_connection *conn) +{ + if (!rds_conn_transition(conn, RDS_CONN_CONNECTING, RDS_CONN_UP)) { + printk(KERN_WARNING "%s: Cannot transition to state UP, " + "current state is %d\n", + __func__, + atomic_read(&conn->c_state)); + atomic_set(&conn->c_state, RDS_CONN_ERROR); + queue_work(rds_wq, &conn->c_down_w); + return; + } + + rdsdebug("conn %p for %pI4 to %pI4 complete\n", + conn, &conn->c_laddr, &conn->c_faddr); + + conn->c_reconnect_jiffies = 0; + set_bit(0, &conn->c_map_queued); + queue_delayed_work(rds_wq, &conn->c_send_w, 0); + queue_delayed_work(rds_wq, &conn->c_recv_w, 0); +} + +/* + * This random exponential backoff is relied on to eventually resolve racing + * connects. + * + * If connect attempts race then both parties drop both connections and come + * here to wait for a random amount of time before trying again. Eventually + * the backoff range will be so much greater than the time it takes to + * establish a connection that one of the pair will establish the connection + * before the other's random delay fires. + * + * Connection attempts that arrive while a connection is already established + * are also considered to be racing connects. This lets a connection from + * a rebooted machine replace an existing stale connection before the transport + * notices that the connection has failed. + * + * We should *always* start with a random backoff; otherwise a broken connection + * will always take several iterations to be re-established. + */ +static void rds_queue_reconnect(struct rds_connection *conn) +{ + unsigned long rand; + + rdsdebug("conn %p for %pI4 to %pI4 reconnect jiffies %lu\n", + conn, &conn->c_laddr, &conn->c_faddr, + conn->c_reconnect_jiffies); + + set_bit(RDS_RECONNECT_PENDING, &conn->c_flags); + if (conn->c_reconnect_jiffies == 0) { + conn->c_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies; + queue_delayed_work(rds_wq, &conn->c_conn_w, 0); + return; + } + + get_random_bytes(&rand, sizeof(rand)); + rdsdebug("%lu delay %lu ceil conn %p for %pI4 -> %pI4\n", + rand % conn->c_reconnect_jiffies, conn->c_reconnect_jiffies, + conn, &conn->c_laddr, &conn->c_faddr); + queue_delayed_work(rds_wq, &conn->c_conn_w, + rand % conn->c_reconnect_jiffies); + + conn->c_reconnect_jiffies = min(conn->c_reconnect_jiffies * 2, + rds_sysctl_reconnect_max_jiffies); +} + +void rds_connect_worker(struct work_struct *work) +{ + struct rds_connection *conn = container_of(work, struct rds_connection, c_conn_w.work); + int ret; + + clear_bit(RDS_RECONNECT_PENDING, &conn->c_flags); + if (rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) { + ret = conn->c_trans->conn_connect(conn); + rdsdebug("conn %p for %pI4 to %pI4 dispatched, ret %d\n", + conn, &conn->c_laddr, &conn->c_faddr, ret); + + if (ret) { + if (rds_conn_transition(conn, RDS_CONN_CONNECTING, RDS_CONN_DOWN)) + rds_queue_reconnect(conn); + else + rds_conn_error(conn, "RDS: connect failed\n"); + } + } +} + +void rds_shutdown_worker(struct work_struct *work) +{ + struct rds_connection *conn = container_of(work, struct rds_connection, c_down_w); + + /* shut it down unless it's down already */ + if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_DOWN)) { + /* + * Quiesce the connection mgmt handlers before we start tearing + * things down. We don't hold the mutex for the entire + * duration of the shutdown operation, else we may be + * deadlocking with the CM handler. Instead, the CM event + * handler is supposed to check for state DISCONNECTING + */ + mutex_lock(&conn->c_cm_lock); + if (!rds_conn_transition(conn, RDS_CONN_UP, RDS_CONN_DISCONNECTING) + && !rds_conn_transition(conn, RDS_CONN_ERROR, RDS_CONN_DISCONNECTING)) { + rds_conn_error(conn, "shutdown called in state %d\n", + atomic_read(&conn->c_state)); + mutex_unlock(&conn->c_cm_lock); + return; + } + mutex_unlock(&conn->c_cm_lock); + + mutex_lock(&conn->c_send_lock); + conn->c_trans->conn_shutdown(conn); + rds_conn_reset(conn); + mutex_unlock(&conn->c_send_lock); + + if (!rds_conn_transition(conn, RDS_CONN_DISCONNECTING, RDS_CONN_DOWN)) { + /* This can happen - eg when we're in the middle of tearing + * down the connection, and someone unloads the rds module. + * Quite reproduceable with loopback connections. + * Mostly harmless. + */ + rds_conn_error(conn, + "%s: failed to transition to state DOWN, " + "current state is %d\n", + __func__, + atomic_read(&conn->c_state)); + return; + } + } + + /* Then reconnect if it's still live. + * The passive side of an IB loopback connection is never added + * to the conn hash, so we never trigger a reconnect on this + * conn - the reconnect is always triggered by the active peer. */ + cancel_delayed_work(&conn->c_conn_w); + if (!hlist_unhashed(&conn->c_hash_node)) + rds_queue_reconnect(conn); +} + +void rds_send_worker(struct work_struct *work) +{ + struct rds_connection *conn = container_of(work, struct rds_connection, c_send_w.work); + int ret; + + if (rds_conn_state(conn) == RDS_CONN_UP) { + ret = rds_send_xmit(conn); + rdsdebug("conn %p ret %d\n", conn, ret); + switch (ret) { + case -EAGAIN: + rds_stats_inc(s_send_immediate_retry); + queue_delayed_work(rds_wq, &conn->c_send_w, 0); + break; + case -ENOMEM: + rds_stats_inc(s_send_delayed_retry); + queue_delayed_work(rds_wq, &conn->c_send_w, 2); + default: + break; + } + } +} + +void rds_recv_worker(struct work_struct *work) +{ + struct rds_connection *conn = container_of(work, struct rds_connection, c_recv_w.work); + int ret; + + if (rds_conn_state(conn) == RDS_CONN_UP) { + ret = conn->c_trans->recv(conn); + rdsdebug("conn %p ret %d\n", conn, ret); + switch (ret) { + case -EAGAIN: + rds_stats_inc(s_recv_immediate_retry); + queue_delayed_work(rds_wq, &conn->c_recv_w, 0); + break; + case -ENOMEM: + rds_stats_inc(s_recv_delayed_retry); + queue_delayed_work(rds_wq, &conn->c_recv_w, 2); + default: + break; + } + } +} + +void rds_threads_exit(void) +{ + destroy_workqueue(rds_wq); +} + +int __init rds_threads_init(void) +{ + rds_wq = create_singlethread_workqueue("krdsd"); + if (rds_wq == NULL) + return -ENOMEM; + + return 0; +} -- cgit v1.2.3 From 13796bf9edc9d2008cfaa3ebea89312ced0d44a9 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 24 Feb 2009 15:30:24 +0000 Subject: RDS: loopback A simple rds transport to handle loopback connections. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/loop.c | 188 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ net/rds/loop.h | 9 +++ 2 files changed, 197 insertions(+) create mode 100644 net/rds/loop.c create mode 100644 net/rds/loop.h (limited to 'net') diff --git a/net/rds/loop.c b/net/rds/loop.c new file mode 100644 index 000000000000..4a61997f554d --- /dev/null +++ b/net/rds/loop.c @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include + +#include "rds.h" +#include "loop.h" + +static DEFINE_SPINLOCK(loop_conns_lock); +static LIST_HEAD(loop_conns); + +/* + * This 'loopback' transport is a special case for flows that originate + * and terminate on the same machine. + * + * Connection build-up notices if the destination address is thought of + * as a local address by a transport. At that time it decides to use the + * loopback transport instead of the bound transport of the sending socket. + * + * The loopback transport's sending path just hands the sent rds_message + * straight to the receiving path via an embedded rds_incoming. + */ + +/* + * Usually a message transits both the sender and receiver's conns as it + * flows to the receiver. In the loopback case, though, the receive path + * is handed the sending conn so the sense of the addresses is reversed. + */ +static int rds_loop_xmit(struct rds_connection *conn, struct rds_message *rm, + unsigned int hdr_off, unsigned int sg, + unsigned int off) +{ + BUG_ON(hdr_off || sg || off); + + rds_inc_init(&rm->m_inc, conn, conn->c_laddr); + rds_message_addref(rm); /* for the inc */ + + rds_recv_incoming(conn, conn->c_laddr, conn->c_faddr, &rm->m_inc, + GFP_KERNEL, KM_USER0); + + rds_send_drop_acked(conn, be64_to_cpu(rm->m_inc.i_hdr.h_sequence), + NULL); + + rds_inc_put(&rm->m_inc); + + return sizeof(struct rds_header) + be32_to_cpu(rm->m_inc.i_hdr.h_len); +} + +static int rds_loop_xmit_cong_map(struct rds_connection *conn, + struct rds_cong_map *map, + unsigned long offset) +{ + unsigned long i; + + BUG_ON(offset); + BUG_ON(map != conn->c_lcong); + + for (i = 0; i < RDS_CONG_MAP_PAGES; i++) { + memcpy((void *)conn->c_fcong->m_page_addrs[i], + (void *)map->m_page_addrs[i], PAGE_SIZE); + } + + rds_cong_map_updated(conn->c_fcong, ~(u64) 0); + + return sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; +} + +/* we need to at least give the thread something to succeed */ +static int rds_loop_recv(struct rds_connection *conn) +{ + return 0; +} + +struct rds_loop_connection { + struct list_head loop_node; + struct rds_connection *conn; +}; + +/* + * Even the loopback transport needs to keep track of its connections, + * so it can call rds_conn_destroy() on them on exit. N.B. there are + * 1+ loopback addresses (127.*.*.*) so it's not a bug to have + * multiple loopback conns allocated, although rather useless. + */ +static int rds_loop_conn_alloc(struct rds_connection *conn, gfp_t gfp) +{ + struct rds_loop_connection *lc; + unsigned long flags; + + lc = kzalloc(sizeof(struct rds_loop_connection), GFP_KERNEL); + if (lc == NULL) + return -ENOMEM; + + INIT_LIST_HEAD(&lc->loop_node); + lc->conn = conn; + conn->c_transport_data = lc; + + spin_lock_irqsave(&loop_conns_lock, flags); + list_add_tail(&lc->loop_node, &loop_conns); + spin_unlock_irqrestore(&loop_conns_lock, flags); + + return 0; +} + +static void rds_loop_conn_free(void *arg) +{ + struct rds_loop_connection *lc = arg; + rdsdebug("lc %p\n", lc); + list_del(&lc->loop_node); + kfree(lc); +} + +static int rds_loop_conn_connect(struct rds_connection *conn) +{ + rds_connect_complete(conn); + return 0; +} + +static void rds_loop_conn_shutdown(struct rds_connection *conn) +{ +} + +void rds_loop_exit(void) +{ + struct rds_loop_connection *lc, *_lc; + LIST_HEAD(tmp_list); + + /* avoid calling conn_destroy with irqs off */ + spin_lock_irq(&loop_conns_lock); + list_splice(&loop_conns, &tmp_list); + INIT_LIST_HEAD(&loop_conns); + spin_unlock_irq(&loop_conns_lock); + + list_for_each_entry_safe(lc, _lc, &tmp_list, loop_node) { + WARN_ON(lc->conn->c_passive); + rds_conn_destroy(lc->conn); + } +} + +/* + * This is missing .xmit_* because loop doesn't go through generic + * rds_send_xmit() and doesn't call rds_recv_incoming(). .listen_stop and + * .laddr_check are missing because transport.c doesn't iterate over + * rds_loop_transport. + */ +struct rds_transport rds_loop_transport = { + .xmit = rds_loop_xmit, + .xmit_cong_map = rds_loop_xmit_cong_map, + .recv = rds_loop_recv, + .conn_alloc = rds_loop_conn_alloc, + .conn_free = rds_loop_conn_free, + .conn_connect = rds_loop_conn_connect, + .conn_shutdown = rds_loop_conn_shutdown, + .inc_copy_to_user = rds_message_inc_copy_to_user, + .inc_purge = rds_message_inc_purge, + .inc_free = rds_message_inc_free, + .t_name = "loopback", +}; diff --git a/net/rds/loop.h b/net/rds/loop.h new file mode 100644 index 000000000000..f32b0939a04d --- /dev/null +++ b/net/rds/loop.h @@ -0,0 +1,9 @@ +#ifndef _RDS_LOOP_H +#define _RDS_LOOP_H + +/* loop.c */ +extern struct rds_transport rds_loop_transport; + +void rds_loop_exit(void); + +#endif -- cgit v1.2.3 From 3e5048495c8569bfdd552750e0315973c61e7c93 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 24 Feb 2009 15:30:25 +0000 Subject: RDS: sysctls RDS exposes a few tunable parameters via sysctls. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/sysctl.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 net/rds/sysctl.c (limited to 'net') diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c new file mode 100644 index 000000000000..307dc5c1be15 --- /dev/null +++ b/net/rds/sysctl.c @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include + +#include "rds.h" + +static struct ctl_table_header *rds_sysctl_reg_table; + +static unsigned long rds_sysctl_reconnect_min = 1; +static unsigned long rds_sysctl_reconnect_max = ~0UL; + +unsigned long rds_sysctl_reconnect_min_jiffies; +unsigned long rds_sysctl_reconnect_max_jiffies = HZ; + +unsigned int rds_sysctl_max_unacked_packets = 8; +unsigned int rds_sysctl_max_unacked_bytes = (16 << 20); + +unsigned int rds_sysctl_ping_enable = 1; + +static ctl_table rds_sysctl_rds_table[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "reconnect_min_delay_ms", + .data = &rds_sysctl_reconnect_min_jiffies, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_doulongvec_ms_jiffies_minmax, + .extra1 = &rds_sysctl_reconnect_min, + .extra2 = &rds_sysctl_reconnect_max_jiffies, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "reconnect_max_delay_ms", + .data = &rds_sysctl_reconnect_max_jiffies, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_doulongvec_ms_jiffies_minmax, + .extra1 = &rds_sysctl_reconnect_min_jiffies, + .extra2 = &rds_sysctl_reconnect_max, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "max_unacked_packets", + .data = &rds_sysctl_max_unacked_packets, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "max_unacked_bytes", + .data = &rds_sysctl_max_unacked_bytes, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "ping_enable", + .data = &rds_sysctl_ping_enable, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { .ctl_name = 0} +}; + +static struct ctl_path rds_sysctl_path[] = { + { .procname = "net", .ctl_name = CTL_NET, }, + { .procname = "rds", .ctl_name = CTL_UNNUMBERED, }, + { } +}; + + +void rds_sysctl_exit(void) +{ + if (rds_sysctl_reg_table) + unregister_sysctl_table(rds_sysctl_reg_table); +} + +int __init rds_sysctl_init(void) +{ + rds_sysctl_reconnect_min = msecs_to_jiffies(1); + rds_sysctl_reconnect_min_jiffies = rds_sysctl_reconnect_min; + + rds_sysctl_reg_table = register_sysctl_paths(rds_sysctl_path, rds_sysctl_rds_table); + if (rds_sysctl_reg_table == NULL) + return -ENOMEM; + return 0; +} -- cgit v1.2.3 From 7875e18e09961d29f30424c5e2e48e704dc3789b Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 24 Feb 2009 15:30:26 +0000 Subject: RDS: Message parsing Parsing of newly-received RDS message headers (including ext. headers) and copy-to/from-user routines. page.c implements a per-cpu page remainder cache, to reduce the number of allocations needed for small datagrams. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/message.c | 402 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ net/rds/page.c | 221 ++++++++++++++++++++++++++++++ 2 files changed, 623 insertions(+) create mode 100644 net/rds/message.c create mode 100644 net/rds/page.c (limited to 'net') diff --git a/net/rds/message.c b/net/rds/message.c new file mode 100644 index 000000000000..5a15dc8d0cd7 --- /dev/null +++ b/net/rds/message.c @@ -0,0 +1,402 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include + +#include "rds.h" +#include "rdma.h" + +static DECLARE_WAIT_QUEUE_HEAD(rds_message_flush_waitq); + +static unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = { +[RDS_EXTHDR_NONE] = 0, +[RDS_EXTHDR_VERSION] = sizeof(struct rds_ext_header_version), +[RDS_EXTHDR_RDMA] = sizeof(struct rds_ext_header_rdma), +[RDS_EXTHDR_RDMA_DEST] = sizeof(struct rds_ext_header_rdma_dest), +}; + + +void rds_message_addref(struct rds_message *rm) +{ + rdsdebug("addref rm %p ref %d\n", rm, atomic_read(&rm->m_refcount)); + atomic_inc(&rm->m_refcount); +} + +/* + * This relies on dma_map_sg() not touching sg[].page during merging. + */ +static void rds_message_purge(struct rds_message *rm) +{ + unsigned long i; + + if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags))) + return; + + for (i = 0; i < rm->m_nents; i++) { + rdsdebug("putting data page %p\n", (void *)sg_page(&rm->m_sg[i])); + /* XXX will have to put_page for page refs */ + __free_page(sg_page(&rm->m_sg[i])); + } + rm->m_nents = 0; + + if (rm->m_rdma_op) + rds_rdma_free_op(rm->m_rdma_op); + if (rm->m_rdma_mr) + rds_mr_put(rm->m_rdma_mr); +} + +void rds_message_inc_purge(struct rds_incoming *inc) +{ + struct rds_message *rm = container_of(inc, struct rds_message, m_inc); + rds_message_purge(rm); +} + +void rds_message_put(struct rds_message *rm) +{ + rdsdebug("put rm %p ref %d\n", rm, atomic_read(&rm->m_refcount)); + + if (atomic_dec_and_test(&rm->m_refcount)) { + BUG_ON(!list_empty(&rm->m_sock_item)); + BUG_ON(!list_empty(&rm->m_conn_item)); + rds_message_purge(rm); + + kfree(rm); + } +} + +void rds_message_inc_free(struct rds_incoming *inc) +{ + struct rds_message *rm = container_of(inc, struct rds_message, m_inc); + rds_message_put(rm); +} + +void rds_message_populate_header(struct rds_header *hdr, __be16 sport, + __be16 dport, u64 seq) +{ + hdr->h_flags = 0; + hdr->h_sport = sport; + hdr->h_dport = dport; + hdr->h_sequence = cpu_to_be64(seq); + hdr->h_exthdr[0] = RDS_EXTHDR_NONE; +} + +int rds_message_add_extension(struct rds_header *hdr, + unsigned int type, const void *data, unsigned int len) +{ + unsigned int ext_len = sizeof(u8) + len; + unsigned char *dst; + + /* For now, refuse to add more than one extension header */ + if (hdr->h_exthdr[0] != RDS_EXTHDR_NONE) + return 0; + + if (type >= __RDS_EXTHDR_MAX + || len != rds_exthdr_size[type]) + return 0; + + if (ext_len >= RDS_HEADER_EXT_SPACE) + return 0; + dst = hdr->h_exthdr; + + *dst++ = type; + memcpy(dst, data, len); + + dst[len] = RDS_EXTHDR_NONE; + return 1; +} + +/* + * If a message has extension headers, retrieve them here. + * Call like this: + * + * unsigned int pos = 0; + * + * while (1) { + * buflen = sizeof(buffer); + * type = rds_message_next_extension(hdr, &pos, buffer, &buflen); + * if (type == RDS_EXTHDR_NONE) + * break; + * ... + * } + */ +int rds_message_next_extension(struct rds_header *hdr, + unsigned int *pos, void *buf, unsigned int *buflen) +{ + unsigned int offset, ext_type, ext_len; + u8 *src = hdr->h_exthdr; + + offset = *pos; + if (offset >= RDS_HEADER_EXT_SPACE) + goto none; + + /* Get the extension type and length. For now, the + * length is implied by the extension type. */ + ext_type = src[offset++]; + + if (ext_type == RDS_EXTHDR_NONE || ext_type >= __RDS_EXTHDR_MAX) + goto none; + ext_len = rds_exthdr_size[ext_type]; + if (offset + ext_len > RDS_HEADER_EXT_SPACE) + goto none; + + *pos = offset + ext_len; + if (ext_len < *buflen) + *buflen = ext_len; + memcpy(buf, src + offset, *buflen); + return ext_type; + +none: + *pos = RDS_HEADER_EXT_SPACE; + *buflen = 0; + return RDS_EXTHDR_NONE; +} + +int rds_message_add_version_extension(struct rds_header *hdr, unsigned int version) +{ + struct rds_ext_header_version ext_hdr; + + ext_hdr.h_version = cpu_to_be32(version); + return rds_message_add_extension(hdr, RDS_EXTHDR_VERSION, &ext_hdr, sizeof(ext_hdr)); +} + +int rds_message_get_version_extension(struct rds_header *hdr, unsigned int *version) +{ + struct rds_ext_header_version ext_hdr; + unsigned int pos = 0, len = sizeof(ext_hdr); + + /* We assume the version extension is the only one present */ + if (rds_message_next_extension(hdr, &pos, &ext_hdr, &len) != RDS_EXTHDR_VERSION) + return 0; + *version = be32_to_cpu(ext_hdr.h_version); + return 1; +} + +int rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset) +{ + struct rds_ext_header_rdma_dest ext_hdr; + + ext_hdr.h_rdma_rkey = cpu_to_be32(r_key); + ext_hdr.h_rdma_offset = cpu_to_be32(offset); + return rds_message_add_extension(hdr, RDS_EXTHDR_RDMA_DEST, &ext_hdr, sizeof(ext_hdr)); +} + +struct rds_message *rds_message_alloc(unsigned int nents, gfp_t gfp) +{ + struct rds_message *rm; + + rm = kzalloc(sizeof(struct rds_message) + + (nents * sizeof(struct scatterlist)), gfp); + if (!rm) + goto out; + + if (nents) + sg_init_table(rm->m_sg, nents); + atomic_set(&rm->m_refcount, 1); + INIT_LIST_HEAD(&rm->m_sock_item); + INIT_LIST_HEAD(&rm->m_conn_item); + spin_lock_init(&rm->m_rs_lock); + +out: + return rm; +} + +struct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len) +{ + struct rds_message *rm; + unsigned int i; + + rm = rds_message_alloc(ceil(total_len, PAGE_SIZE), GFP_KERNEL); + if (rm == NULL) + return ERR_PTR(-ENOMEM); + + set_bit(RDS_MSG_PAGEVEC, &rm->m_flags); + rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len); + rm->m_nents = ceil(total_len, PAGE_SIZE); + + for (i = 0; i < rm->m_nents; ++i) { + sg_set_page(&rm->m_sg[i], + virt_to_page(page_addrs[i]), + PAGE_SIZE, 0); + } + + return rm; +} + +struct rds_message *rds_message_copy_from_user(struct iovec *first_iov, + size_t total_len) +{ + unsigned long to_copy; + unsigned long iov_off; + unsigned long sg_off; + struct rds_message *rm; + struct iovec *iov; + struct scatterlist *sg; + int ret; + + rm = rds_message_alloc(ceil(total_len, PAGE_SIZE), GFP_KERNEL); + if (rm == NULL) { + ret = -ENOMEM; + goto out; + } + + rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len); + + /* + * now allocate and copy in the data payload. + */ + sg = rm->m_sg; + iov = first_iov; + iov_off = 0; + sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */ + + while (total_len) { + if (sg_page(sg) == NULL) { + ret = rds_page_remainder_alloc(sg, total_len, + GFP_HIGHUSER); + if (ret) + goto out; + rm->m_nents++; + sg_off = 0; + } + + while (iov_off == iov->iov_len) { + iov_off = 0; + iov++; + } + + to_copy = min(iov->iov_len - iov_off, sg->length - sg_off); + to_copy = min_t(size_t, to_copy, total_len); + + rdsdebug("copying %lu bytes from user iov [%p, %zu] + %lu to " + "sg [%p, %u, %u] + %lu\n", + to_copy, iov->iov_base, iov->iov_len, iov_off, + (void *)sg_page(sg), sg->offset, sg->length, sg_off); + + ret = rds_page_copy_from_user(sg_page(sg), sg->offset + sg_off, + iov->iov_base + iov_off, + to_copy); + if (ret) + goto out; + + iov_off += to_copy; + total_len -= to_copy; + sg_off += to_copy; + + if (sg_off == sg->length) + sg++; + } + + ret = 0; +out: + if (ret) { + if (rm) + rds_message_put(rm); + rm = ERR_PTR(ret); + } + return rm; +} + +int rds_message_inc_copy_to_user(struct rds_incoming *inc, + struct iovec *first_iov, size_t size) +{ + struct rds_message *rm; + struct iovec *iov; + struct scatterlist *sg; + unsigned long to_copy; + unsigned long iov_off; + unsigned long vec_off; + int copied; + int ret; + u32 len; + + rm = container_of(inc, struct rds_message, m_inc); + len = be32_to_cpu(rm->m_inc.i_hdr.h_len); + + iov = first_iov; + iov_off = 0; + sg = rm->m_sg; + vec_off = 0; + copied = 0; + + while (copied < size && copied < len) { + while (iov_off == iov->iov_len) { + iov_off = 0; + iov++; + } + + to_copy = min(iov->iov_len - iov_off, sg->length - vec_off); + to_copy = min_t(size_t, to_copy, size - copied); + to_copy = min_t(unsigned long, to_copy, len - copied); + + rdsdebug("copying %lu bytes to user iov [%p, %zu] + %lu to " + "sg [%p, %u, %u] + %lu\n", + to_copy, iov->iov_base, iov->iov_len, iov_off, + sg_page(sg), sg->offset, sg->length, vec_off); + + ret = rds_page_copy_to_user(sg_page(sg), sg->offset + vec_off, + iov->iov_base + iov_off, + to_copy); + if (ret) { + copied = ret; + break; + } + + iov_off += to_copy; + vec_off += to_copy; + copied += to_copy; + + if (vec_off == sg->length) { + vec_off = 0; + sg++; + } + } + + return copied; +} + +/* + * If the message is still on the send queue, wait until the transport + * is done with it. This is particularly important for RDMA operations. + */ +void rds_message_wait(struct rds_message *rm) +{ + wait_event(rds_message_flush_waitq, + !test_bit(RDS_MSG_MAPPED, &rm->m_flags)); +} + +void rds_message_unmapped(struct rds_message *rm) +{ + clear_bit(RDS_MSG_MAPPED, &rm->m_flags); + if (waitqueue_active(&rds_message_flush_waitq)) + wake_up(&rds_message_flush_waitq); +} + diff --git a/net/rds/page.c b/net/rds/page.c new file mode 100644 index 000000000000..c460743a89ad --- /dev/null +++ b/net/rds/page.c @@ -0,0 +1,221 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include + +#include "rds.h" + +struct rds_page_remainder { + struct page *r_page; + unsigned long r_offset; +}; + +DEFINE_PER_CPU(struct rds_page_remainder, rds_page_remainders) ____cacheline_aligned; + +/* + * returns 0 on success or -errno on failure. + * + * We don't have to worry about flush_dcache_page() as this only works + * with private pages. If, say, we were to do directed receive to pinned + * user pages we'd have to worry more about cache coherence. (Though + * the flush_dcache_page() in get_user_pages() would probably be enough). + */ +int rds_page_copy_user(struct page *page, unsigned long offset, + void __user *ptr, unsigned long bytes, + int to_user) +{ + unsigned long ret; + void *addr; + + if (to_user) + rds_stats_add(s_copy_to_user, bytes); + else + rds_stats_add(s_copy_from_user, bytes); + + addr = kmap_atomic(page, KM_USER0); + if (to_user) + ret = __copy_to_user_inatomic(ptr, addr + offset, bytes); + else + ret = __copy_from_user_inatomic(addr + offset, ptr, bytes); + kunmap_atomic(addr, KM_USER0); + + if (ret) { + addr = kmap(page); + if (to_user) + ret = copy_to_user(ptr, addr + offset, bytes); + else + ret = copy_from_user(addr + offset, ptr, bytes); + kunmap(page); + if (ret) + return -EFAULT; + } + + return 0; +} + +/* + * Message allocation uses this to build up regions of a message. + * + * @bytes - the number of bytes needed. + * @gfp - the waiting behaviour of the allocation + * + * @gfp is always ored with __GFP_HIGHMEM. Callers must be prepared to + * kmap the pages, etc. + * + * If @bytes is at least a full page then this just returns a page from + * alloc_page(). + * + * If @bytes is a partial page then this stores the unused region of the + * page in a per-cpu structure. Future partial-page allocations may be + * satisfied from that cached region. This lets us waste less memory on + * small allocations with minimal complexity. It works because the transmit + * path passes read-only page regions down to devices. They hold a page + * reference until they are done with the region. + */ +int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes, + gfp_t gfp) +{ + struct rds_page_remainder *rem; + unsigned long flags; + struct page *page; + int ret; + + gfp |= __GFP_HIGHMEM; + + /* jump straight to allocation if we're trying for a huge page */ + if (bytes >= PAGE_SIZE) { + page = alloc_page(gfp); + if (page == NULL) { + ret = -ENOMEM; + } else { + sg_set_page(scat, page, PAGE_SIZE, 0); + ret = 0; + } + goto out; + } + + rem = &per_cpu(rds_page_remainders, get_cpu()); + local_irq_save(flags); + + while (1) { + /* avoid a tiny region getting stuck by tossing it */ + if (rem->r_page && bytes > (PAGE_SIZE - rem->r_offset)) { + rds_stats_inc(s_page_remainder_miss); + __free_page(rem->r_page); + rem->r_page = NULL; + } + + /* hand out a fragment from the cached page */ + if (rem->r_page && bytes <= (PAGE_SIZE - rem->r_offset)) { + sg_set_page(scat, rem->r_page, bytes, rem->r_offset); + get_page(sg_page(scat)); + + if (rem->r_offset != 0) + rds_stats_inc(s_page_remainder_hit); + + rem->r_offset += bytes; + if (rem->r_offset == PAGE_SIZE) { + __free_page(rem->r_page); + rem->r_page = NULL; + } + ret = 0; + break; + } + + /* alloc if there is nothing for us to use */ + local_irq_restore(flags); + put_cpu(); + + page = alloc_page(gfp); + + rem = &per_cpu(rds_page_remainders, get_cpu()); + local_irq_save(flags); + + if (page == NULL) { + ret = -ENOMEM; + break; + } + + /* did someone race to fill the remainder before us? */ + if (rem->r_page) { + __free_page(page); + continue; + } + + /* otherwise install our page and loop around to alloc */ + rem->r_page = page; + rem->r_offset = 0; + } + + local_irq_restore(flags); + put_cpu(); +out: + rdsdebug("bytes %lu ret %d %p %u %u\n", bytes, ret, + ret ? NULL : sg_page(scat), ret ? 0 : scat->offset, + ret ? 0 : scat->length); + return ret; +} + +static int rds_page_remainder_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + struct rds_page_remainder *rem; + long cpu = (long)hcpu; + + rem = &per_cpu(rds_page_remainders, cpu); + + rdsdebug("cpu %ld action 0x%lx\n", cpu, action); + + switch (action) { + case CPU_DEAD: + if (rem->r_page) + __free_page(rem->r_page); + rem->r_page = NULL; + break; + } + + return 0; +} + +static struct notifier_block rds_page_remainder_nb = { + .notifier_call = rds_page_remainder_cpu_notify, +}; + +void rds_page_exit(void) +{ + int i; + + for_each_possible_cpu(i) + rds_page_remainder_cpu_notify(&rds_page_remainder_nb, + (unsigned long)CPU_DEAD, + (void *)(long)i); +} -- cgit v1.2.3 From 5c11559046c4b3498d1977a029de8a312eacce35 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 24 Feb 2009 15:30:27 +0000 Subject: RDS: send.c This is the code to send an RDS datagram. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/send.c | 1003 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1003 insertions(+) create mode 100644 net/rds/send.c (limited to 'net') diff --git a/net/rds/send.c b/net/rds/send.c new file mode 100644 index 000000000000..1b37364656f0 --- /dev/null +++ b/net/rds/send.c @@ -0,0 +1,1003 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include +#include + +#include "rds.h" +#include "rdma.h" + +/* When transmitting messages in rds_send_xmit, we need to emerge from + * time to time and briefly release the CPU. Otherwise the softlock watchdog + * will kick our shin. + * Also, it seems fairer to not let one busy connection stall all the + * others. + * + * send_batch_count is the number of times we'll loop in send_xmit. Setting + * it to 0 will restore the old behavior (where we looped until we had + * drained the queue). + */ +static int send_batch_count = 64; +module_param(send_batch_count, int, 0444); +MODULE_PARM_DESC(send_batch_count, " batch factor when working the send queue"); + +/* + * Reset the send state. Caller must hold c_send_lock when calling here. + */ +void rds_send_reset(struct rds_connection *conn) +{ + struct rds_message *rm, *tmp; + unsigned long flags; + + if (conn->c_xmit_rm) { + /* Tell the user the RDMA op is no longer mapped by the + * transport. This isn't entirely true (it's flushed out + * independently) but as the connection is down, there's + * no ongoing RDMA to/from that memory */ + rds_message_unmapped(conn->c_xmit_rm); + rds_message_put(conn->c_xmit_rm); + conn->c_xmit_rm = NULL; + } + conn->c_xmit_sg = 0; + conn->c_xmit_hdr_off = 0; + conn->c_xmit_data_off = 0; + conn->c_xmit_rdma_sent = 0; + + conn->c_map_queued = 0; + + conn->c_unacked_packets = rds_sysctl_max_unacked_packets; + conn->c_unacked_bytes = rds_sysctl_max_unacked_bytes; + + /* Mark messages as retransmissions, and move them to the send q */ + spin_lock_irqsave(&conn->c_lock, flags); + list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) { + set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags); + set_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags); + } + list_splice_init(&conn->c_retrans, &conn->c_send_queue); + spin_unlock_irqrestore(&conn->c_lock, flags); +} + +/* + * We're making the concious trade-off here to only send one message + * down the connection at a time. + * Pro: + * - tx queueing is a simple fifo list + * - reassembly is optional and easily done by transports per conn + * - no per flow rx lookup at all, straight to the socket + * - less per-frag memory and wire overhead + * Con: + * - queued acks can be delayed behind large messages + * Depends: + * - small message latency is higher behind queued large messages + * - large message latency isn't starved by intervening small sends + */ +int rds_send_xmit(struct rds_connection *conn) +{ + struct rds_message *rm; + unsigned long flags; + unsigned int tmp; + unsigned int send_quota = send_batch_count; + struct scatterlist *sg; + int ret = 0; + int was_empty = 0; + LIST_HEAD(to_be_dropped); + + /* + * sendmsg calls here after having queued its message on the send + * queue. We only have one task feeding the connection at a time. If + * another thread is already feeding the queue then we back off. This + * avoids blocking the caller and trading per-connection data between + * caches per message. + * + * The sem holder will issue a retry if they notice that someone queued + * a message after they stopped walking the send queue but before they + * dropped the sem. + */ + if (!mutex_trylock(&conn->c_send_lock)) { + rds_stats_inc(s_send_sem_contention); + ret = -ENOMEM; + goto out; + } + + if (conn->c_trans->xmit_prepare) + conn->c_trans->xmit_prepare(conn); + + /* + * spin trying to push headers and data down the connection until + * the connection doens't make forward progress. + */ + while (--send_quota) { + /* + * See if need to send a congestion map update if we're + * between sending messages. The send_sem protects our sole + * use of c_map_offset and _bytes. + * Note this is used only by transports that define a special + * xmit_cong_map function. For all others, we create allocate + * a cong_map message and treat it just like any other send. + */ + if (conn->c_map_bytes) { + ret = conn->c_trans->xmit_cong_map(conn, conn->c_lcong, + conn->c_map_offset); + if (ret <= 0) + break; + + conn->c_map_offset += ret; + conn->c_map_bytes -= ret; + if (conn->c_map_bytes) + continue; + } + + /* If we're done sending the current message, clear the + * offset and S/G temporaries. + */ + rm = conn->c_xmit_rm; + if (rm != NULL && + conn->c_xmit_hdr_off == sizeof(struct rds_header) && + conn->c_xmit_sg == rm->m_nents) { + conn->c_xmit_rm = NULL; + conn->c_xmit_sg = 0; + conn->c_xmit_hdr_off = 0; + conn->c_xmit_data_off = 0; + conn->c_xmit_rdma_sent = 0; + + /* Release the reference to the previous message. */ + rds_message_put(rm); + rm = NULL; + } + + /* If we're asked to send a cong map update, do so. + */ + if (rm == NULL && test_and_clear_bit(0, &conn->c_map_queued)) { + if (conn->c_trans->xmit_cong_map != NULL) { + conn->c_map_offset = 0; + conn->c_map_bytes = sizeof(struct rds_header) + + RDS_CONG_MAP_BYTES; + continue; + } + + rm = rds_cong_update_alloc(conn); + if (IS_ERR(rm)) { + ret = PTR_ERR(rm); + break; + } + + conn->c_xmit_rm = rm; + } + + /* + * Grab the next message from the send queue, if there is one. + * + * c_xmit_rm holds a ref while we're sending this message down + * the connction. We can use this ref while holding the + * send_sem.. rds_send_reset() is serialized with it. + */ + if (rm == NULL) { + unsigned int len; + + spin_lock_irqsave(&conn->c_lock, flags); + + if (!list_empty(&conn->c_send_queue)) { + rm = list_entry(conn->c_send_queue.next, + struct rds_message, + m_conn_item); + rds_message_addref(rm); + + /* + * Move the message from the send queue to the retransmit + * list right away. + */ + list_move_tail(&rm->m_conn_item, &conn->c_retrans); + } + + spin_unlock_irqrestore(&conn->c_lock, flags); + + if (rm == NULL) { + was_empty = 1; + break; + } + + /* Unfortunately, the way Infiniband deals with + * RDMA to a bad MR key is by moving the entire + * queue pair to error state. We cold possibly + * recover from that, but right now we drop the + * connection. + * Therefore, we never retransmit messages with RDMA ops. + */ + if (rm->m_rdma_op + && test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) { + spin_lock_irqsave(&conn->c_lock, flags); + if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) + list_move(&rm->m_conn_item, &to_be_dropped); + spin_unlock_irqrestore(&conn->c_lock, flags); + rds_message_put(rm); + continue; + } + + /* Require an ACK every once in a while */ + len = ntohl(rm->m_inc.i_hdr.h_len); + if (conn->c_unacked_packets == 0 + || conn->c_unacked_bytes < len) { + __set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags); + + conn->c_unacked_packets = rds_sysctl_max_unacked_packets; + conn->c_unacked_bytes = rds_sysctl_max_unacked_bytes; + rds_stats_inc(s_send_ack_required); + } else { + conn->c_unacked_bytes -= len; + conn->c_unacked_packets--; + } + + conn->c_xmit_rm = rm; + } + + /* + * Try and send an rdma message. Let's see if we can + * keep this simple and require that the transport either + * send the whole rdma or none of it. + */ + if (rm->m_rdma_op && !conn->c_xmit_rdma_sent) { + ret = conn->c_trans->xmit_rdma(conn, rm->m_rdma_op); + if (ret) + break; + conn->c_xmit_rdma_sent = 1; + /* The transport owns the mapped memory for now. + * You can't unmap it while it's on the send queue */ + set_bit(RDS_MSG_MAPPED, &rm->m_flags); + } + + if (conn->c_xmit_hdr_off < sizeof(struct rds_header) || + conn->c_xmit_sg < rm->m_nents) { + ret = conn->c_trans->xmit(conn, rm, + conn->c_xmit_hdr_off, + conn->c_xmit_sg, + conn->c_xmit_data_off); + if (ret <= 0) + break; + + if (conn->c_xmit_hdr_off < sizeof(struct rds_header)) { + tmp = min_t(int, ret, + sizeof(struct rds_header) - + conn->c_xmit_hdr_off); + conn->c_xmit_hdr_off += tmp; + ret -= tmp; + } + + sg = &rm->m_sg[conn->c_xmit_sg]; + while (ret) { + tmp = min_t(int, ret, sg->length - + conn->c_xmit_data_off); + conn->c_xmit_data_off += tmp; + ret -= tmp; + if (conn->c_xmit_data_off == sg->length) { + conn->c_xmit_data_off = 0; + sg++; + conn->c_xmit_sg++; + BUG_ON(ret != 0 && + conn->c_xmit_sg == rm->m_nents); + } + } + } + } + + /* Nuke any messages we decided not to retransmit. */ + if (!list_empty(&to_be_dropped)) + rds_send_remove_from_sock(&to_be_dropped, RDS_RDMA_DROPPED); + + if (conn->c_trans->xmit_complete) + conn->c_trans->xmit_complete(conn); + + /* + * We might be racing with another sender who queued a message but + * backed off on noticing that we held the c_send_lock. If we check + * for queued messages after dropping the sem then either we'll + * see the queued message or the queuer will get the sem. If we + * notice the queued message then we trigger an immediate retry. + * + * We need to be careful only to do this when we stopped processing + * the send queue because it was empty. It's the only way we + * stop processing the loop when the transport hasn't taken + * responsibility for forward progress. + */ + mutex_unlock(&conn->c_send_lock); + + if (conn->c_map_bytes || (send_quota == 0 && !was_empty)) { + /* We exhausted the send quota, but there's work left to + * do. Return and (re-)schedule the send worker. + */ + ret = -EAGAIN; + } + + if (ret == 0 && was_empty) { + /* A simple bit test would be way faster than taking the + * spin lock */ + spin_lock_irqsave(&conn->c_lock, flags); + if (!list_empty(&conn->c_send_queue)) { + rds_stats_inc(s_send_sem_queue_raced); + ret = -EAGAIN; + } + spin_unlock_irqrestore(&conn->c_lock, flags); + } +out: + return ret; +} + +static void rds_send_sndbuf_remove(struct rds_sock *rs, struct rds_message *rm) +{ + u32 len = be32_to_cpu(rm->m_inc.i_hdr.h_len); + + assert_spin_locked(&rs->rs_lock); + + BUG_ON(rs->rs_snd_bytes < len); + rs->rs_snd_bytes -= len; + + if (rs->rs_snd_bytes == 0) + rds_stats_inc(s_send_queue_empty); +} + +static inline int rds_send_is_acked(struct rds_message *rm, u64 ack, + is_acked_func is_acked) +{ + if (is_acked) + return is_acked(rm, ack); + return be64_to_cpu(rm->m_inc.i_hdr.h_sequence) <= ack; +} + +/* + * Returns true if there are no messages on the send and retransmit queues + * which have a sequence number greater than or equal to the given sequence + * number. + */ +int rds_send_acked_before(struct rds_connection *conn, u64 seq) +{ + struct rds_message *rm, *tmp; + int ret = 1; + + spin_lock(&conn->c_lock); + + list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) { + if (be64_to_cpu(rm->m_inc.i_hdr.h_sequence) < seq) + ret = 0; + break; + } + + list_for_each_entry_safe(rm, tmp, &conn->c_send_queue, m_conn_item) { + if (be64_to_cpu(rm->m_inc.i_hdr.h_sequence) < seq) + ret = 0; + break; + } + + spin_unlock(&conn->c_lock); + + return ret; +} + +/* + * This is pretty similar to what happens below in the ACK + * handling code - except that we call here as soon as we get + * the IB send completion on the RDMA op and the accompanying + * message. + */ +void rds_rdma_send_complete(struct rds_message *rm, int status) +{ + struct rds_sock *rs = NULL; + struct rds_rdma_op *ro; + struct rds_notifier *notifier; + + spin_lock(&rm->m_rs_lock); + + ro = rm->m_rdma_op; + if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) + && ro && ro->r_notify && ro->r_notifier) { + notifier = ro->r_notifier; + rs = rm->m_rs; + sock_hold(rds_rs_to_sk(rs)); + + notifier->n_status = status; + spin_lock(&rs->rs_lock); + list_add_tail(¬ifier->n_list, &rs->rs_notify_queue); + spin_unlock(&rs->rs_lock); + + ro->r_notifier = NULL; + } + + spin_unlock(&rm->m_rs_lock); + + if (rs) { + rds_wake_sk_sleep(rs); + sock_put(rds_rs_to_sk(rs)); + } +} + +/* + * This is the same as rds_rdma_send_complete except we + * don't do any locking - we have all the ingredients (message, + * socket, socket lock) and can just move the notifier. + */ +static inline void +__rds_rdma_send_complete(struct rds_sock *rs, struct rds_message *rm, int status) +{ + struct rds_rdma_op *ro; + + ro = rm->m_rdma_op; + if (ro && ro->r_notify && ro->r_notifier) { + ro->r_notifier->n_status = status; + list_add_tail(&ro->r_notifier->n_list, &rs->rs_notify_queue); + ro->r_notifier = NULL; + } + + /* No need to wake the app - caller does this */ +} + +/* + * This is called from the IB send completion when we detect + * a RDMA operation that failed with remote access error. + * So speed is not an issue here. + */ +struct rds_message *rds_send_get_message(struct rds_connection *conn, + struct rds_rdma_op *op) +{ + struct rds_message *rm, *tmp, *found = NULL; + unsigned long flags; + + spin_lock_irqsave(&conn->c_lock, flags); + + list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) { + if (rm->m_rdma_op == op) { + atomic_inc(&rm->m_refcount); + found = rm; + goto out; + } + } + + list_for_each_entry_safe(rm, tmp, &conn->c_send_queue, m_conn_item) { + if (rm->m_rdma_op == op) { + atomic_inc(&rm->m_refcount); + found = rm; + break; + } + } + +out: + spin_unlock_irqrestore(&conn->c_lock, flags); + + return found; +} + +/* + * This removes messages from the socket's list if they're on it. The list + * argument must be private to the caller, we must be able to modify it + * without locks. The messages must have a reference held for their + * position on the list. This function will drop that reference after + * removing the messages from the 'messages' list regardless of if it found + * the messages on the socket list or not. + */ +void rds_send_remove_from_sock(struct list_head *messages, int status) +{ + unsigned long flags = 0; /* silence gcc :P */ + struct rds_sock *rs = NULL; + struct rds_message *rm; + + local_irq_save(flags); + while (!list_empty(messages)) { + rm = list_entry(messages->next, struct rds_message, + m_conn_item); + list_del_init(&rm->m_conn_item); + + /* + * If we see this flag cleared then we're *sure* that someone + * else beat us to removing it from the sock. If we race + * with their flag update we'll get the lock and then really + * see that the flag has been cleared. + * + * The message spinlock makes sure nobody clears rm->m_rs + * while we're messing with it. It does not prevent the + * message from being removed from the socket, though. + */ + spin_lock(&rm->m_rs_lock); + if (!test_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) + goto unlock_and_drop; + + if (rs != rm->m_rs) { + if (rs) { + spin_unlock(&rs->rs_lock); + rds_wake_sk_sleep(rs); + sock_put(rds_rs_to_sk(rs)); + } + rs = rm->m_rs; + spin_lock(&rs->rs_lock); + sock_hold(rds_rs_to_sk(rs)); + } + + if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) { + struct rds_rdma_op *ro = rm->m_rdma_op; + struct rds_notifier *notifier; + + list_del_init(&rm->m_sock_item); + rds_send_sndbuf_remove(rs, rm); + + if (ro && ro->r_notifier + && (status || ro->r_notify)) { + notifier = ro->r_notifier; + list_add_tail(¬ifier->n_list, + &rs->rs_notify_queue); + if (!notifier->n_status) + notifier->n_status = status; + rm->m_rdma_op->r_notifier = NULL; + } + rds_message_put(rm); + rm->m_rs = NULL; + } + +unlock_and_drop: + spin_unlock(&rm->m_rs_lock); + rds_message_put(rm); + } + + if (rs) { + spin_unlock(&rs->rs_lock); + rds_wake_sk_sleep(rs); + sock_put(rds_rs_to_sk(rs)); + } + local_irq_restore(flags); +} + +/* + * Transports call here when they've determined that the receiver queued + * messages up to, and including, the given sequence number. Messages are + * moved to the retrans queue when rds_send_xmit picks them off the send + * queue. This means that in the TCP case, the message may not have been + * assigned the m_ack_seq yet - but that's fine as long as tcp_is_acked + * checks the RDS_MSG_HAS_ACK_SEQ bit. + * + * XXX It's not clear to me how this is safely serialized with socket + * destruction. Maybe it should bail if it sees SOCK_DEAD. + */ +void rds_send_drop_acked(struct rds_connection *conn, u64 ack, + is_acked_func is_acked) +{ + struct rds_message *rm, *tmp; + unsigned long flags; + LIST_HEAD(list); + + spin_lock_irqsave(&conn->c_lock, flags); + + list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) { + if (!rds_send_is_acked(rm, ack, is_acked)) + break; + + list_move(&rm->m_conn_item, &list); + clear_bit(RDS_MSG_ON_CONN, &rm->m_flags); + } + + /* order flag updates with spin locks */ + if (!list_empty(&list)) + smp_mb__after_clear_bit(); + + spin_unlock_irqrestore(&conn->c_lock, flags); + + /* now remove the messages from the sock list as needed */ + rds_send_remove_from_sock(&list, RDS_RDMA_SUCCESS); +} + +void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) +{ + struct rds_message *rm, *tmp; + struct rds_connection *conn; + unsigned long flags; + LIST_HEAD(list); + int wake = 0; + + /* get all the messages we're dropping under the rs lock */ + spin_lock_irqsave(&rs->rs_lock, flags); + + list_for_each_entry_safe(rm, tmp, &rs->rs_send_queue, m_sock_item) { + if (dest && (dest->sin_addr.s_addr != rm->m_daddr || + dest->sin_port != rm->m_inc.i_hdr.h_dport)) + continue; + + wake = 1; + list_move(&rm->m_sock_item, &list); + rds_send_sndbuf_remove(rs, rm); + clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags); + + /* If this is a RDMA operation, notify the app. */ + __rds_rdma_send_complete(rs, rm, RDS_RDMA_CANCELED); + } + + /* order flag updates with the rs lock */ + if (wake) + smp_mb__after_clear_bit(); + + spin_unlock_irqrestore(&rs->rs_lock, flags); + + if (wake) + rds_wake_sk_sleep(rs); + + conn = NULL; + + /* now remove the messages from the conn list as needed */ + list_for_each_entry(rm, &list, m_sock_item) { + /* We do this here rather than in the loop above, so that + * we don't have to nest m_rs_lock under rs->rs_lock */ + spin_lock(&rm->m_rs_lock); + rm->m_rs = NULL; + spin_unlock(&rm->m_rs_lock); + + /* + * If we see this flag cleared then we're *sure* that someone + * else beat us to removing it from the conn. If we race + * with their flag update we'll get the lock and then really + * see that the flag has been cleared. + */ + if (!test_bit(RDS_MSG_ON_CONN, &rm->m_flags)) + continue; + + if (conn != rm->m_inc.i_conn) { + if (conn) + spin_unlock_irqrestore(&conn->c_lock, flags); + conn = rm->m_inc.i_conn; + spin_lock_irqsave(&conn->c_lock, flags); + } + + if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) { + list_del_init(&rm->m_conn_item); + rds_message_put(rm); + } + } + + if (conn) + spin_unlock_irqrestore(&conn->c_lock, flags); + + while (!list_empty(&list)) { + rm = list_entry(list.next, struct rds_message, m_sock_item); + list_del_init(&rm->m_sock_item); + + rds_message_wait(rm); + rds_message_put(rm); + } +} + +/* + * we only want this to fire once so we use the callers 'queued'. It's + * possible that another thread can race with us and remove the + * message from the flow with RDS_CANCEL_SENT_TO. + */ +static int rds_send_queue_rm(struct rds_sock *rs, struct rds_connection *conn, + struct rds_message *rm, __be16 sport, + __be16 dport, int *queued) +{ + unsigned long flags; + u32 len; + + if (*queued) + goto out; + + len = be32_to_cpu(rm->m_inc.i_hdr.h_len); + + /* this is the only place which holds both the socket's rs_lock + * and the connection's c_lock */ + spin_lock_irqsave(&rs->rs_lock, flags); + + /* + * If there is a little space in sndbuf, we don't queue anything, + * and userspace gets -EAGAIN. But poll() indicates there's send + * room. This can lead to bad behavior (spinning) if snd_bytes isn't + * freed up by incoming acks. So we check the *old* value of + * rs_snd_bytes here to allow the last msg to exceed the buffer, + * and poll() now knows no more data can be sent. + */ + if (rs->rs_snd_bytes < rds_sk_sndbuf(rs)) { + rs->rs_snd_bytes += len; + + /* let recv side know we are close to send space exhaustion. + * This is probably not the optimal way to do it, as this + * means we set the flag on *all* messages as soon as our + * throughput hits a certain threshold. + */ + if (rs->rs_snd_bytes >= rds_sk_sndbuf(rs) / 2) + __set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags); + + list_add_tail(&rm->m_sock_item, &rs->rs_send_queue); + set_bit(RDS_MSG_ON_SOCK, &rm->m_flags); + rds_message_addref(rm); + rm->m_rs = rs; + + /* The code ordering is a little weird, but we're + trying to minimize the time we hold c_lock */ + rds_message_populate_header(&rm->m_inc.i_hdr, sport, dport, 0); + rm->m_inc.i_conn = conn; + rds_message_addref(rm); + + spin_lock(&conn->c_lock); + rm->m_inc.i_hdr.h_sequence = cpu_to_be64(conn->c_next_tx_seq++); + list_add_tail(&rm->m_conn_item, &conn->c_send_queue); + set_bit(RDS_MSG_ON_CONN, &rm->m_flags); + spin_unlock(&conn->c_lock); + + rdsdebug("queued msg %p len %d, rs %p bytes %d seq %llu\n", + rm, len, rs, rs->rs_snd_bytes, + (unsigned long long)be64_to_cpu(rm->m_inc.i_hdr.h_sequence)); + + *queued = 1; + } + + spin_unlock_irqrestore(&rs->rs_lock, flags); +out: + return *queued; +} + +static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, + struct msghdr *msg, int *allocated_mr) +{ + struct cmsghdr *cmsg; + int ret = 0; + + for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { + if (!CMSG_OK(msg, cmsg)) + return -EINVAL; + + if (cmsg->cmsg_level != SOL_RDS) + continue; + + /* As a side effect, RDMA_DEST and RDMA_MAP will set + * rm->m_rdma_cookie and rm->m_rdma_mr. + */ + switch (cmsg->cmsg_type) { + case RDS_CMSG_RDMA_ARGS: + ret = rds_cmsg_rdma_args(rs, rm, cmsg); + break; + + case RDS_CMSG_RDMA_DEST: + ret = rds_cmsg_rdma_dest(rs, rm, cmsg); + break; + + case RDS_CMSG_RDMA_MAP: + ret = rds_cmsg_rdma_map(rs, rm, cmsg); + if (!ret) + *allocated_mr = 1; + break; + + default: + return -EINVAL; + } + + if (ret) + break; + } + + return ret; +} + +int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, + size_t payload_len) +{ + struct sock *sk = sock->sk; + struct rds_sock *rs = rds_sk_to_rs(sk); + struct sockaddr_in *usin = (struct sockaddr_in *)msg->msg_name; + __be32 daddr; + __be16 dport; + struct rds_message *rm = NULL; + struct rds_connection *conn; + int ret = 0; + int queued = 0, allocated_mr = 0; + int nonblock = msg->msg_flags & MSG_DONTWAIT; + long timeo = sock_rcvtimeo(sk, nonblock); + + /* Mirror Linux UDP mirror of BSD error message compatibility */ + /* XXX: Perhaps MSG_MORE someday */ + if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_CMSG_COMPAT)) { + printk(KERN_INFO "msg_flags 0x%08X\n", msg->msg_flags); + ret = -EOPNOTSUPP; + goto out; + } + + if (msg->msg_namelen) { + /* XXX fail non-unicast destination IPs? */ + if (msg->msg_namelen < sizeof(*usin) || usin->sin_family != AF_INET) { + ret = -EINVAL; + goto out; + } + daddr = usin->sin_addr.s_addr; + dport = usin->sin_port; + } else { + /* We only care about consistency with ->connect() */ + lock_sock(sk); + daddr = rs->rs_conn_addr; + dport = rs->rs_conn_port; + release_sock(sk); + } + + /* racing with another thread binding seems ok here */ + if (daddr == 0 || rs->rs_bound_addr == 0) { + ret = -ENOTCONN; /* XXX not a great errno */ + goto out; + } + + rm = rds_message_copy_from_user(msg->msg_iov, payload_len); + if (IS_ERR(rm)) { + ret = PTR_ERR(rm); + rm = NULL; + goto out; + } + + rm->m_daddr = daddr; + + /* Parse any control messages the user may have included. */ + ret = rds_cmsg_send(rs, rm, msg, &allocated_mr); + if (ret) + goto out; + + /* rds_conn_create has a spinlock that runs with IRQ off. + * Caching the conn in the socket helps a lot. */ + if (rs->rs_conn && rs->rs_conn->c_faddr == daddr) + conn = rs->rs_conn; + else { + conn = rds_conn_create_outgoing(rs->rs_bound_addr, daddr, + rs->rs_transport, + sock->sk->sk_allocation); + if (IS_ERR(conn)) { + ret = PTR_ERR(conn); + goto out; + } + rs->rs_conn = conn; + } + + if ((rm->m_rdma_cookie || rm->m_rdma_op) + && conn->c_trans->xmit_rdma == NULL) { + if (printk_ratelimit()) + printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n", + rm->m_rdma_op, conn->c_trans->xmit_rdma); + ret = -EOPNOTSUPP; + goto out; + } + + /* If the connection is down, trigger a connect. We may + * have scheduled a delayed reconnect however - in this case + * we should not interfere. + */ + if (rds_conn_state(conn) == RDS_CONN_DOWN + && !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags)) + queue_delayed_work(rds_wq, &conn->c_conn_w, 0); + + ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs); + if (ret) + goto out; + + while (!rds_send_queue_rm(rs, conn, rm, rs->rs_bound_port, + dport, &queued)) { + rds_stats_inc(s_send_queue_full); + /* XXX make sure this is reasonable */ + if (payload_len > rds_sk_sndbuf(rs)) { + ret = -EMSGSIZE; + goto out; + } + if (nonblock) { + ret = -EAGAIN; + goto out; + } + + timeo = wait_event_interruptible_timeout(*sk->sk_sleep, + rds_send_queue_rm(rs, conn, rm, + rs->rs_bound_port, + dport, + &queued), + timeo); + rdsdebug("sendmsg woke queued %d timeo %ld\n", queued, timeo); + if (timeo > 0 || timeo == MAX_SCHEDULE_TIMEOUT) + continue; + + ret = timeo; + if (ret == 0) + ret = -ETIMEDOUT; + goto out; + } + + /* + * By now we've committed to the send. We reuse rds_send_worker() + * to retry sends in the rds thread if the transport asks us to. + */ + rds_stats_inc(s_send_queued); + + if (!test_bit(RDS_LL_SEND_FULL, &conn->c_flags)) + rds_send_worker(&conn->c_send_w.work); + + rds_message_put(rm); + return payload_len; + +out: + /* If the user included a RDMA_MAP cmsg, we allocated a MR on the fly. + * If the sendmsg goes through, we keep the MR. If it fails with EAGAIN + * or in any other way, we need to destroy the MR again */ + if (allocated_mr) + rds_rdma_unuse(rs, rds_rdma_cookie_key(rm->m_rdma_cookie), 1); + + if (rm) + rds_message_put(rm); + return ret; +} + +/* + * Reply to a ping packet. + */ +int +rds_send_pong(struct rds_connection *conn, __be16 dport) +{ + struct rds_message *rm; + unsigned long flags; + int ret = 0; + + rm = rds_message_alloc(0, GFP_ATOMIC); + if (rm == NULL) { + ret = -ENOMEM; + goto out; + } + + rm->m_daddr = conn->c_faddr; + + /* If the connection is down, trigger a connect. We may + * have scheduled a delayed reconnect however - in this case + * we should not interfere. + */ + if (rds_conn_state(conn) == RDS_CONN_DOWN + && !test_and_set_bit(RDS_RECONNECT_PENDING, &conn->c_flags)) + queue_delayed_work(rds_wq, &conn->c_conn_w, 0); + + ret = rds_cong_wait(conn->c_fcong, dport, 1, NULL); + if (ret) + goto out; + + spin_lock_irqsave(&conn->c_lock, flags); + list_add_tail(&rm->m_conn_item, &conn->c_send_queue); + set_bit(RDS_MSG_ON_CONN, &rm->m_flags); + rds_message_addref(rm); + rm->m_inc.i_conn = conn; + + rds_message_populate_header(&rm->m_inc.i_hdr, 0, dport, + conn->c_next_tx_seq); + conn->c_next_tx_seq++; + spin_unlock_irqrestore(&conn->c_lock, flags); + + rds_stats_inc(s_send_queued); + rds_stats_inc(s_send_pong); + + queue_delayed_work(rds_wq, &conn->c_send_w, 0); + rds_message_put(rm); + return 0; + +out: + if (rm) + rds_message_put(rm); + return ret; +} -- cgit v1.2.3 From bdbe6fbc6a2f2ccfb384b141b257677d2a8d36fb Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 24 Feb 2009 15:30:28 +0000 Subject: RDS: recv.c Upon receiving a datagram from the transport, RDS parses the headers and potentially queues an ACK. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/recv.c | 542 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 542 insertions(+) create mode 100644 net/rds/recv.c (limited to 'net') diff --git a/net/rds/recv.c b/net/rds/recv.c new file mode 100644 index 000000000000..f2118c51cfa3 --- /dev/null +++ b/net/rds/recv.c @@ -0,0 +1,542 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include + +#include "rds.h" +#include "rdma.h" + +void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, + __be32 saddr) +{ + atomic_set(&inc->i_refcount, 1); + INIT_LIST_HEAD(&inc->i_item); + inc->i_conn = conn; + inc->i_saddr = saddr; + inc->i_rdma_cookie = 0; +} + +void rds_inc_addref(struct rds_incoming *inc) +{ + rdsdebug("addref inc %p ref %d\n", inc, atomic_read(&inc->i_refcount)); + atomic_inc(&inc->i_refcount); +} + +void rds_inc_put(struct rds_incoming *inc) +{ + rdsdebug("put inc %p ref %d\n", inc, atomic_read(&inc->i_refcount)); + if (atomic_dec_and_test(&inc->i_refcount)) { + BUG_ON(!list_empty(&inc->i_item)); + + inc->i_conn->c_trans->inc_free(inc); + } +} + +static void rds_recv_rcvbuf_delta(struct rds_sock *rs, struct sock *sk, + struct rds_cong_map *map, + int delta, __be16 port) +{ + int now_congested; + + if (delta == 0) + return; + + rs->rs_rcv_bytes += delta; + now_congested = rs->rs_rcv_bytes > rds_sk_rcvbuf(rs); + + rdsdebug("rs %p (%pI4:%u) recv bytes %d buf %d " + "now_cong %d delta %d\n", + rs, &rs->rs_bound_addr, + ntohs(rs->rs_bound_port), rs->rs_rcv_bytes, + rds_sk_rcvbuf(rs), now_congested, delta); + + /* wasn't -> am congested */ + if (!rs->rs_congested && now_congested) { + rs->rs_congested = 1; + rds_cong_set_bit(map, port); + rds_cong_queue_updates(map); + } + /* was -> aren't congested */ + /* Require more free space before reporting uncongested to prevent + bouncing cong/uncong state too often */ + else if (rs->rs_congested && (rs->rs_rcv_bytes < (rds_sk_rcvbuf(rs)/2))) { + rs->rs_congested = 0; + rds_cong_clear_bit(map, port); + rds_cong_queue_updates(map); + } + + /* do nothing if no change in cong state */ +} + +/* + * Process all extension headers that come with this message. + */ +static void rds_recv_incoming_exthdrs(struct rds_incoming *inc, struct rds_sock *rs) +{ + struct rds_header *hdr = &inc->i_hdr; + unsigned int pos = 0, type, len; + union { + struct rds_ext_header_version version; + struct rds_ext_header_rdma rdma; + struct rds_ext_header_rdma_dest rdma_dest; + } buffer; + + while (1) { + len = sizeof(buffer); + type = rds_message_next_extension(hdr, &pos, &buffer, &len); + if (type == RDS_EXTHDR_NONE) + break; + /* Process extension header here */ + switch (type) { + case RDS_EXTHDR_RDMA: + rds_rdma_unuse(rs, be32_to_cpu(buffer.rdma.h_rdma_rkey), 0); + break; + + case RDS_EXTHDR_RDMA_DEST: + /* We ignore the size for now. We could stash it + * somewhere and use it for error checking. */ + inc->i_rdma_cookie = rds_rdma_make_cookie( + be32_to_cpu(buffer.rdma_dest.h_rdma_rkey), + be32_to_cpu(buffer.rdma_dest.h_rdma_offset)); + + break; + } + } +} + +/* + * The transport must make sure that this is serialized against other + * rx and conn reset on this specific conn. + * + * We currently assert that only one fragmented message will be sent + * down a connection at a time. This lets us reassemble in the conn + * instead of per-flow which means that we don't have to go digging through + * flows to tear down partial reassembly progress on conn failure and + * we save flow lookup and locking for each frag arrival. It does mean + * that small messages will wait behind large ones. Fragmenting at all + * is only to reduce the memory consumption of pre-posted buffers. + * + * The caller passes in saddr and daddr instead of us getting it from the + * conn. This lets loopback, who only has one conn for both directions, + * tell us which roles the addrs in the conn are playing for this message. + */ +void rds_recv_incoming(struct rds_connection *conn, __be32 saddr, __be32 daddr, + struct rds_incoming *inc, gfp_t gfp, enum km_type km) +{ + struct rds_sock *rs = NULL; + struct sock *sk; + unsigned long flags; + + inc->i_conn = conn; + inc->i_rx_jiffies = jiffies; + + rdsdebug("conn %p next %llu inc %p seq %llu len %u sport %u dport %u " + "flags 0x%x rx_jiffies %lu\n", conn, + (unsigned long long)conn->c_next_rx_seq, + inc, + (unsigned long long)be64_to_cpu(inc->i_hdr.h_sequence), + be32_to_cpu(inc->i_hdr.h_len), + be16_to_cpu(inc->i_hdr.h_sport), + be16_to_cpu(inc->i_hdr.h_dport), + inc->i_hdr.h_flags, + inc->i_rx_jiffies); + + /* + * Sequence numbers should only increase. Messages get their + * sequence number as they're queued in a sending conn. They + * can be dropped, though, if the sending socket is closed before + * they hit the wire. So sequence numbers can skip forward + * under normal operation. They can also drop back in the conn + * failover case as previously sent messages are resent down the + * new instance of a conn. We drop those, otherwise we have + * to assume that the next valid seq does not come after a + * hole in the fragment stream. + * + * The headers don't give us a way to realize if fragments of + * a message have been dropped. We assume that frags that arrive + * to a flow are part of the current message on the flow that is + * being reassembled. This means that senders can't drop messages + * from the sending conn until all their frags are sent. + * + * XXX we could spend more on the wire to get more robust failure + * detection, arguably worth it to avoid data corruption. + */ + if (be64_to_cpu(inc->i_hdr.h_sequence) < conn->c_next_rx_seq + && (inc->i_hdr.h_flags & RDS_FLAG_RETRANSMITTED)) { + rds_stats_inc(s_recv_drop_old_seq); + goto out; + } + conn->c_next_rx_seq = be64_to_cpu(inc->i_hdr.h_sequence) + 1; + + if (rds_sysctl_ping_enable && inc->i_hdr.h_dport == 0) { + rds_stats_inc(s_recv_ping); + rds_send_pong(conn, inc->i_hdr.h_sport); + goto out; + } + + rs = rds_find_bound(daddr, inc->i_hdr.h_dport); + if (rs == NULL) { + rds_stats_inc(s_recv_drop_no_sock); + goto out; + } + + /* Process extension headers */ + rds_recv_incoming_exthdrs(inc, rs); + + /* We can be racing with rds_release() which marks the socket dead. */ + sk = rds_rs_to_sk(rs); + + /* serialize with rds_release -> sock_orphan */ + write_lock_irqsave(&rs->rs_recv_lock, flags); + if (!sock_flag(sk, SOCK_DEAD)) { + rdsdebug("adding inc %p to rs %p's recv queue\n", inc, rs); + rds_stats_inc(s_recv_queued); + rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong, + be32_to_cpu(inc->i_hdr.h_len), + inc->i_hdr.h_dport); + rds_inc_addref(inc); + list_add_tail(&inc->i_item, &rs->rs_recv_queue); + __rds_wake_sk_sleep(sk); + } else { + rds_stats_inc(s_recv_drop_dead_sock); + } + write_unlock_irqrestore(&rs->rs_recv_lock, flags); + +out: + if (rs) + rds_sock_put(rs); +} + +/* + * be very careful here. This is being called as the condition in + * wait_event_*() needs to cope with being called many times. + */ +static int rds_next_incoming(struct rds_sock *rs, struct rds_incoming **inc) +{ + unsigned long flags; + + if (*inc == NULL) { + read_lock_irqsave(&rs->rs_recv_lock, flags); + if (!list_empty(&rs->rs_recv_queue)) { + *inc = list_entry(rs->rs_recv_queue.next, + struct rds_incoming, + i_item); + rds_inc_addref(*inc); + } + read_unlock_irqrestore(&rs->rs_recv_lock, flags); + } + + return *inc != NULL; +} + +static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc, + int drop) +{ + struct sock *sk = rds_rs_to_sk(rs); + int ret = 0; + unsigned long flags; + + write_lock_irqsave(&rs->rs_recv_lock, flags); + if (!list_empty(&inc->i_item)) { + ret = 1; + if (drop) { + /* XXX make sure this i_conn is reliable */ + rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong, + -be32_to_cpu(inc->i_hdr.h_len), + inc->i_hdr.h_dport); + list_del_init(&inc->i_item); + rds_inc_put(inc); + } + } + write_unlock_irqrestore(&rs->rs_recv_lock, flags); + + rdsdebug("inc %p rs %p still %d dropped %d\n", inc, rs, ret, drop); + return ret; +} + +/* + * Pull errors off the error queue. + * If msghdr is NULL, we will just purge the error queue. + */ +int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msghdr) +{ + struct rds_notifier *notifier; + struct rds_rdma_notify cmsg; + unsigned int count = 0, max_messages = ~0U; + unsigned long flags; + LIST_HEAD(copy); + int err = 0; + + + /* put_cmsg copies to user space and thus may sleep. We can't do this + * with rs_lock held, so first grab as many notifications as we can stuff + * in the user provided cmsg buffer. We don't try to copy more, to avoid + * losing notifications - except when the buffer is so small that it wouldn't + * even hold a single notification. Then we give him as much of this single + * msg as we can squeeze in, and set MSG_CTRUNC. + */ + if (msghdr) { + max_messages = msghdr->msg_controllen / CMSG_SPACE(sizeof(cmsg)); + if (!max_messages) + max_messages = 1; + } + + spin_lock_irqsave(&rs->rs_lock, flags); + while (!list_empty(&rs->rs_notify_queue) && count < max_messages) { + notifier = list_entry(rs->rs_notify_queue.next, + struct rds_notifier, n_list); + list_move(¬ifier->n_list, ©); + count++; + } + spin_unlock_irqrestore(&rs->rs_lock, flags); + + if (!count) + return 0; + + while (!list_empty(©)) { + notifier = list_entry(copy.next, struct rds_notifier, n_list); + + if (msghdr) { + cmsg.user_token = notifier->n_user_token; + cmsg.status = notifier->n_status; + + err = put_cmsg(msghdr, SOL_RDS, RDS_CMSG_RDMA_STATUS, + sizeof(cmsg), &cmsg); + if (err) + break; + } + + list_del_init(¬ifier->n_list); + kfree(notifier); + } + + /* If we bailed out because of an error in put_cmsg, + * we may be left with one or more notifications that we + * didn't process. Return them to the head of the list. */ + if (!list_empty(©)) { + spin_lock_irqsave(&rs->rs_lock, flags); + list_splice(©, &rs->rs_notify_queue); + spin_unlock_irqrestore(&rs->rs_lock, flags); + } + + return err; +} + +/* + * Queue a congestion notification + */ +static int rds_notify_cong(struct rds_sock *rs, struct msghdr *msghdr) +{ + uint64_t notify = rs->rs_cong_notify; + unsigned long flags; + int err; + + err = put_cmsg(msghdr, SOL_RDS, RDS_CMSG_CONG_UPDATE, + sizeof(notify), ¬ify); + if (err) + return err; + + spin_lock_irqsave(&rs->rs_lock, flags); + rs->rs_cong_notify &= ~notify; + spin_unlock_irqrestore(&rs->rs_lock, flags); + + return 0; +} + +/* + * Receive any control messages. + */ +static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg) +{ + int ret = 0; + + if (inc->i_rdma_cookie) { + ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RDMA_DEST, + sizeof(inc->i_rdma_cookie), &inc->i_rdma_cookie); + if (ret) + return ret; + } + + return 0; +} + +int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, + size_t size, int msg_flags) +{ + struct sock *sk = sock->sk; + struct rds_sock *rs = rds_sk_to_rs(sk); + long timeo; + int ret = 0, nonblock = msg_flags & MSG_DONTWAIT; + struct sockaddr_in *sin; + struct rds_incoming *inc = NULL; + + /* udp_recvmsg()->sock_recvtimeo() gets away without locking too.. */ + timeo = sock_rcvtimeo(sk, nonblock); + + rdsdebug("size %zu flags 0x%x timeo %ld\n", size, msg_flags, timeo); + + if (msg_flags & MSG_OOB) + goto out; + + /* If there are pending notifications, do those - and nothing else */ + if (!list_empty(&rs->rs_notify_queue)) { + ret = rds_notify_queue_get(rs, msg); + goto out; + } + + if (rs->rs_cong_notify) { + ret = rds_notify_cong(rs, msg); + goto out; + } + + while (1) { + if (!rds_next_incoming(rs, &inc)) { + if (nonblock) { + ret = -EAGAIN; + break; + } + + timeo = wait_event_interruptible_timeout(*sk->sk_sleep, + rds_next_incoming(rs, &inc), + timeo); + rdsdebug("recvmsg woke inc %p timeo %ld\n", inc, + timeo); + if (timeo > 0 || timeo == MAX_SCHEDULE_TIMEOUT) + continue; + + ret = timeo; + if (ret == 0) + ret = -ETIMEDOUT; + break; + } + + rdsdebug("copying inc %p from %pI4:%u to user\n", inc, + &inc->i_conn->c_faddr, + ntohs(inc->i_hdr.h_sport)); + ret = inc->i_conn->c_trans->inc_copy_to_user(inc, msg->msg_iov, + size); + if (ret < 0) + break; + + /* + * if the message we just copied isn't at the head of the + * recv queue then someone else raced us to return it, try + * to get the next message. + */ + if (!rds_still_queued(rs, inc, !(msg_flags & MSG_PEEK))) { + rds_inc_put(inc); + inc = NULL; + rds_stats_inc(s_recv_deliver_raced); + continue; + } + + if (ret < be32_to_cpu(inc->i_hdr.h_len)) { + if (msg_flags & MSG_TRUNC) + ret = be32_to_cpu(inc->i_hdr.h_len); + msg->msg_flags |= MSG_TRUNC; + } + + if (rds_cmsg_recv(inc, msg)) { + ret = -EFAULT; + goto out; + } + + rds_stats_inc(s_recv_delivered); + + sin = (struct sockaddr_in *)msg->msg_name; + if (sin) { + sin->sin_family = AF_INET; + sin->sin_port = inc->i_hdr.h_sport; + sin->sin_addr.s_addr = inc->i_saddr; + memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + } + break; + } + + if (inc) + rds_inc_put(inc); + +out: + return ret; +} + +/* + * The socket is being shut down and we're asked to drop messages that were + * queued for recvmsg. The caller has unbound the socket so the receive path + * won't queue any more incoming fragments or messages on the socket. + */ +void rds_clear_recv_queue(struct rds_sock *rs) +{ + struct sock *sk = rds_rs_to_sk(rs); + struct rds_incoming *inc, *tmp; + unsigned long flags; + + write_lock_irqsave(&rs->rs_recv_lock, flags); + list_for_each_entry_safe(inc, tmp, &rs->rs_recv_queue, i_item) { + rds_recv_rcvbuf_delta(rs, sk, inc->i_conn->c_lcong, + -be32_to_cpu(inc->i_hdr.h_len), + inc->i_hdr.h_dport); + list_del_init(&inc->i_item); + rds_inc_put(inc); + } + write_unlock_irqrestore(&rs->rs_recv_lock, flags); +} + +/* + * inc->i_saddr isn't used here because it is only set in the receive + * path. + */ +void rds_inc_info_copy(struct rds_incoming *inc, + struct rds_info_iterator *iter, + __be32 saddr, __be32 daddr, int flip) +{ + struct rds_info_message minfo; + + minfo.seq = be64_to_cpu(inc->i_hdr.h_sequence); + minfo.len = be32_to_cpu(inc->i_hdr.h_len); + + if (flip) { + minfo.laddr = daddr; + minfo.faddr = saddr; + minfo.lport = inc->i_hdr.h_dport; + minfo.fport = inc->i_hdr.h_sport; + } else { + minfo.laddr = saddr; + minfo.faddr = daddr; + minfo.lport = inc->i_hdr.h_sport; + minfo.fport = inc->i_hdr.h_dport; + } + + rds_info_copy(iter, &minfo, sizeof(minfo)); +} -- cgit v1.2.3 From eff5f53bef75c0864a5da06bb688939092b848dc Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 24 Feb 2009 15:30:29 +0000 Subject: RDS: RDMA support Some transports may support RDMA features. This handles the non-transport-specific parts, like pinning user pages and tracking mapped regions. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/rdma.c | 679 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ net/rds/rdma.h | 84 +++++++ 2 files changed, 763 insertions(+) create mode 100644 net/rds/rdma.c create mode 100644 net/rds/rdma.h (limited to 'net') diff --git a/net/rds/rdma.c b/net/rds/rdma.c new file mode 100644 index 000000000000..eaeeb91e1119 --- /dev/null +++ b/net/rds/rdma.c @@ -0,0 +1,679 @@ +/* + * Copyright (c) 2007 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include /* for DMA_*_DEVICE */ + +#include "rdma.h" + +/* + * XXX + * - build with sparse + * - should we limit the size of a mr region? let transport return failure? + * - should we detect duplicate keys on a socket? hmm. + * - an rdma is an mlock, apply rlimit? + */ + +/* + * get the number of pages by looking at the page indices that the start and + * end addresses fall in. + * + * Returns 0 if the vec is invalid. It is invalid if the number of bytes + * causes the address to wrap or overflows an unsigned int. This comes + * from being stored in the 'length' member of 'struct scatterlist'. + */ +static unsigned int rds_pages_in_vec(struct rds_iovec *vec) +{ + if ((vec->addr + vec->bytes <= vec->addr) || + (vec->bytes > (u64)UINT_MAX)) + return 0; + + return ((vec->addr + vec->bytes + PAGE_SIZE - 1) >> PAGE_SHIFT) - + (vec->addr >> PAGE_SHIFT); +} + +static struct rds_mr *rds_mr_tree_walk(struct rb_root *root, u64 key, + struct rds_mr *insert) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct rds_mr *mr; + + while (*p) { + parent = *p; + mr = rb_entry(parent, struct rds_mr, r_rb_node); + + if (key < mr->r_key) + p = &(*p)->rb_left; + else if (key > mr->r_key) + p = &(*p)->rb_right; + else + return mr; + } + + if (insert) { + rb_link_node(&insert->r_rb_node, parent, p); + rb_insert_color(&insert->r_rb_node, root); + atomic_inc(&insert->r_refcount); + } + return NULL; +} + +/* + * Destroy the transport-specific part of a MR. + */ +static void rds_destroy_mr(struct rds_mr *mr) +{ + struct rds_sock *rs = mr->r_sock; + void *trans_private = NULL; + unsigned long flags; + + rdsdebug("RDS: destroy mr key is %x refcnt %u\n", + mr->r_key, atomic_read(&mr->r_refcount)); + + if (test_and_set_bit(RDS_MR_DEAD, &mr->r_state)) + return; + + spin_lock_irqsave(&rs->rs_rdma_lock, flags); + if (!RB_EMPTY_NODE(&mr->r_rb_node)) + rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys); + trans_private = mr->r_trans_private; + mr->r_trans_private = NULL; + spin_unlock_irqrestore(&rs->rs_rdma_lock, flags); + + if (trans_private) + mr->r_trans->free_mr(trans_private, mr->r_invalidate); +} + +void __rds_put_mr_final(struct rds_mr *mr) +{ + rds_destroy_mr(mr); + kfree(mr); +} + +/* + * By the time this is called we can't have any more ioctls called on + * the socket so we don't need to worry about racing with others. + */ +void rds_rdma_drop_keys(struct rds_sock *rs) +{ + struct rds_mr *mr; + struct rb_node *node; + + /* Release any MRs associated with this socket */ + while ((node = rb_first(&rs->rs_rdma_keys))) { + mr = container_of(node, struct rds_mr, r_rb_node); + if (mr->r_trans == rs->rs_transport) + mr->r_invalidate = 0; + rds_mr_put(mr); + } + + if (rs->rs_transport && rs->rs_transport->flush_mrs) + rs->rs_transport->flush_mrs(); +} + +/* + * Helper function to pin user pages. + */ +static int rds_pin_pages(unsigned long user_addr, unsigned int nr_pages, + struct page **pages, int write) +{ + int ret; + + down_read(¤t->mm->mmap_sem); + ret = get_user_pages(current, current->mm, user_addr, + nr_pages, write, 0, pages, NULL); + up_read(¤t->mm->mmap_sem); + + if (0 <= ret && (unsigned) ret < nr_pages) { + while (ret--) + put_page(pages[ret]); + ret = -EFAULT; + } + + return ret; +} + +static int __rds_rdma_map(struct rds_sock *rs, struct rds_get_mr_args *args, + u64 *cookie_ret, struct rds_mr **mr_ret) +{ + struct rds_mr *mr = NULL, *found; + unsigned int nr_pages; + struct page **pages = NULL; + struct scatterlist *sg; + void *trans_private; + unsigned long flags; + rds_rdma_cookie_t cookie; + unsigned int nents; + long i; + int ret; + + if (rs->rs_bound_addr == 0) { + ret = -ENOTCONN; /* XXX not a great errno */ + goto out; + } + + if (rs->rs_transport->get_mr == NULL) { + ret = -EOPNOTSUPP; + goto out; + } + + nr_pages = rds_pages_in_vec(&args->vec); + if (nr_pages == 0) { + ret = -EINVAL; + goto out; + } + + rdsdebug("RDS: get_mr addr %llx len %llu nr_pages %u\n", + args->vec.addr, args->vec.bytes, nr_pages); + + /* XXX clamp nr_pages to limit the size of this alloc? */ + pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); + if (pages == NULL) { + ret = -ENOMEM; + goto out; + } + + mr = kzalloc(sizeof(struct rds_mr), GFP_KERNEL); + if (mr == NULL) { + ret = -ENOMEM; + goto out; + } + + atomic_set(&mr->r_refcount, 1); + RB_CLEAR_NODE(&mr->r_rb_node); + mr->r_trans = rs->rs_transport; + mr->r_sock = rs; + + if (args->flags & RDS_RDMA_USE_ONCE) + mr->r_use_once = 1; + if (args->flags & RDS_RDMA_INVALIDATE) + mr->r_invalidate = 1; + if (args->flags & RDS_RDMA_READWRITE) + mr->r_write = 1; + + /* + * Pin the pages that make up the user buffer and transfer the page + * pointers to the mr's sg array. We check to see if we've mapped + * the whole region after transferring the partial page references + * to the sg array so that we can have one page ref cleanup path. + * + * For now we have no flag that tells us whether the mapping is + * r/o or r/w. We need to assume r/w, or we'll do a lot of RDMA to + * the zero page. + */ + ret = rds_pin_pages(args->vec.addr & PAGE_MASK, nr_pages, pages, 1); + if (ret < 0) + goto out; + + nents = ret; + sg = kcalloc(nents, sizeof(*sg), GFP_KERNEL); + if (sg == NULL) { + ret = -ENOMEM; + goto out; + } + WARN_ON(!nents); + sg_init_table(sg, nents); + + /* Stick all pages into the scatterlist */ + for (i = 0 ; i < nents; i++) + sg_set_page(&sg[i], pages[i], PAGE_SIZE, 0); + + rdsdebug("RDS: trans_private nents is %u\n", nents); + + /* Obtain a transport specific MR. If this succeeds, the + * s/g list is now owned by the MR. + * Note that dma_map() implies that pending writes are + * flushed to RAM, so no dma_sync is needed here. */ + trans_private = rs->rs_transport->get_mr(sg, nents, rs, + &mr->r_key); + + if (IS_ERR(trans_private)) { + for (i = 0 ; i < nents; i++) + put_page(sg_page(&sg[i])); + kfree(sg); + ret = PTR_ERR(trans_private); + goto out; + } + + mr->r_trans_private = trans_private; + + rdsdebug("RDS: get_mr put_user key is %x cookie_addr %p\n", + mr->r_key, (void *)(unsigned long) args->cookie_addr); + + /* The user may pass us an unaligned address, but we can only + * map page aligned regions. So we keep the offset, and build + * a 64bit cookie containing and pass that + * around. */ + cookie = rds_rdma_make_cookie(mr->r_key, args->vec.addr & ~PAGE_MASK); + if (cookie_ret) + *cookie_ret = cookie; + + if (args->cookie_addr && put_user(cookie, (u64 __user *)(unsigned long) args->cookie_addr)) { + ret = -EFAULT; + goto out; + } + + /* Inserting the new MR into the rbtree bumps its + * reference count. */ + spin_lock_irqsave(&rs->rs_rdma_lock, flags); + found = rds_mr_tree_walk(&rs->rs_rdma_keys, mr->r_key, mr); + spin_unlock_irqrestore(&rs->rs_rdma_lock, flags); + + BUG_ON(found && found != mr); + + rdsdebug("RDS: get_mr key is %x\n", mr->r_key); + if (mr_ret) { + atomic_inc(&mr->r_refcount); + *mr_ret = mr; + } + + ret = 0; +out: + kfree(pages); + if (mr) + rds_mr_put(mr); + return ret; +} + +int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen) +{ + struct rds_get_mr_args args; + + if (optlen != sizeof(struct rds_get_mr_args)) + return -EINVAL; + + if (copy_from_user(&args, (struct rds_get_mr_args __user *)optval, + sizeof(struct rds_get_mr_args))) + return -EFAULT; + + return __rds_rdma_map(rs, &args, NULL, NULL); +} + +/* + * Free the MR indicated by the given R_Key + */ +int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen) +{ + struct rds_free_mr_args args; + struct rds_mr *mr; + unsigned long flags; + + if (optlen != sizeof(struct rds_free_mr_args)) + return -EINVAL; + + if (copy_from_user(&args, (struct rds_free_mr_args __user *)optval, + sizeof(struct rds_free_mr_args))) + return -EFAULT; + + /* Special case - a null cookie means flush all unused MRs */ + if (args.cookie == 0) { + if (!rs->rs_transport || !rs->rs_transport->flush_mrs) + return -EINVAL; + rs->rs_transport->flush_mrs(); + return 0; + } + + /* Look up the MR given its R_key and remove it from the rbtree + * so nobody else finds it. + * This should also prevent races with rds_rdma_unuse. + */ + spin_lock_irqsave(&rs->rs_rdma_lock, flags); + mr = rds_mr_tree_walk(&rs->rs_rdma_keys, rds_rdma_cookie_key(args.cookie), NULL); + if (mr) { + rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys); + RB_CLEAR_NODE(&mr->r_rb_node); + if (args.flags & RDS_RDMA_INVALIDATE) + mr->r_invalidate = 1; + } + spin_unlock_irqrestore(&rs->rs_rdma_lock, flags); + + if (!mr) + return -EINVAL; + + /* + * call rds_destroy_mr() ourselves so that we're sure it's done by the time + * we return. If we let rds_mr_put() do it it might not happen until + * someone else drops their ref. + */ + rds_destroy_mr(mr); + rds_mr_put(mr); + return 0; +} + +/* + * This is called when we receive an extension header that + * tells us this MR was used. It allows us to implement + * use_once semantics + */ +void rds_rdma_unuse(struct rds_sock *rs, u32 r_key, int force) +{ + struct rds_mr *mr; + unsigned long flags; + int zot_me = 0; + + spin_lock_irqsave(&rs->rs_rdma_lock, flags); + mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL); + if (mr && (mr->r_use_once || force)) { + rb_erase(&mr->r_rb_node, &rs->rs_rdma_keys); + RB_CLEAR_NODE(&mr->r_rb_node); + zot_me = 1; + } else if (mr) + atomic_inc(&mr->r_refcount); + spin_unlock_irqrestore(&rs->rs_rdma_lock, flags); + + /* May have to issue a dma_sync on this memory region. + * Note we could avoid this if the operation was a RDMA READ, + * but at this point we can't tell. */ + if (mr != NULL) { + if (mr->r_trans->sync_mr) + mr->r_trans->sync_mr(mr->r_trans_private, DMA_FROM_DEVICE); + + /* If the MR was marked as invalidate, this will + * trigger an async flush. */ + if (zot_me) + rds_destroy_mr(mr); + rds_mr_put(mr); + } +} + +void rds_rdma_free_op(struct rds_rdma_op *ro) +{ + unsigned int i; + + for (i = 0; i < ro->r_nents; i++) { + struct page *page = sg_page(&ro->r_sg[i]); + + /* Mark page dirty if it was possibly modified, which + * is the case for a RDMA_READ which copies from remote + * to local memory */ + if (!ro->r_write) + set_page_dirty(page); + put_page(page); + } + + kfree(ro->r_notifier); + kfree(ro); +} + +/* + * args is a pointer to an in-kernel copy in the sendmsg cmsg. + */ +static struct rds_rdma_op *rds_rdma_prepare(struct rds_sock *rs, + struct rds_rdma_args *args) +{ + struct rds_iovec vec; + struct rds_rdma_op *op = NULL; + unsigned int nr_pages; + unsigned int max_pages; + unsigned int nr_bytes; + struct page **pages = NULL; + struct rds_iovec __user *local_vec; + struct scatterlist *sg; + unsigned int nr; + unsigned int i, j; + int ret; + + + if (rs->rs_bound_addr == 0) { + ret = -ENOTCONN; /* XXX not a great errno */ + goto out; + } + + if (args->nr_local > (u64)UINT_MAX) { + ret = -EMSGSIZE; + goto out; + } + + nr_pages = 0; + max_pages = 0; + + local_vec = (struct rds_iovec __user *)(unsigned long) args->local_vec_addr; + + /* figure out the number of pages in the vector */ + for (i = 0; i < args->nr_local; i++) { + if (copy_from_user(&vec, &local_vec[i], + sizeof(struct rds_iovec))) { + ret = -EFAULT; + goto out; + } + + nr = rds_pages_in_vec(&vec); + if (nr == 0) { + ret = -EINVAL; + goto out; + } + + max_pages = max(nr, max_pages); + nr_pages += nr; + } + + pages = kcalloc(max_pages, sizeof(struct page *), GFP_KERNEL); + if (pages == NULL) { + ret = -ENOMEM; + goto out; + } + + op = kzalloc(offsetof(struct rds_rdma_op, r_sg[nr_pages]), GFP_KERNEL); + if (op == NULL) { + ret = -ENOMEM; + goto out; + } + + op->r_write = !!(args->flags & RDS_RDMA_READWRITE); + op->r_fence = !!(args->flags & RDS_RDMA_FENCE); + op->r_notify = !!(args->flags & RDS_RDMA_NOTIFY_ME); + op->r_recverr = rs->rs_recverr; + WARN_ON(!nr_pages); + sg_init_table(op->r_sg, nr_pages); + + if (op->r_notify || op->r_recverr) { + /* We allocate an uninitialized notifier here, because + * we don't want to do that in the completion handler. We + * would have to use GFP_ATOMIC there, and don't want to deal + * with failed allocations. + */ + op->r_notifier = kmalloc(sizeof(struct rds_notifier), GFP_KERNEL); + if (!op->r_notifier) { + ret = -ENOMEM; + goto out; + } + op->r_notifier->n_user_token = args->user_token; + op->r_notifier->n_status = RDS_RDMA_SUCCESS; + } + + /* The cookie contains the R_Key of the remote memory region, and + * optionally an offset into it. This is how we implement RDMA into + * unaligned memory. + * When setting up the RDMA, we need to add that offset to the + * destination address (which is really an offset into the MR) + * FIXME: We may want to move this into ib_rdma.c + */ + op->r_key = rds_rdma_cookie_key(args->cookie); + op->r_remote_addr = args->remote_vec.addr + rds_rdma_cookie_offset(args->cookie); + + nr_bytes = 0; + + rdsdebug("RDS: rdma prepare nr_local %llu rva %llx rkey %x\n", + (unsigned long long)args->nr_local, + (unsigned long long)args->remote_vec.addr, + op->r_key); + + for (i = 0; i < args->nr_local; i++) { + if (copy_from_user(&vec, &local_vec[i], + sizeof(struct rds_iovec))) { + ret = -EFAULT; + goto out; + } + + nr = rds_pages_in_vec(&vec); + if (nr == 0) { + ret = -EINVAL; + goto out; + } + + rs->rs_user_addr = vec.addr; + rs->rs_user_bytes = vec.bytes; + + /* did the user change the vec under us? */ + if (nr > max_pages || op->r_nents + nr > nr_pages) { + ret = -EINVAL; + goto out; + } + /* If it's a WRITE operation, we want to pin the pages for reading. + * If it's a READ operation, we need to pin the pages for writing. + */ + ret = rds_pin_pages(vec.addr & PAGE_MASK, nr, pages, !op->r_write); + if (ret < 0) + goto out; + + rdsdebug("RDS: nr_bytes %u nr %u vec.bytes %llu vec.addr %llx\n", + nr_bytes, nr, vec.bytes, vec.addr); + + nr_bytes += vec.bytes; + + for (j = 0; j < nr; j++) { + unsigned int offset = vec.addr & ~PAGE_MASK; + + sg = &op->r_sg[op->r_nents + j]; + sg_set_page(sg, pages[j], + min_t(unsigned int, vec.bytes, PAGE_SIZE - offset), + offset); + + rdsdebug("RDS: sg->offset %x sg->len %x vec.addr %llx vec.bytes %llu\n", + sg->offset, sg->length, vec.addr, vec.bytes); + + vec.addr += sg->length; + vec.bytes -= sg->length; + } + + op->r_nents += nr; + } + + + if (nr_bytes > args->remote_vec.bytes) { + rdsdebug("RDS nr_bytes %u remote_bytes %u do not match\n", + nr_bytes, + (unsigned int) args->remote_vec.bytes); + ret = -EINVAL; + goto out; + } + op->r_bytes = nr_bytes; + + ret = 0; +out: + kfree(pages); + if (ret) { + if (op) + rds_rdma_free_op(op); + op = ERR_PTR(ret); + } + return op; +} + +/* + * The application asks for a RDMA transfer. + * Extract all arguments and set up the rdma_op + */ +int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, + struct cmsghdr *cmsg) +{ + struct rds_rdma_op *op; + + if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_rdma_args)) + || rm->m_rdma_op != NULL) + return -EINVAL; + + op = rds_rdma_prepare(rs, CMSG_DATA(cmsg)); + if (IS_ERR(op)) + return PTR_ERR(op); + rds_stats_inc(s_send_rdma); + rm->m_rdma_op = op; + return 0; +} + +/* + * The application wants us to pass an RDMA destination (aka MR) + * to the remote + */ +int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm, + struct cmsghdr *cmsg) +{ + unsigned long flags; + struct rds_mr *mr; + u32 r_key; + int err = 0; + + if (cmsg->cmsg_len < CMSG_LEN(sizeof(rds_rdma_cookie_t)) + || rm->m_rdma_cookie != 0) + return -EINVAL; + + memcpy(&rm->m_rdma_cookie, CMSG_DATA(cmsg), sizeof(rm->m_rdma_cookie)); + + /* We are reusing a previously mapped MR here. Most likely, the + * application has written to the buffer, so we need to explicitly + * flush those writes to RAM. Otherwise the HCA may not see them + * when doing a DMA from that buffer. + */ + r_key = rds_rdma_cookie_key(rm->m_rdma_cookie); + + spin_lock_irqsave(&rs->rs_rdma_lock, flags); + mr = rds_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL); + if (mr == NULL) + err = -EINVAL; /* invalid r_key */ + else + atomic_inc(&mr->r_refcount); + spin_unlock_irqrestore(&rs->rs_rdma_lock, flags); + + if (mr) { + mr->r_trans->sync_mr(mr->r_trans_private, DMA_TO_DEVICE); + rm->m_rdma_mr = mr; + } + return err; +} + +/* + * The application passes us an address range it wants to enable RDMA + * to/from. We map the area, and save the pair + * in rm->m_rdma_cookie. This causes it to be sent along to the peer + * in an extension header. + */ +int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm, + struct cmsghdr *cmsg) +{ + if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct rds_get_mr_args)) + || rm->m_rdma_cookie != 0) + return -EINVAL; + + return __rds_rdma_map(rs, CMSG_DATA(cmsg), &rm->m_rdma_cookie, &rm->m_rdma_mr); +} diff --git a/net/rds/rdma.h b/net/rds/rdma.h new file mode 100644 index 000000000000..425512098b0b --- /dev/null +++ b/net/rds/rdma.h @@ -0,0 +1,84 @@ +#ifndef _RDS_RDMA_H +#define _RDS_RDMA_H + +#include +#include +#include + +#include "rds.h" + +struct rds_mr { + struct rb_node r_rb_node; + atomic_t r_refcount; + u32 r_key; + + /* A copy of the creation flags */ + unsigned int r_use_once:1; + unsigned int r_invalidate:1; + unsigned int r_write:1; + + /* This is for RDS_MR_DEAD. + * It would be nice & consistent to make this part of the above + * bit field here, but we need to use test_and_set_bit. + */ + unsigned long r_state; + struct rds_sock *r_sock; /* back pointer to the socket that owns us */ + struct rds_transport *r_trans; + void *r_trans_private; +}; + +/* Flags for mr->r_state */ +#define RDS_MR_DEAD 0 + +struct rds_rdma_op { + u32 r_key; + u64 r_remote_addr; + unsigned int r_write:1; + unsigned int r_fence:1; + unsigned int r_notify:1; + unsigned int r_recverr:1; + unsigned int r_mapped:1; + struct rds_notifier *r_notifier; + unsigned int r_bytes; + unsigned int r_nents; + unsigned int r_count; + struct scatterlist r_sg[0]; +}; + +static inline rds_rdma_cookie_t rds_rdma_make_cookie(u32 r_key, u32 offset) +{ + return r_key | (((u64) offset) << 32); +} + +static inline u32 rds_rdma_cookie_key(rds_rdma_cookie_t cookie) +{ + return cookie; +} + +static inline u32 rds_rdma_cookie_offset(rds_rdma_cookie_t cookie) +{ + return cookie >> 32; +} + +int rds_get_mr(struct rds_sock *rs, char __user *optval, int optlen); +int rds_free_mr(struct rds_sock *rs, char __user *optval, int optlen); +void rds_rdma_drop_keys(struct rds_sock *rs); +int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, + struct cmsghdr *cmsg); +int rds_cmsg_rdma_dest(struct rds_sock *rs, struct rds_message *rm, + struct cmsghdr *cmsg); +int rds_cmsg_rdma_args(struct rds_sock *rs, struct rds_message *rm, + struct cmsghdr *cmsg); +int rds_cmsg_rdma_map(struct rds_sock *rs, struct rds_message *rm, + struct cmsghdr *cmsg); +void rds_rdma_free_op(struct rds_rdma_op *ro); +void rds_rdma_send_complete(struct rds_message *rm, int); + +extern void __rds_put_mr_final(struct rds_mr *mr); +static inline void rds_mr_put(struct rds_mr *mr) +{ + if (atomic_dec_and_test(&mr->r_refcount)) + __rds_put_mr_final(mr); +} + +#endif -- cgit v1.2.3 From ec16227e14141e4fd7ae76354c09dadfe2449d9e Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 24 Feb 2009 15:30:30 +0000 Subject: RDS/IB: Infiniband transport Registers as an RDS transport and an IB client, and uses IB CM API to allocate ids, queue pairs, and the rest of that fun stuff. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/ib.c | 323 +++++++++++++++++++++++++ net/rds/ib.h | 367 ++++++++++++++++++++++++++++ net/rds/ib_cm.c | 726 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1416 insertions(+) create mode 100644 net/rds/ib.c create mode 100644 net/rds/ib.h create mode 100644 net/rds/ib_cm.c (limited to 'net') diff --git a/net/rds/ib.c b/net/rds/ib.c new file mode 100644 index 000000000000..06a7b798d9a7 --- /dev/null +++ b/net/rds/ib.c @@ -0,0 +1,323 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include +#include +#include +#include +#include + +#include "rds.h" +#include "ib.h" + +unsigned int fmr_pool_size = RDS_FMR_POOL_SIZE; +unsigned int fmr_message_size = RDS_FMR_SIZE + 1; /* +1 allows for unaligned MRs */ + +module_param(fmr_pool_size, int, 0444); +MODULE_PARM_DESC(fmr_pool_size, " Max number of fmr per HCA"); +module_param(fmr_message_size, int, 0444); +MODULE_PARM_DESC(fmr_message_size, " Max size of a RDMA transfer"); + +struct list_head rds_ib_devices; + +DEFINE_SPINLOCK(ib_nodev_conns_lock); +LIST_HEAD(ib_nodev_conns); + +void rds_ib_add_one(struct ib_device *device) +{ + struct rds_ib_device *rds_ibdev; + struct ib_device_attr *dev_attr; + + /* Only handle IB (no iWARP) devices */ + if (device->node_type != RDMA_NODE_IB_CA) + return; + + dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL); + if (!dev_attr) + return; + + if (ib_query_device(device, dev_attr)) { + rdsdebug("Query device failed for %s\n", device->name); + goto free_attr; + } + + rds_ibdev = kmalloc(sizeof *rds_ibdev, GFP_KERNEL); + if (!rds_ibdev) + goto free_attr; + + spin_lock_init(&rds_ibdev->spinlock); + + rds_ibdev->max_wrs = dev_attr->max_qp_wr; + rds_ibdev->max_sge = min(dev_attr->max_sge, RDS_IB_MAX_SGE); + + rds_ibdev->fmr_page_shift = max(9, ffs(dev_attr->page_size_cap) - 1); + rds_ibdev->fmr_page_size = 1 << rds_ibdev->fmr_page_shift; + rds_ibdev->fmr_page_mask = ~((u64) rds_ibdev->fmr_page_size - 1); + rds_ibdev->fmr_max_remaps = dev_attr->max_map_per_fmr?: 32; + rds_ibdev->max_fmrs = dev_attr->max_fmr ? + min_t(unsigned int, dev_attr->max_fmr, fmr_pool_size) : + fmr_pool_size; + + rds_ibdev->dev = device; + rds_ibdev->pd = ib_alloc_pd(device); + if (IS_ERR(rds_ibdev->pd)) + goto free_dev; + + rds_ibdev->mr = ib_get_dma_mr(rds_ibdev->pd, + IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(rds_ibdev->mr)) + goto err_pd; + + rds_ibdev->mr_pool = rds_ib_create_mr_pool(rds_ibdev); + if (IS_ERR(rds_ibdev->mr_pool)) { + rds_ibdev->mr_pool = NULL; + goto err_mr; + } + + INIT_LIST_HEAD(&rds_ibdev->ipaddr_list); + INIT_LIST_HEAD(&rds_ibdev->conn_list); + list_add_tail(&rds_ibdev->list, &rds_ib_devices); + + ib_set_client_data(device, &rds_ib_client, rds_ibdev); + + goto free_attr; + +err_mr: + ib_dereg_mr(rds_ibdev->mr); +err_pd: + ib_dealloc_pd(rds_ibdev->pd); +free_dev: + kfree(rds_ibdev); +free_attr: + kfree(dev_attr); +} + +void rds_ib_remove_one(struct ib_device *device) +{ + struct rds_ib_device *rds_ibdev; + struct rds_ib_ipaddr *i_ipaddr, *i_next; + + rds_ibdev = ib_get_client_data(device, &rds_ib_client); + if (!rds_ibdev) + return; + + list_for_each_entry_safe(i_ipaddr, i_next, &rds_ibdev->ipaddr_list, list) { + list_del(&i_ipaddr->list); + kfree(i_ipaddr); + } + + rds_ib_remove_conns(rds_ibdev); + + if (rds_ibdev->mr_pool) + rds_ib_destroy_mr_pool(rds_ibdev->mr_pool); + + ib_dereg_mr(rds_ibdev->mr); + + while (ib_dealloc_pd(rds_ibdev->pd)) { + rdsdebug("Failed to dealloc pd %p\n", rds_ibdev->pd); + msleep(1); + } + + list_del(&rds_ibdev->list); + kfree(rds_ibdev); +} + +struct ib_client rds_ib_client = { + .name = "rds_ib", + .add = rds_ib_add_one, + .remove = rds_ib_remove_one +}; + +static int rds_ib_conn_info_visitor(struct rds_connection *conn, + void *buffer) +{ + struct rds_info_rdma_connection *iinfo = buffer; + struct rds_ib_connection *ic; + + /* We will only ever look at IB transports */ + if (conn->c_trans != &rds_ib_transport) + return 0; + + iinfo->src_addr = conn->c_laddr; + iinfo->dst_addr = conn->c_faddr; + + memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid)); + memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid)); + if (rds_conn_state(conn) == RDS_CONN_UP) { + struct rds_ib_device *rds_ibdev; + struct rdma_dev_addr *dev_addr; + + ic = conn->c_transport_data; + dev_addr = &ic->i_cm_id->route.addr.dev_addr; + + ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); + ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); + + rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); + iinfo->max_send_wr = ic->i_send_ring.w_nr; + iinfo->max_recv_wr = ic->i_recv_ring.w_nr; + iinfo->max_send_sge = rds_ibdev->max_sge; + rds_ib_get_mr_info(rds_ibdev, iinfo); + } + return 1; +} + +static void rds_ib_ic_info(struct socket *sock, unsigned int len, + struct rds_info_iterator *iter, + struct rds_info_lengths *lens) +{ + rds_for_each_conn_info(sock, len, iter, lens, + rds_ib_conn_info_visitor, + sizeof(struct rds_info_rdma_connection)); +} + + +/* + * Early RDS/IB was built to only bind to an address if there is an IPoIB + * device with that address set. + * + * If it were me, I'd advocate for something more flexible. Sending and + * receiving should be device-agnostic. Transports would try and maintain + * connections between peers who have messages queued. Userspace would be + * allowed to influence which paths have priority. We could call userspace + * asserting this policy "routing". + */ +static int rds_ib_laddr_check(__be32 addr) +{ + int ret; + struct rdma_cm_id *cm_id; + struct sockaddr_in sin; + + /* Create a CMA ID and try to bind it. This catches both + * IB and iWARP capable NICs. + */ + cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP); + if (!cm_id) + return -EADDRNOTAVAIL; + + memset(&sin, 0, sizeof(sin)); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = addr; + + /* rdma_bind_addr will only succeed for IB & iWARP devices */ + ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin); + /* due to this, we will claim to support iWARP devices unless we + check node_type. */ + if (ret || cm_id->device->node_type != RDMA_NODE_IB_CA) + ret = -EADDRNOTAVAIL; + + rdsdebug("addr %pI4 ret %d node type %d\n", + &addr, ret, + cm_id->device ? cm_id->device->node_type : -1); + + rdma_destroy_id(cm_id); + + return ret; +} + +void rds_ib_exit(void) +{ + rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); + rds_ib_remove_nodev_conns(); + ib_unregister_client(&rds_ib_client); + rds_ib_sysctl_exit(); + rds_ib_recv_exit(); + rds_trans_unregister(&rds_ib_transport); +} + +struct rds_transport rds_ib_transport = { + .laddr_check = rds_ib_laddr_check, + .xmit_complete = rds_ib_xmit_complete, + .xmit = rds_ib_xmit, + .xmit_cong_map = NULL, + .xmit_rdma = rds_ib_xmit_rdma, + .recv = rds_ib_recv, + .conn_alloc = rds_ib_conn_alloc, + .conn_free = rds_ib_conn_free, + .conn_connect = rds_ib_conn_connect, + .conn_shutdown = rds_ib_conn_shutdown, + .inc_copy_to_user = rds_ib_inc_copy_to_user, + .inc_purge = rds_ib_inc_purge, + .inc_free = rds_ib_inc_free, + .cm_initiate_connect = rds_ib_cm_initiate_connect, + .cm_handle_connect = rds_ib_cm_handle_connect, + .cm_connect_complete = rds_ib_cm_connect_complete, + .stats_info_copy = rds_ib_stats_info_copy, + .exit = rds_ib_exit, + .get_mr = rds_ib_get_mr, + .sync_mr = rds_ib_sync_mr, + .free_mr = rds_ib_free_mr, + .flush_mrs = rds_ib_flush_mrs, + .t_owner = THIS_MODULE, + .t_name = "infiniband", +}; + +int __init rds_ib_init(void) +{ + int ret; + + INIT_LIST_HEAD(&rds_ib_devices); + + ret = ib_register_client(&rds_ib_client); + if (ret) + goto out; + + ret = rds_ib_sysctl_init(); + if (ret) + goto out_ibreg; + + ret = rds_ib_recv_init(); + if (ret) + goto out_sysctl; + + ret = rds_trans_register(&rds_ib_transport); + if (ret) + goto out_recv; + + rds_info_register_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); + + goto out; + +out_recv: + rds_ib_recv_exit(); +out_sysctl: + rds_ib_sysctl_exit(); +out_ibreg: + ib_unregister_client(&rds_ib_client); +out: + return ret; +} + +MODULE_LICENSE("GPL"); + diff --git a/net/rds/ib.h b/net/rds/ib.h new file mode 100644 index 000000000000..8be563a1363a --- /dev/null +++ b/net/rds/ib.h @@ -0,0 +1,367 @@ +#ifndef _RDS_IB_H +#define _RDS_IB_H + +#include +#include +#include "rds.h" +#include "rdma_transport.h" + +#define RDS_FMR_SIZE 256 +#define RDS_FMR_POOL_SIZE 4096 + +#define RDS_IB_MAX_SGE 8 +#define RDS_IB_RECV_SGE 2 + +#define RDS_IB_DEFAULT_RECV_WR 1024 +#define RDS_IB_DEFAULT_SEND_WR 256 + +#define RDS_IB_SUPPORTED_PROTOCOLS 0x00000003 /* minor versions supported */ + +extern struct list_head rds_ib_devices; + +/* + * IB posts RDS_FRAG_SIZE fragments of pages to the receive queues to + * try and minimize the amount of memory tied up both the device and + * socket receive queues. + */ +/* page offset of the final full frag that fits in the page */ +#define RDS_PAGE_LAST_OFF (((PAGE_SIZE / RDS_FRAG_SIZE) - 1) * RDS_FRAG_SIZE) +struct rds_page_frag { + struct list_head f_item; + struct page *f_page; + unsigned long f_offset; + dma_addr_t f_mapped; +}; + +struct rds_ib_incoming { + struct list_head ii_frags; + struct rds_incoming ii_inc; +}; + +struct rds_ib_connect_private { + /* Add new fields at the end, and don't permute existing fields. */ + __be32 dp_saddr; + __be32 dp_daddr; + u8 dp_protocol_major; + u8 dp_protocol_minor; + __be16 dp_protocol_minor_mask; /* bitmask */ + __be32 dp_reserved1; + __be64 dp_ack_seq; + __be32 dp_credit; /* non-zero enables flow ctl */ +}; + +struct rds_ib_send_work { + struct rds_message *s_rm; + struct rds_rdma_op *s_op; + struct ib_send_wr s_wr; + struct ib_sge s_sge[RDS_IB_MAX_SGE]; + unsigned long s_queued; +}; + +struct rds_ib_recv_work { + struct rds_ib_incoming *r_ibinc; + struct rds_page_frag *r_frag; + struct ib_recv_wr r_wr; + struct ib_sge r_sge[2]; +}; + +struct rds_ib_work_ring { + u32 w_nr; + u32 w_alloc_ptr; + u32 w_alloc_ctr; + u32 w_free_ptr; + atomic_t w_free_ctr; +}; + +struct rds_ib_device; + +struct rds_ib_connection { + + struct list_head ib_node; + struct rds_ib_device *rds_ibdev; + struct rds_connection *conn; + + /* alphabet soup, IBTA style */ + struct rdma_cm_id *i_cm_id; + struct ib_pd *i_pd; + struct ib_mr *i_mr; + struct ib_cq *i_send_cq; + struct ib_cq *i_recv_cq; + + /* tx */ + struct rds_ib_work_ring i_send_ring; + struct rds_message *i_rm; + struct rds_header *i_send_hdrs; + u64 i_send_hdrs_dma; + struct rds_ib_send_work *i_sends; + + /* rx */ + struct mutex i_recv_mutex; + struct rds_ib_work_ring i_recv_ring; + struct rds_ib_incoming *i_ibinc; + u32 i_recv_data_rem; + struct rds_header *i_recv_hdrs; + u64 i_recv_hdrs_dma; + struct rds_ib_recv_work *i_recvs; + struct rds_page_frag i_frag; + u64 i_ack_recv; /* last ACK received */ + + /* sending acks */ + unsigned long i_ack_flags; + u64 i_ack_next; /* next ACK to send */ + struct rds_header *i_ack; + struct ib_send_wr i_ack_wr; + struct ib_sge i_ack_sge; + u64 i_ack_dma; + unsigned long i_ack_queued; + + /* Flow control related information + * + * Our algorithm uses a pair variables that we need to access + * atomically - one for the send credits, and one posted + * recv credits we need to transfer to remote. + * Rather than protect them using a slow spinlock, we put both into + * a single atomic_t and update it using cmpxchg + */ + atomic_t i_credits; + + /* Protocol version specific information */ + unsigned int i_flowctl:1; /* enable/disable flow ctl */ + + /* Batched completions */ + unsigned int i_unsignaled_wrs; + long i_unsignaled_bytes; +}; + +/* This assumes that atomic_t is at least 32 bits */ +#define IB_GET_SEND_CREDITS(v) ((v) & 0xffff) +#define IB_GET_POST_CREDITS(v) ((v) >> 16) +#define IB_SET_SEND_CREDITS(v) ((v) & 0xffff) +#define IB_SET_POST_CREDITS(v) ((v) << 16) + +struct rds_ib_ipaddr { + struct list_head list; + __be32 ipaddr; +}; + +struct rds_ib_device { + struct list_head list; + struct list_head ipaddr_list; + struct list_head conn_list; + struct ib_device *dev; + struct ib_pd *pd; + struct ib_mr *mr; + struct rds_ib_mr_pool *mr_pool; + int fmr_page_shift; + int fmr_page_size; + u64 fmr_page_mask; + unsigned int fmr_max_remaps; + unsigned int max_fmrs; + int max_sge; + unsigned int max_wrs; + spinlock_t spinlock; /* protect the above */ +}; + +/* bits for i_ack_flags */ +#define IB_ACK_IN_FLIGHT 0 +#define IB_ACK_REQUESTED 1 + +/* Magic WR_ID for ACKs */ +#define RDS_IB_ACK_WR_ID (~(u64) 0) + +struct rds_ib_statistics { + uint64_t s_ib_connect_raced; + uint64_t s_ib_listen_closed_stale; + uint64_t s_ib_tx_cq_call; + uint64_t s_ib_tx_cq_event; + uint64_t s_ib_tx_ring_full; + uint64_t s_ib_tx_throttle; + uint64_t s_ib_tx_sg_mapping_failure; + uint64_t s_ib_tx_stalled; + uint64_t s_ib_tx_credit_updates; + uint64_t s_ib_rx_cq_call; + uint64_t s_ib_rx_cq_event; + uint64_t s_ib_rx_ring_empty; + uint64_t s_ib_rx_refill_from_cq; + uint64_t s_ib_rx_refill_from_thread; + uint64_t s_ib_rx_alloc_limit; + uint64_t s_ib_rx_credit_updates; + uint64_t s_ib_ack_sent; + uint64_t s_ib_ack_send_failure; + uint64_t s_ib_ack_send_delayed; + uint64_t s_ib_ack_send_piggybacked; + uint64_t s_ib_ack_received; + uint64_t s_ib_rdma_mr_alloc; + uint64_t s_ib_rdma_mr_free; + uint64_t s_ib_rdma_mr_used; + uint64_t s_ib_rdma_mr_pool_flush; + uint64_t s_ib_rdma_mr_pool_wait; + uint64_t s_ib_rdma_mr_pool_depleted; +}; + +extern struct workqueue_struct *rds_ib_wq; + +/* + * Fake ib_dma_sync_sg_for_{cpu,device} as long as ib_verbs.h + * doesn't define it. + */ +static inline void rds_ib_dma_sync_sg_for_cpu(struct ib_device *dev, + struct scatterlist *sg, unsigned int sg_dma_len, int direction) +{ + unsigned int i; + + for (i = 0; i < sg_dma_len; ++i) { + ib_dma_sync_single_for_cpu(dev, + ib_sg_dma_address(dev, &sg[i]), + ib_sg_dma_len(dev, &sg[i]), + direction); + } +} +#define ib_dma_sync_sg_for_cpu rds_ib_dma_sync_sg_for_cpu + +static inline void rds_ib_dma_sync_sg_for_device(struct ib_device *dev, + struct scatterlist *sg, unsigned int sg_dma_len, int direction) +{ + unsigned int i; + + for (i = 0; i < sg_dma_len; ++i) { + ib_dma_sync_single_for_device(dev, + ib_sg_dma_address(dev, &sg[i]), + ib_sg_dma_len(dev, &sg[i]), + direction); + } +} +#define ib_dma_sync_sg_for_device rds_ib_dma_sync_sg_for_device + + +/* ib.c */ +extern struct rds_transport rds_ib_transport; +extern void rds_ib_add_one(struct ib_device *device); +extern void rds_ib_remove_one(struct ib_device *device); +extern struct ib_client rds_ib_client; + +extern unsigned int fmr_pool_size; +extern unsigned int fmr_message_size; + +extern spinlock_t ib_nodev_conns_lock; +extern struct list_head ib_nodev_conns; + +/* ib_cm.c */ +int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp); +void rds_ib_conn_free(void *arg); +int rds_ib_conn_connect(struct rds_connection *conn); +void rds_ib_conn_shutdown(struct rds_connection *conn); +void rds_ib_state_change(struct sock *sk); +int __init rds_ib_listen_init(void); +void rds_ib_listen_stop(void); +void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...); +int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, + struct rdma_cm_event *event); +int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id); +void rds_ib_cm_connect_complete(struct rds_connection *conn, + struct rdma_cm_event *event); + + +#define rds_ib_conn_error(conn, fmt...) \ + __rds_ib_conn_error(conn, KERN_WARNING "RDS/IB: " fmt) + +/* ib_rdma.c */ +int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr); +int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); +void rds_ib_remove_nodev_conns(void); +void rds_ib_remove_conns(struct rds_ib_device *rds_ibdev); +struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *); +void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo); +void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *); +void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, + struct rds_sock *rs, u32 *key_ret); +void rds_ib_sync_mr(void *trans_private, int dir); +void rds_ib_free_mr(void *trans_private, int invalidate); +void rds_ib_flush_mrs(void); + +/* ib_recv.c */ +int __init rds_ib_recv_init(void); +void rds_ib_recv_exit(void); +int rds_ib_recv(struct rds_connection *conn); +int rds_ib_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp, + gfp_t page_gfp, int prefill); +void rds_ib_inc_purge(struct rds_incoming *inc); +void rds_ib_inc_free(struct rds_incoming *inc); +int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov, + size_t size); +void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context); +void rds_ib_recv_init_ring(struct rds_ib_connection *ic); +void rds_ib_recv_clear_ring(struct rds_ib_connection *ic); +void rds_ib_recv_init_ack(struct rds_ib_connection *ic); +void rds_ib_attempt_ack(struct rds_ib_connection *ic); +void rds_ib_ack_send_complete(struct rds_ib_connection *ic); +u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic); + +/* ib_ring.c */ +void rds_ib_ring_init(struct rds_ib_work_ring *ring, u32 nr); +void rds_ib_ring_resize(struct rds_ib_work_ring *ring, u32 nr); +u32 rds_ib_ring_alloc(struct rds_ib_work_ring *ring, u32 val, u32 *pos); +void rds_ib_ring_free(struct rds_ib_work_ring *ring, u32 val); +void rds_ib_ring_unalloc(struct rds_ib_work_ring *ring, u32 val); +int rds_ib_ring_empty(struct rds_ib_work_ring *ring); +int rds_ib_ring_low(struct rds_ib_work_ring *ring); +u32 rds_ib_ring_oldest(struct rds_ib_work_ring *ring); +u32 rds_ib_ring_completed(struct rds_ib_work_ring *ring, u32 wr_id, u32 oldest); +extern wait_queue_head_t rds_ib_ring_empty_wait; + +/* ib_send.c */ +void rds_ib_xmit_complete(struct rds_connection *conn); +int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, + unsigned int hdr_off, unsigned int sg, unsigned int off); +void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context); +void rds_ib_send_init_ring(struct rds_ib_connection *ic); +void rds_ib_send_clear_ring(struct rds_ib_connection *ic); +int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op); +void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits); +void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted); +int rds_ib_send_grab_credits(struct rds_ib_connection *ic, u32 wanted, + u32 *adv_credits, int need_posted); + +/* ib_stats.c */ +DECLARE_PER_CPU(struct rds_ib_statistics, rds_ib_stats); +#define rds_ib_stats_inc(member) rds_stats_inc_which(rds_ib_stats, member) +unsigned int rds_ib_stats_info_copy(struct rds_info_iterator *iter, + unsigned int avail); + +/* ib_sysctl.c */ +int __init rds_ib_sysctl_init(void); +void rds_ib_sysctl_exit(void); +extern unsigned long rds_ib_sysctl_max_send_wr; +extern unsigned long rds_ib_sysctl_max_recv_wr; +extern unsigned long rds_ib_sysctl_max_unsig_wrs; +extern unsigned long rds_ib_sysctl_max_unsig_bytes; +extern unsigned long rds_ib_sysctl_max_recv_allocation; +extern unsigned int rds_ib_sysctl_flow_control; +extern ctl_table rds_ib_sysctl_table[]; + +/* + * Helper functions for getting/setting the header and data SGEs in + * RDS packets (not RDMA) + */ +static inline struct ib_sge * +rds_ib_header_sge(struct rds_ib_connection *ic, struct ib_sge *sge) +{ + return &sge[0]; +} + +static inline struct ib_sge * +rds_ib_data_sge(struct rds_ib_connection *ic, struct ib_sge *sge) +{ + return &sge[1]; +} + +static inline void rds_ib_set_64bit(u64 *ptr, u64 val) +{ +#if BITS_PER_LONG == 64 + *ptr = val; +#else + set_64bit(ptr, val); +#endif +} + +#endif diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c new file mode 100644 index 000000000000..0532237bd128 --- /dev/null +++ b/net/rds/ib_cm.c @@ -0,0 +1,726 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include + +#include "rds.h" +#include "ib.h" + +/* + * Set the selected protocol version + */ +static void rds_ib_set_protocol(struct rds_connection *conn, unsigned int version) +{ + conn->c_version = version; +} + +/* + * Set up flow control + */ +static void rds_ib_set_flow_control(struct rds_connection *conn, u32 credits) +{ + struct rds_ib_connection *ic = conn->c_transport_data; + + if (rds_ib_sysctl_flow_control && credits != 0) { + /* We're doing flow control */ + ic->i_flowctl = 1; + rds_ib_send_add_credits(conn, credits); + } else { + ic->i_flowctl = 0; + } +} + +/* + * Tune RNR behavior. Without flow control, we use a rather + * low timeout, but not the absolute minimum - this should + * be tunable. + * + * We already set the RNR retry count to 7 (which is the + * smallest infinite number :-) above. + * If flow control is off, we want to change this back to 0 + * so that we learn quickly when our credit accounting is + * buggy. + * + * Caller passes in a qp_attr pointer - don't waste stack spacv + * by allocation this twice. + */ +static void +rds_ib_tune_rnr(struct rds_ib_connection *ic, struct ib_qp_attr *attr) +{ + int ret; + + attr->min_rnr_timer = IB_RNR_TIMER_000_32; + ret = ib_modify_qp(ic->i_cm_id->qp, attr, IB_QP_MIN_RNR_TIMER); + if (ret) + printk(KERN_NOTICE "ib_modify_qp(IB_QP_MIN_RNR_TIMER): err=%d\n", -ret); +} + +/* + * Connection established. + * We get here for both outgoing and incoming connection. + */ +void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event) +{ + const struct rds_ib_connect_private *dp = NULL; + struct rds_ib_connection *ic = conn->c_transport_data; + struct rds_ib_device *rds_ibdev; + struct ib_qp_attr qp_attr; + int err; + + if (event->param.conn.private_data_len) { + dp = event->param.conn.private_data; + + rds_ib_set_protocol(conn, + RDS_PROTOCOL(dp->dp_protocol_major, + dp->dp_protocol_minor)); + rds_ib_set_flow_control(conn, be32_to_cpu(dp->dp_credit)); + } + + printk(KERN_NOTICE "RDS/IB: connected to %pI4 version %u.%u%s\n", + &conn->c_laddr, + RDS_PROTOCOL_MAJOR(conn->c_version), + RDS_PROTOCOL_MINOR(conn->c_version), + ic->i_flowctl ? ", flow control" : ""); + + /* Tune RNR behavior */ + rds_ib_tune_rnr(ic, &qp_attr); + + qp_attr.qp_state = IB_QPS_RTS; + err = ib_modify_qp(ic->i_cm_id->qp, &qp_attr, IB_QP_STATE); + if (err) + printk(KERN_NOTICE "ib_modify_qp(IB_QP_STATE, RTS): err=%d\n", err); + + /* update ib_device with this local ipaddr & conn */ + rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); + err = rds_ib_update_ipaddr(rds_ibdev, conn->c_laddr); + if (err) + printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err); + err = rds_ib_add_conn(rds_ibdev, conn); + if (err) + printk(KERN_ERR "rds_ib_add_conn failed (%d)\n", err); + + /* If the peer gave us the last packet it saw, process this as if + * we had received a regular ACK. */ + if (dp && dp->dp_ack_seq) + rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL); + + rds_connect_complete(conn); +} + +static void rds_ib_cm_fill_conn_param(struct rds_connection *conn, + struct rdma_conn_param *conn_param, + struct rds_ib_connect_private *dp, + u32 protocol_version) +{ + memset(conn_param, 0, sizeof(struct rdma_conn_param)); + /* XXX tune these? */ + conn_param->responder_resources = 1; + conn_param->initiator_depth = 1; + conn_param->retry_count = 7; + conn_param->rnr_retry_count = 7; + + if (dp) { + struct rds_ib_connection *ic = conn->c_transport_data; + + memset(dp, 0, sizeof(*dp)); + dp->dp_saddr = conn->c_laddr; + dp->dp_daddr = conn->c_faddr; + dp->dp_protocol_major = RDS_PROTOCOL_MAJOR(protocol_version); + dp->dp_protocol_minor = RDS_PROTOCOL_MINOR(protocol_version); + dp->dp_protocol_minor_mask = cpu_to_be16(RDS_IB_SUPPORTED_PROTOCOLS); + dp->dp_ack_seq = rds_ib_piggyb_ack(ic); + + /* Advertise flow control */ + if (ic->i_flowctl) { + unsigned int credits; + + credits = IB_GET_POST_CREDITS(atomic_read(&ic->i_credits)); + dp->dp_credit = cpu_to_be32(credits); + atomic_sub(IB_SET_POST_CREDITS(credits), &ic->i_credits); + } + + conn_param->private_data = dp; + conn_param->private_data_len = sizeof(*dp); + } +} + +static void rds_ib_cq_event_handler(struct ib_event *event, void *data) +{ + rdsdebug("event %u data %p\n", event->event, data); +} + +static void rds_ib_qp_event_handler(struct ib_event *event, void *data) +{ + struct rds_connection *conn = data; + struct rds_ib_connection *ic = conn->c_transport_data; + + rdsdebug("conn %p ic %p event %u\n", conn, ic, event->event); + + switch (event->event) { + case IB_EVENT_COMM_EST: + rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST); + break; + default: + printk(KERN_WARNING "RDS/ib: unhandled QP event %u " + "on connection to %pI4\n", event->event, + &conn->c_faddr); + break; + } +} + +/* + * This needs to be very careful to not leave IS_ERR pointers around for + * cleanup to trip over. + */ +static int rds_ib_setup_qp(struct rds_connection *conn) +{ + struct rds_ib_connection *ic = conn->c_transport_data; + struct ib_device *dev = ic->i_cm_id->device; + struct ib_qp_init_attr attr; + struct rds_ib_device *rds_ibdev; + int ret; + + /* rds_ib_add_one creates a rds_ib_device object per IB device, + * and allocates a protection domain, memory range and FMR pool + * for each. If that fails for any reason, it will not register + * the rds_ibdev at all. + */ + rds_ibdev = ib_get_client_data(dev, &rds_ib_client); + if (rds_ibdev == NULL) { + if (printk_ratelimit()) + printk(KERN_NOTICE "RDS/IB: No client_data for device %s\n", + dev->name); + return -EOPNOTSUPP; + } + + if (rds_ibdev->max_wrs < ic->i_send_ring.w_nr + 1) + rds_ib_ring_resize(&ic->i_send_ring, rds_ibdev->max_wrs - 1); + if (rds_ibdev->max_wrs < ic->i_recv_ring.w_nr + 1) + rds_ib_ring_resize(&ic->i_recv_ring, rds_ibdev->max_wrs - 1); + + /* Protection domain and memory range */ + ic->i_pd = rds_ibdev->pd; + ic->i_mr = rds_ibdev->mr; + + ic->i_send_cq = ib_create_cq(dev, rds_ib_send_cq_comp_handler, + rds_ib_cq_event_handler, conn, + ic->i_send_ring.w_nr + 1, 0); + if (IS_ERR(ic->i_send_cq)) { + ret = PTR_ERR(ic->i_send_cq); + ic->i_send_cq = NULL; + rdsdebug("ib_create_cq send failed: %d\n", ret); + goto out; + } + + ic->i_recv_cq = ib_create_cq(dev, rds_ib_recv_cq_comp_handler, + rds_ib_cq_event_handler, conn, + ic->i_recv_ring.w_nr, 0); + if (IS_ERR(ic->i_recv_cq)) { + ret = PTR_ERR(ic->i_recv_cq); + ic->i_recv_cq = NULL; + rdsdebug("ib_create_cq recv failed: %d\n", ret); + goto out; + } + + ret = ib_req_notify_cq(ic->i_send_cq, IB_CQ_NEXT_COMP); + if (ret) { + rdsdebug("ib_req_notify_cq send failed: %d\n", ret); + goto out; + } + + ret = ib_req_notify_cq(ic->i_recv_cq, IB_CQ_SOLICITED); + if (ret) { + rdsdebug("ib_req_notify_cq recv failed: %d\n", ret); + goto out; + } + + /* XXX negotiate max send/recv with remote? */ + memset(&attr, 0, sizeof(attr)); + attr.event_handler = rds_ib_qp_event_handler; + attr.qp_context = conn; + /* + 1 to allow for the single ack message */ + attr.cap.max_send_wr = ic->i_send_ring.w_nr + 1; + attr.cap.max_recv_wr = ic->i_recv_ring.w_nr + 1; + attr.cap.max_send_sge = rds_ibdev->max_sge; + attr.cap.max_recv_sge = RDS_IB_RECV_SGE; + attr.sq_sig_type = IB_SIGNAL_REQ_WR; + attr.qp_type = IB_QPT_RC; + attr.send_cq = ic->i_send_cq; + attr.recv_cq = ic->i_recv_cq; + + /* + * XXX this can fail if max_*_wr is too large? Are we supposed + * to back off until we get a value that the hardware can support? + */ + ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr); + if (ret) { + rdsdebug("rdma_create_qp failed: %d\n", ret); + goto out; + } + + ic->i_send_hdrs = ib_dma_alloc_coherent(dev, + ic->i_send_ring.w_nr * + sizeof(struct rds_header), + &ic->i_send_hdrs_dma, GFP_KERNEL); + if (ic->i_send_hdrs == NULL) { + ret = -ENOMEM; + rdsdebug("ib_dma_alloc_coherent send failed\n"); + goto out; + } + + ic->i_recv_hdrs = ib_dma_alloc_coherent(dev, + ic->i_recv_ring.w_nr * + sizeof(struct rds_header), + &ic->i_recv_hdrs_dma, GFP_KERNEL); + if (ic->i_recv_hdrs == NULL) { + ret = -ENOMEM; + rdsdebug("ib_dma_alloc_coherent recv failed\n"); + goto out; + } + + ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header), + &ic->i_ack_dma, GFP_KERNEL); + if (ic->i_ack == NULL) { + ret = -ENOMEM; + rdsdebug("ib_dma_alloc_coherent ack failed\n"); + goto out; + } + + ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_ib_send_work)); + if (ic->i_sends == NULL) { + ret = -ENOMEM; + rdsdebug("send allocation failed\n"); + goto out; + } + rds_ib_send_init_ring(ic); + + ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_ib_recv_work)); + if (ic->i_recvs == NULL) { + ret = -ENOMEM; + rdsdebug("recv allocation failed\n"); + goto out; + } + + rds_ib_recv_init_ring(ic); + rds_ib_recv_init_ack(ic); + + /* Post receive buffers - as a side effect, this will update + * the posted credit count. */ + rds_ib_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 1); + + rdsdebug("conn %p pd %p mr %p cq %p %p\n", conn, ic->i_pd, ic->i_mr, + ic->i_send_cq, ic->i_recv_cq); + +out: + return ret; +} + +static u32 rds_ib_protocol_compatible(const struct rds_ib_connect_private *dp) +{ + u16 common; + u32 version = 0; + + /* rdma_cm private data is odd - when there is any private data in the + * request, we will be given a pretty large buffer without telling us the + * original size. The only way to tell the difference is by looking at + * the contents, which are initialized to zero. + * If the protocol version fields aren't set, this is a connection attempt + * from an older version. This could could be 3.0 or 2.0 - we can't tell. + * We really should have changed this for OFED 1.3 :-( */ + if (dp->dp_protocol_major == 0) + return RDS_PROTOCOL_3_0; + + common = be16_to_cpu(dp->dp_protocol_minor_mask) & RDS_IB_SUPPORTED_PROTOCOLS; + if (dp->dp_protocol_major == 3 && common) { + version = RDS_PROTOCOL_3_0; + while ((common >>= 1) != 0) + version++; + } else if (printk_ratelimit()) { + printk(KERN_NOTICE "RDS: Connection from %pI4 using " + "incompatible protocol version %u.%u\n", + &dp->dp_saddr, + dp->dp_protocol_major, + dp->dp_protocol_minor); + } + return version; +} + +int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, + struct rdma_cm_event *event) +{ + __be64 lguid = cm_id->route.path_rec->sgid.global.interface_id; + __be64 fguid = cm_id->route.path_rec->dgid.global.interface_id; + const struct rds_ib_connect_private *dp = event->param.conn.private_data; + struct rds_ib_connect_private dp_rep; + struct rds_connection *conn = NULL; + struct rds_ib_connection *ic = NULL; + struct rdma_conn_param conn_param; + u32 version; + int err, destroy = 1; + + /* Check whether the remote protocol version matches ours. */ + version = rds_ib_protocol_compatible(dp); + if (!version) + goto out; + + rdsdebug("saddr %pI4 daddr %pI4 RDSv%u.%u lguid 0x%llx fguid " + "0x%llx\n", &dp->dp_saddr, &dp->dp_daddr, + RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version), + (unsigned long long)be64_to_cpu(lguid), + (unsigned long long)be64_to_cpu(fguid)); + + conn = rds_conn_create(dp->dp_daddr, dp->dp_saddr, &rds_ib_transport, + GFP_KERNEL); + if (IS_ERR(conn)) { + rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn)); + conn = NULL; + goto out; + } + + /* + * The connection request may occur while the + * previous connection exist, e.g. in case of failover. + * But as connections may be initiated simultaneously + * by both hosts, we have a random backoff mechanism - + * see the comment above rds_queue_reconnect() + */ + mutex_lock(&conn->c_cm_lock); + if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) { + if (rds_conn_state(conn) == RDS_CONN_UP) { + rdsdebug("incoming connect while connecting\n"); + rds_conn_drop(conn); + rds_ib_stats_inc(s_ib_listen_closed_stale); + } else + if (rds_conn_state(conn) == RDS_CONN_CONNECTING) { + /* Wait and see - our connect may still be succeeding */ + rds_ib_stats_inc(s_ib_connect_raced); + } + mutex_unlock(&conn->c_cm_lock); + goto out; + } + + ic = conn->c_transport_data; + + rds_ib_set_protocol(conn, version); + rds_ib_set_flow_control(conn, be32_to_cpu(dp->dp_credit)); + + /* If the peer gave us the last packet it saw, process this as if + * we had received a regular ACK. */ + if (dp->dp_ack_seq) + rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL); + + BUG_ON(cm_id->context); + BUG_ON(ic->i_cm_id); + + ic->i_cm_id = cm_id; + cm_id->context = conn; + + /* We got halfway through setting up the ib_connection, if we + * fail now, we have to take the long route out of this mess. */ + destroy = 0; + + err = rds_ib_setup_qp(conn); + if (err) { + rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", err); + goto out; + } + + rds_ib_cm_fill_conn_param(conn, &conn_param, &dp_rep, version); + + /* rdma_accept() calls rdma_reject() internally if it fails */ + err = rdma_accept(cm_id, &conn_param); + mutex_unlock(&conn->c_cm_lock); + if (err) { + rds_ib_conn_error(conn, "rdma_accept failed (%d)\n", err); + goto out; + } + + return 0; + +out: + rdma_reject(cm_id, NULL, 0); + return destroy; +} + + +int rds_ib_cm_initiate_connect(struct rdma_cm_id *cm_id) +{ + struct rds_connection *conn = cm_id->context; + struct rds_ib_connection *ic = conn->c_transport_data; + struct rdma_conn_param conn_param; + struct rds_ib_connect_private dp; + int ret; + + /* If the peer doesn't do protocol negotiation, we must + * default to RDSv3.0 */ + rds_ib_set_protocol(conn, RDS_PROTOCOL_3_0); + ic->i_flowctl = rds_ib_sysctl_flow_control; /* advertise flow control */ + + ret = rds_ib_setup_qp(conn); + if (ret) { + rds_ib_conn_error(conn, "rds_ib_setup_qp failed (%d)\n", ret); + goto out; + } + + rds_ib_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION); + + ret = rdma_connect(cm_id, &conn_param); + if (ret) + rds_ib_conn_error(conn, "rdma_connect failed (%d)\n", ret); + +out: + /* Beware - returning non-zero tells the rdma_cm to destroy + * the cm_id. We should certainly not do it as long as we still + * "own" the cm_id. */ + if (ret) { + if (ic->i_cm_id == cm_id) + ret = 0; + } + return ret; +} + +int rds_ib_conn_connect(struct rds_connection *conn) +{ + struct rds_ib_connection *ic = conn->c_transport_data; + struct sockaddr_in src, dest; + int ret; + + /* XXX I wonder what affect the port space has */ + /* delegate cm event handler to rdma_transport */ + ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn, + RDMA_PS_TCP); + if (IS_ERR(ic->i_cm_id)) { + ret = PTR_ERR(ic->i_cm_id); + ic->i_cm_id = NULL; + rdsdebug("rdma_create_id() failed: %d\n", ret); + goto out; + } + + rdsdebug("created cm id %p for conn %p\n", ic->i_cm_id, conn); + + src.sin_family = AF_INET; + src.sin_addr.s_addr = (__force u32)conn->c_laddr; + src.sin_port = (__force u16)htons(0); + + dest.sin_family = AF_INET; + dest.sin_addr.s_addr = (__force u32)conn->c_faddr; + dest.sin_port = (__force u16)htons(RDS_PORT); + + ret = rdma_resolve_addr(ic->i_cm_id, (struct sockaddr *)&src, + (struct sockaddr *)&dest, + RDS_RDMA_RESOLVE_TIMEOUT_MS); + if (ret) { + rdsdebug("addr resolve failed for cm id %p: %d\n", ic->i_cm_id, + ret); + rdma_destroy_id(ic->i_cm_id); + ic->i_cm_id = NULL; + } + +out: + return ret; +} + +/* + * This is so careful about only cleaning up resources that were built up + * so that it can be called at any point during startup. In fact it + * can be called multiple times for a given connection. + */ +void rds_ib_conn_shutdown(struct rds_connection *conn) +{ + struct rds_ib_connection *ic = conn->c_transport_data; + int err = 0; + + rdsdebug("cm %p pd %p cq %p %p qp %p\n", ic->i_cm_id, + ic->i_pd, ic->i_send_cq, ic->i_recv_cq, + ic->i_cm_id ? ic->i_cm_id->qp : NULL); + + if (ic->i_cm_id) { + struct ib_device *dev = ic->i_cm_id->device; + + rdsdebug("disconnecting cm %p\n", ic->i_cm_id); + err = rdma_disconnect(ic->i_cm_id); + if (err) { + /* Actually this may happen quite frequently, when + * an outgoing connect raced with an incoming connect. + */ + rdsdebug("failed to disconnect, cm: %p err %d\n", + ic->i_cm_id, err); + } + + wait_event(rds_ib_ring_empty_wait, + rds_ib_ring_empty(&ic->i_send_ring) && + rds_ib_ring_empty(&ic->i_recv_ring)); + + if (ic->i_send_hdrs) + ib_dma_free_coherent(dev, + ic->i_send_ring.w_nr * + sizeof(struct rds_header), + ic->i_send_hdrs, + ic->i_send_hdrs_dma); + + if (ic->i_recv_hdrs) + ib_dma_free_coherent(dev, + ic->i_recv_ring.w_nr * + sizeof(struct rds_header), + ic->i_recv_hdrs, + ic->i_recv_hdrs_dma); + + if (ic->i_ack) + ib_dma_free_coherent(dev, sizeof(struct rds_header), + ic->i_ack, ic->i_ack_dma); + + if (ic->i_sends) + rds_ib_send_clear_ring(ic); + if (ic->i_recvs) + rds_ib_recv_clear_ring(ic); + + if (ic->i_cm_id->qp) + rdma_destroy_qp(ic->i_cm_id); + if (ic->i_send_cq) + ib_destroy_cq(ic->i_send_cq); + if (ic->i_recv_cq) + ib_destroy_cq(ic->i_recv_cq); + rdma_destroy_id(ic->i_cm_id); + + /* + * Move connection back to the nodev list. + */ + if (ic->rds_ibdev) { + + spin_lock_irq(&ic->rds_ibdev->spinlock); + BUG_ON(list_empty(&ic->ib_node)); + list_del(&ic->ib_node); + spin_unlock_irq(&ic->rds_ibdev->spinlock); + + spin_lock_irq(&ib_nodev_conns_lock); + list_add_tail(&ic->ib_node, &ib_nodev_conns); + spin_unlock_irq(&ib_nodev_conns_lock); + ic->rds_ibdev = NULL; + } + + ic->i_cm_id = NULL; + ic->i_pd = NULL; + ic->i_mr = NULL; + ic->i_send_cq = NULL; + ic->i_recv_cq = NULL; + ic->i_send_hdrs = NULL; + ic->i_recv_hdrs = NULL; + ic->i_ack = NULL; + } + BUG_ON(ic->rds_ibdev); + + /* Clear pending transmit */ + if (ic->i_rm) { + rds_message_put(ic->i_rm); + ic->i_rm = NULL; + } + + /* Clear the ACK state */ + clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); + rds_ib_set_64bit(&ic->i_ack_next, 0); + ic->i_ack_recv = 0; + + /* Clear flow control state */ + ic->i_flowctl = 0; + atomic_set(&ic->i_credits, 0); + + rds_ib_ring_init(&ic->i_send_ring, rds_ib_sysctl_max_send_wr); + rds_ib_ring_init(&ic->i_recv_ring, rds_ib_sysctl_max_recv_wr); + + if (ic->i_ibinc) { + rds_inc_put(&ic->i_ibinc->ii_inc); + ic->i_ibinc = NULL; + } + + vfree(ic->i_sends); + ic->i_sends = NULL; + vfree(ic->i_recvs); + ic->i_recvs = NULL; +} + +int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp) +{ + struct rds_ib_connection *ic; + unsigned long flags; + + /* XXX too lazy? */ + ic = kzalloc(sizeof(struct rds_ib_connection), GFP_KERNEL); + if (ic == NULL) + return -ENOMEM; + + INIT_LIST_HEAD(&ic->ib_node); + mutex_init(&ic->i_recv_mutex); + + /* + * rds_ib_conn_shutdown() waits for these to be emptied so they + * must be initialized before it can be called. + */ + rds_ib_ring_init(&ic->i_send_ring, rds_ib_sysctl_max_send_wr); + rds_ib_ring_init(&ic->i_recv_ring, rds_ib_sysctl_max_recv_wr); + + ic->conn = conn; + conn->c_transport_data = ic; + + spin_lock_irqsave(&ib_nodev_conns_lock, flags); + list_add_tail(&ic->ib_node, &ib_nodev_conns); + spin_unlock_irqrestore(&ib_nodev_conns_lock, flags); + + + rdsdebug("conn %p conn ic %p\n", conn, conn->c_transport_data); + return 0; +} + +void rds_ib_conn_free(void *arg) +{ + struct rds_ib_connection *ic = arg; + rdsdebug("ic %p\n", ic); + list_del(&ic->ib_node); + kfree(ic); +} + + +/* + * An error occurred on the connection + */ +void +__rds_ib_conn_error(struct rds_connection *conn, const char *fmt, ...) +{ + va_list ap; + + rds_conn_drop(conn); + + va_start(ap, fmt); + vprintk(fmt, ap); + va_end(ap); +} -- cgit v1.2.3 From f528efe276ec16603b78f95fb63ca26b5b4c87dc Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 24 Feb 2009 15:30:31 +0000 Subject: RDS/IB: Ring-handling code. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/ib_ring.c | 168 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 168 insertions(+) create mode 100644 net/rds/ib_ring.c (limited to 'net') diff --git a/net/rds/ib_ring.c b/net/rds/ib_ring.c new file mode 100644 index 000000000000..99a6ccae964c --- /dev/null +++ b/net/rds/ib_ring.c @@ -0,0 +1,168 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include + +#include "rds.h" +#include "ib.h" + +/* + * Locking for IB rings. + * We assume that allocation is always protected by a mutex + * in the caller (this is a valid assumption for the current + * implementation). + * + * Freeing always happens in an interrupt, and hence only + * races with allocations, but not with other free()s. + * + * The interaction between allocation and freeing is that + * the alloc code has to determine the number of free entries. + * To this end, we maintain two counters; an allocation counter + * and a free counter. Both are allowed to run freely, and wrap + * around. + * The number of used entries is always (alloc_ctr - free_ctr) % NR. + * + * The current implementation makes free_ctr atomic. When the + * caller finds an allocation fails, it should set an "alloc fail" + * bit and retry the allocation. The "alloc fail" bit essentially tells + * the CQ completion handlers to wake it up after freeing some + * more entries. + */ + +/* + * This only happens on shutdown. + */ +DECLARE_WAIT_QUEUE_HEAD(rds_ib_ring_empty_wait); + +void rds_ib_ring_init(struct rds_ib_work_ring *ring, u32 nr) +{ + memset(ring, 0, sizeof(*ring)); + ring->w_nr = nr; + rdsdebug("ring %p nr %u\n", ring, ring->w_nr); +} + +static inline u32 __rds_ib_ring_used(struct rds_ib_work_ring *ring) +{ + u32 diff; + + /* This assumes that atomic_t has at least as many bits as u32 */ + diff = ring->w_alloc_ctr - (u32) atomic_read(&ring->w_free_ctr); + BUG_ON(diff > ring->w_nr); + + return diff; +} + +void rds_ib_ring_resize(struct rds_ib_work_ring *ring, u32 nr) +{ + /* We only ever get called from the connection setup code, + * prior to creating the QP. */ + BUG_ON(__rds_ib_ring_used(ring)); + ring->w_nr = nr; +} + +static int __rds_ib_ring_empty(struct rds_ib_work_ring *ring) +{ + return __rds_ib_ring_used(ring) == 0; +} + +u32 rds_ib_ring_alloc(struct rds_ib_work_ring *ring, u32 val, u32 *pos) +{ + u32 ret = 0, avail; + + avail = ring->w_nr - __rds_ib_ring_used(ring); + + rdsdebug("ring %p val %u next %u free %u\n", ring, val, + ring->w_alloc_ptr, avail); + + if (val && avail) { + ret = min(val, avail); + *pos = ring->w_alloc_ptr; + + ring->w_alloc_ptr = (ring->w_alloc_ptr + ret) % ring->w_nr; + ring->w_alloc_ctr += ret; + } + + return ret; +} + +void rds_ib_ring_free(struct rds_ib_work_ring *ring, u32 val) +{ + ring->w_free_ptr = (ring->w_free_ptr + val) % ring->w_nr; + atomic_add(val, &ring->w_free_ctr); + + if (__rds_ib_ring_empty(ring) && + waitqueue_active(&rds_ib_ring_empty_wait)) + wake_up(&rds_ib_ring_empty_wait); +} + +void rds_ib_ring_unalloc(struct rds_ib_work_ring *ring, u32 val) +{ + ring->w_alloc_ptr = (ring->w_alloc_ptr - val) % ring->w_nr; + ring->w_alloc_ctr -= val; +} + +int rds_ib_ring_empty(struct rds_ib_work_ring *ring) +{ + return __rds_ib_ring_empty(ring); +} + +int rds_ib_ring_low(struct rds_ib_work_ring *ring) +{ + return __rds_ib_ring_used(ring) <= (ring->w_nr >> 2); +} + +/* + * returns the oldest alloced ring entry. This will be the next one + * freed. This can't be called if there are none allocated. + */ +u32 rds_ib_ring_oldest(struct rds_ib_work_ring *ring) +{ + return ring->w_free_ptr; +} + +/* + * returns the number of completed work requests. + */ + +u32 rds_ib_ring_completed(struct rds_ib_work_ring *ring, u32 wr_id, u32 oldest) +{ + u32 ret; + + if (oldest <= (unsigned long long)wr_id) + ret = (unsigned long long)wr_id - oldest + 1; + else + ret = ring->w_nr - oldest + (unsigned long long)wr_id + 1; + + rdsdebug("ring %p ret %u wr_id %u oldest %u\n", ring, ret, + wr_id, oldest); + return ret; +} -- cgit v1.2.3 From 08b48a1ed84b19b602cbe979184ad06e7e1c025e Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 24 Feb 2009 15:30:32 +0000 Subject: RDS/IB: Implement RDMA ops using FMRs Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/ib_rdma.c | 641 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 641 insertions(+) create mode 100644 net/rds/ib_rdma.c (limited to 'net') diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c new file mode 100644 index 000000000000..69a6289ed672 --- /dev/null +++ b/net/rds/ib_rdma.c @@ -0,0 +1,641 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include + +#include "rds.h" +#include "rdma.h" +#include "ib.h" + + +/* + * This is stored as mr->r_trans_private. + */ +struct rds_ib_mr { + struct rds_ib_device *device; + struct rds_ib_mr_pool *pool; + struct ib_fmr *fmr; + struct list_head list; + unsigned int remap_count; + + struct scatterlist *sg; + unsigned int sg_len; + u64 *dma; + int sg_dma_len; +}; + +/* + * Our own little FMR pool + */ +struct rds_ib_mr_pool { + struct mutex flush_lock; /* serialize fmr invalidate */ + struct work_struct flush_worker; /* flush worker */ + + spinlock_t list_lock; /* protect variables below */ + atomic_t item_count; /* total # of MRs */ + atomic_t dirty_count; /* # dirty of MRs */ + struct list_head drop_list; /* MRs that have reached their max_maps limit */ + struct list_head free_list; /* unused MRs */ + struct list_head clean_list; /* unused & unamapped MRs */ + atomic_t free_pinned; /* memory pinned by free MRs */ + unsigned long max_items; + unsigned long max_items_soft; + unsigned long max_free_pinned; + struct ib_fmr_attr fmr_attr; +}; + +static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all); +static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr); +static void rds_ib_mr_pool_flush_worker(struct work_struct *work); + +static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr) +{ + struct rds_ib_device *rds_ibdev; + struct rds_ib_ipaddr *i_ipaddr; + + list_for_each_entry(rds_ibdev, &rds_ib_devices, list) { + spin_lock_irq(&rds_ibdev->spinlock); + list_for_each_entry(i_ipaddr, &rds_ibdev->ipaddr_list, list) { + if (i_ipaddr->ipaddr == ipaddr) { + spin_unlock_irq(&rds_ibdev->spinlock); + return rds_ibdev; + } + } + spin_unlock_irq(&rds_ibdev->spinlock); + } + + return NULL; +} + +static int rds_ib_add_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) +{ + struct rds_ib_ipaddr *i_ipaddr; + + i_ipaddr = kmalloc(sizeof *i_ipaddr, GFP_KERNEL); + if (!i_ipaddr) + return -ENOMEM; + + i_ipaddr->ipaddr = ipaddr; + + spin_lock_irq(&rds_ibdev->spinlock); + list_add_tail(&i_ipaddr->list, &rds_ibdev->ipaddr_list); + spin_unlock_irq(&rds_ibdev->spinlock); + + return 0; +} + +static void rds_ib_remove_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) +{ + struct rds_ib_ipaddr *i_ipaddr, *next; + + spin_lock_irq(&rds_ibdev->spinlock); + list_for_each_entry_safe(i_ipaddr, next, &rds_ibdev->ipaddr_list, list) { + if (i_ipaddr->ipaddr == ipaddr) { + list_del(&i_ipaddr->list); + kfree(i_ipaddr); + break; + } + } + spin_unlock_irq(&rds_ibdev->spinlock); +} + +int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) +{ + struct rds_ib_device *rds_ibdev_old; + + rds_ibdev_old = rds_ib_get_device(ipaddr); + if (rds_ibdev_old) + rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr); + + return rds_ib_add_ipaddr(rds_ibdev, ipaddr); +} + +int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) +{ + struct rds_ib_connection *ic = conn->c_transport_data; + + /* conn was previously on the nodev_conns_list */ + spin_lock_irq(&ib_nodev_conns_lock); + BUG_ON(list_empty(&ib_nodev_conns)); + BUG_ON(list_empty(&ic->ib_node)); + list_del(&ic->ib_node); + spin_unlock_irq(&ib_nodev_conns_lock); + + spin_lock_irq(&rds_ibdev->spinlock); + list_add_tail(&ic->ib_node, &rds_ibdev->conn_list); + spin_unlock_irq(&rds_ibdev->spinlock); + + ic->rds_ibdev = rds_ibdev; + + return 0; +} + +void rds_ib_remove_nodev_conns(void) +{ + struct rds_ib_connection *ic, *_ic; + LIST_HEAD(tmp_list); + + /* avoid calling conn_destroy with irqs off */ + spin_lock_irq(&ib_nodev_conns_lock); + list_splice(&ib_nodev_conns, &tmp_list); + INIT_LIST_HEAD(&ib_nodev_conns); + spin_unlock_irq(&ib_nodev_conns_lock); + + list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) { + if (ic->conn->c_passive) + rds_conn_destroy(ic->conn->c_passive); + rds_conn_destroy(ic->conn); + } +} + +void rds_ib_remove_conns(struct rds_ib_device *rds_ibdev) +{ + struct rds_ib_connection *ic, *_ic; + LIST_HEAD(tmp_list); + + /* avoid calling conn_destroy with irqs off */ + spin_lock_irq(&rds_ibdev->spinlock); + list_splice(&rds_ibdev->conn_list, &tmp_list); + INIT_LIST_HEAD(&rds_ibdev->conn_list); + spin_unlock_irq(&rds_ibdev->spinlock); + + list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) { + if (ic->conn->c_passive) + rds_conn_destroy(ic->conn->c_passive); + rds_conn_destroy(ic->conn); + } +} + +struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev) +{ + struct rds_ib_mr_pool *pool; + + pool = kzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&pool->free_list); + INIT_LIST_HEAD(&pool->drop_list); + INIT_LIST_HEAD(&pool->clean_list); + mutex_init(&pool->flush_lock); + spin_lock_init(&pool->list_lock); + INIT_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker); + + pool->fmr_attr.max_pages = fmr_message_size; + pool->fmr_attr.max_maps = rds_ibdev->fmr_max_remaps; + pool->fmr_attr.page_shift = rds_ibdev->fmr_page_shift; + pool->max_free_pinned = rds_ibdev->max_fmrs * fmr_message_size / 4; + + /* We never allow more than max_items MRs to be allocated. + * When we exceed more than max_items_soft, we start freeing + * items more aggressively. + * Make sure that max_items > max_items_soft > max_items / 2 + */ + pool->max_items_soft = rds_ibdev->max_fmrs * 3 / 4; + pool->max_items = rds_ibdev->max_fmrs; + + return pool; +} + +void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo) +{ + struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; + + iinfo->rdma_mr_max = pool->max_items; + iinfo->rdma_mr_size = pool->fmr_attr.max_pages; +} + +void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool) +{ + flush_workqueue(rds_wq); + rds_ib_flush_mr_pool(pool, 1); + BUG_ON(atomic_read(&pool->item_count)); + BUG_ON(atomic_read(&pool->free_pinned)); + kfree(pool); +} + +static inline struct rds_ib_mr *rds_ib_reuse_fmr(struct rds_ib_mr_pool *pool) +{ + struct rds_ib_mr *ibmr = NULL; + unsigned long flags; + + spin_lock_irqsave(&pool->list_lock, flags); + if (!list_empty(&pool->clean_list)) { + ibmr = list_entry(pool->clean_list.next, struct rds_ib_mr, list); + list_del_init(&ibmr->list); + } + spin_unlock_irqrestore(&pool->list_lock, flags); + + return ibmr; +} + +static struct rds_ib_mr *rds_ib_alloc_fmr(struct rds_ib_device *rds_ibdev) +{ + struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; + struct rds_ib_mr *ibmr = NULL; + int err = 0, iter = 0; + + while (1) { + ibmr = rds_ib_reuse_fmr(pool); + if (ibmr) + return ibmr; + + /* No clean MRs - now we have the choice of either + * allocating a fresh MR up to the limit imposed by the + * driver, or flush any dirty unused MRs. + * We try to avoid stalling in the send path if possible, + * so we allocate as long as we're allowed to. + * + * We're fussy with enforcing the FMR limit, though. If the driver + * tells us we can't use more than N fmrs, we shouldn't start + * arguing with it */ + if (atomic_inc_return(&pool->item_count) <= pool->max_items) + break; + + atomic_dec(&pool->item_count); + + if (++iter > 2) { + rds_ib_stats_inc(s_ib_rdma_mr_pool_depleted); + return ERR_PTR(-EAGAIN); + } + + /* We do have some empty MRs. Flush them out. */ + rds_ib_stats_inc(s_ib_rdma_mr_pool_wait); + rds_ib_flush_mr_pool(pool, 0); + } + + ibmr = kzalloc(sizeof(*ibmr), GFP_KERNEL); + if (!ibmr) { + err = -ENOMEM; + goto out_no_cigar; + } + + ibmr->fmr = ib_alloc_fmr(rds_ibdev->pd, + (IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE), + &pool->fmr_attr); + if (IS_ERR(ibmr->fmr)) { + err = PTR_ERR(ibmr->fmr); + ibmr->fmr = NULL; + printk(KERN_WARNING "RDS/IB: ib_alloc_fmr failed (err=%d)\n", err); + goto out_no_cigar; + } + + rds_ib_stats_inc(s_ib_rdma_mr_alloc); + return ibmr; + +out_no_cigar: + if (ibmr) { + if (ibmr->fmr) + ib_dealloc_fmr(ibmr->fmr); + kfree(ibmr); + } + atomic_dec(&pool->item_count); + return ERR_PTR(err); +} + +static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, struct rds_ib_mr *ibmr, + struct scatterlist *sg, unsigned int nents) +{ + struct ib_device *dev = rds_ibdev->dev; + struct scatterlist *scat = sg; + u64 io_addr = 0; + u64 *dma_pages; + u32 len; + int page_cnt, sg_dma_len; + int i, j; + int ret; + + sg_dma_len = ib_dma_map_sg(dev, sg, nents, + DMA_BIDIRECTIONAL); + if (unlikely(!sg_dma_len)) { + printk(KERN_WARNING "RDS/IB: dma_map_sg failed!\n"); + return -EBUSY; + } + + len = 0; + page_cnt = 0; + + for (i = 0; i < sg_dma_len; ++i) { + unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]); + u64 dma_addr = ib_sg_dma_address(dev, &scat[i]); + + if (dma_addr & ~rds_ibdev->fmr_page_mask) { + if (i > 0) + return -EINVAL; + else + ++page_cnt; + } + if ((dma_addr + dma_len) & ~rds_ibdev->fmr_page_mask) { + if (i < sg_dma_len - 1) + return -EINVAL; + else + ++page_cnt; + } + + len += dma_len; + } + + page_cnt += len >> rds_ibdev->fmr_page_shift; + if (page_cnt > fmr_message_size) + return -EINVAL; + + dma_pages = kmalloc(sizeof(u64) * page_cnt, GFP_ATOMIC); + if (!dma_pages) + return -ENOMEM; + + page_cnt = 0; + for (i = 0; i < sg_dma_len; ++i) { + unsigned int dma_len = ib_sg_dma_len(dev, &scat[i]); + u64 dma_addr = ib_sg_dma_address(dev, &scat[i]); + + for (j = 0; j < dma_len; j += rds_ibdev->fmr_page_size) + dma_pages[page_cnt++] = + (dma_addr & rds_ibdev->fmr_page_mask) + j; + } + + ret = ib_map_phys_fmr(ibmr->fmr, + dma_pages, page_cnt, io_addr); + if (ret) + goto out; + + /* Success - we successfully remapped the MR, so we can + * safely tear down the old mapping. */ + rds_ib_teardown_mr(ibmr); + + ibmr->sg = scat; + ibmr->sg_len = nents; + ibmr->sg_dma_len = sg_dma_len; + ibmr->remap_count++; + + rds_ib_stats_inc(s_ib_rdma_mr_used); + ret = 0; + +out: + kfree(dma_pages); + + return ret; +} + +void rds_ib_sync_mr(void *trans_private, int direction) +{ + struct rds_ib_mr *ibmr = trans_private; + struct rds_ib_device *rds_ibdev = ibmr->device; + + switch (direction) { + case DMA_FROM_DEVICE: + ib_dma_sync_sg_for_cpu(rds_ibdev->dev, ibmr->sg, + ibmr->sg_dma_len, DMA_BIDIRECTIONAL); + break; + case DMA_TO_DEVICE: + ib_dma_sync_sg_for_device(rds_ibdev->dev, ibmr->sg, + ibmr->sg_dma_len, DMA_BIDIRECTIONAL); + break; + } +} + +static void __rds_ib_teardown_mr(struct rds_ib_mr *ibmr) +{ + struct rds_ib_device *rds_ibdev = ibmr->device; + + if (ibmr->sg_dma_len) { + ib_dma_unmap_sg(rds_ibdev->dev, + ibmr->sg, ibmr->sg_len, + DMA_BIDIRECTIONAL); + ibmr->sg_dma_len = 0; + } + + /* Release the s/g list */ + if (ibmr->sg_len) { + unsigned int i; + + for (i = 0; i < ibmr->sg_len; ++i) { + struct page *page = sg_page(&ibmr->sg[i]); + + /* FIXME we need a way to tell a r/w MR + * from a r/o MR */ + set_page_dirty(page); + put_page(page); + } + kfree(ibmr->sg); + + ibmr->sg = NULL; + ibmr->sg_len = 0; + } +} + +static void rds_ib_teardown_mr(struct rds_ib_mr *ibmr) +{ + unsigned int pinned = ibmr->sg_len; + + __rds_ib_teardown_mr(ibmr); + if (pinned) { + struct rds_ib_device *rds_ibdev = ibmr->device; + struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; + + atomic_sub(pinned, &pool->free_pinned); + } +} + +static inline unsigned int rds_ib_flush_goal(struct rds_ib_mr_pool *pool, int free_all) +{ + unsigned int item_count; + + item_count = atomic_read(&pool->item_count); + if (free_all) + return item_count; + + return 0; +} + +/* + * Flush our pool of MRs. + * At a minimum, all currently unused MRs are unmapped. + * If the number of MRs allocated exceeds the limit, we also try + * to free as many MRs as needed to get back to this limit. + */ +static int rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, int free_all) +{ + struct rds_ib_mr *ibmr, *next; + LIST_HEAD(unmap_list); + LIST_HEAD(fmr_list); + unsigned long unpinned = 0; + unsigned long flags; + unsigned int nfreed = 0, ncleaned = 0, free_goal; + int ret = 0; + + rds_ib_stats_inc(s_ib_rdma_mr_pool_flush); + + mutex_lock(&pool->flush_lock); + + spin_lock_irqsave(&pool->list_lock, flags); + /* Get the list of all MRs to be dropped. Ordering matters - + * we want to put drop_list ahead of free_list. */ + list_splice_init(&pool->free_list, &unmap_list); + list_splice_init(&pool->drop_list, &unmap_list); + if (free_all) + list_splice_init(&pool->clean_list, &unmap_list); + spin_unlock_irqrestore(&pool->list_lock, flags); + + free_goal = rds_ib_flush_goal(pool, free_all); + + if (list_empty(&unmap_list)) + goto out; + + /* String all ib_mr's onto one list and hand them to ib_unmap_fmr */ + list_for_each_entry(ibmr, &unmap_list, list) + list_add(&ibmr->fmr->list, &fmr_list); + ret = ib_unmap_fmr(&fmr_list); + if (ret) + printk(KERN_WARNING "RDS/IB: ib_unmap_fmr failed (err=%d)\n", ret); + + /* Now we can destroy the DMA mapping and unpin any pages */ + list_for_each_entry_safe(ibmr, next, &unmap_list, list) { + unpinned += ibmr->sg_len; + __rds_ib_teardown_mr(ibmr); + if (nfreed < free_goal || ibmr->remap_count >= pool->fmr_attr.max_maps) { + rds_ib_stats_inc(s_ib_rdma_mr_free); + list_del(&ibmr->list); + ib_dealloc_fmr(ibmr->fmr); + kfree(ibmr); + nfreed++; + } + ncleaned++; + } + + spin_lock_irqsave(&pool->list_lock, flags); + list_splice(&unmap_list, &pool->clean_list); + spin_unlock_irqrestore(&pool->list_lock, flags); + + atomic_sub(unpinned, &pool->free_pinned); + atomic_sub(ncleaned, &pool->dirty_count); + atomic_sub(nfreed, &pool->item_count); + +out: + mutex_unlock(&pool->flush_lock); + return ret; +} + +static void rds_ib_mr_pool_flush_worker(struct work_struct *work) +{ + struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker); + + rds_ib_flush_mr_pool(pool, 0); +} + +void rds_ib_free_mr(void *trans_private, int invalidate) +{ + struct rds_ib_mr *ibmr = trans_private; + struct rds_ib_device *rds_ibdev = ibmr->device; + struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; + unsigned long flags; + + rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len); + + /* Return it to the pool's free list */ + spin_lock_irqsave(&pool->list_lock, flags); + if (ibmr->remap_count >= pool->fmr_attr.max_maps) + list_add(&ibmr->list, &pool->drop_list); + else + list_add(&ibmr->list, &pool->free_list); + + atomic_add(ibmr->sg_len, &pool->free_pinned); + atomic_inc(&pool->dirty_count); + spin_unlock_irqrestore(&pool->list_lock, flags); + + /* If we've pinned too many pages, request a flush */ + if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned + || atomic_read(&pool->dirty_count) >= pool->max_items / 10) + queue_work(rds_wq, &pool->flush_worker); + + if (invalidate) { + if (likely(!in_interrupt())) { + rds_ib_flush_mr_pool(pool, 0); + } else { + /* We get here if the user created a MR marked + * as use_once and invalidate at the same time. */ + queue_work(rds_wq, &pool->flush_worker); + } + } +} + +void rds_ib_flush_mrs(void) +{ + struct rds_ib_device *rds_ibdev; + + list_for_each_entry(rds_ibdev, &rds_ib_devices, list) { + struct rds_ib_mr_pool *pool = rds_ibdev->mr_pool; + + if (pool) + rds_ib_flush_mr_pool(pool, 0); + } +} + +void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, + struct rds_sock *rs, u32 *key_ret) +{ + struct rds_ib_device *rds_ibdev; + struct rds_ib_mr *ibmr = NULL; + int ret; + + rds_ibdev = rds_ib_get_device(rs->rs_bound_addr); + if (!rds_ibdev) { + ret = -ENODEV; + goto out; + } + + if (!rds_ibdev->mr_pool) { + ret = -ENODEV; + goto out; + } + + ibmr = rds_ib_alloc_fmr(rds_ibdev); + if (IS_ERR(ibmr)) + return ibmr; + + ret = rds_ib_map_fmr(rds_ibdev, ibmr, sg, nents); + if (ret == 0) + *key_ret = ibmr->fmr->rkey; + else + printk(KERN_WARNING "RDS/IB: map_fmr failed (errno=%d)\n", ret); + + ibmr->device = rds_ibdev; + + out: + if (ret) { + if (ibmr) + rds_ib_free_mr(ibmr, 0); + ibmr = ERR_PTR(ret); + } + return ibmr; +} -- cgit v1.2.3 From 6a0979df32296c3ba75a346db47a18292a231c6e Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 24 Feb 2009 15:30:33 +0000 Subject: RDS/IB: Implement IB-specific datagram send. Specific to IB is a credits-based flow control mechanism, in addition to the expected usage of the IB API to package outgoing data into work requests. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/ib_send.c | 874 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 874 insertions(+) create mode 100644 net/rds/ib_send.c (limited to 'net') diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c new file mode 100644 index 000000000000..cb6c52cb1c4c --- /dev/null +++ b/net/rds/ib_send.c @@ -0,0 +1,874 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include +#include + +#include "rds.h" +#include "rdma.h" +#include "ib.h" + +static void rds_ib_send_rdma_complete(struct rds_message *rm, + int wc_status) +{ + int notify_status; + + switch (wc_status) { + case IB_WC_WR_FLUSH_ERR: + return; + + case IB_WC_SUCCESS: + notify_status = RDS_RDMA_SUCCESS; + break; + + case IB_WC_REM_ACCESS_ERR: + notify_status = RDS_RDMA_REMOTE_ERROR; + break; + + default: + notify_status = RDS_RDMA_OTHER_ERROR; + break; + } + rds_rdma_send_complete(rm, notify_status); +} + +static void rds_ib_send_unmap_rdma(struct rds_ib_connection *ic, + struct rds_rdma_op *op) +{ + if (op->r_mapped) { + ib_dma_unmap_sg(ic->i_cm_id->device, + op->r_sg, op->r_nents, + op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); + op->r_mapped = 0; + } +} + +static void rds_ib_send_unmap_rm(struct rds_ib_connection *ic, + struct rds_ib_send_work *send, + int wc_status) +{ + struct rds_message *rm = send->s_rm; + + rdsdebug("ic %p send %p rm %p\n", ic, send, rm); + + ib_dma_unmap_sg(ic->i_cm_id->device, + rm->m_sg, rm->m_nents, + DMA_TO_DEVICE); + + if (rm->m_rdma_op != NULL) { + rds_ib_send_unmap_rdma(ic, rm->m_rdma_op); + + /* If the user asked for a completion notification on this + * message, we can implement three different semantics: + * 1. Notify when we received the ACK on the RDS message + * that was queued with the RDMA. This provides reliable + * notification of RDMA status at the expense of a one-way + * packet delay. + * 2. Notify when the IB stack gives us the completion event for + * the RDMA operation. + * 3. Notify when the IB stack gives us the completion event for + * the accompanying RDS messages. + * Here, we implement approach #3. To implement approach #2, + * call rds_rdma_send_complete from the cq_handler. To implement #1, + * don't call rds_rdma_send_complete at all, and fall back to the notify + * handling in the ACK processing code. + * + * Note: There's no need to explicitly sync any RDMA buffers using + * ib_dma_sync_sg_for_cpu - the completion for the RDMA + * operation itself unmapped the RDMA buffers, which takes care + * of synching. + */ + rds_ib_send_rdma_complete(rm, wc_status); + + if (rm->m_rdma_op->r_write) + rds_stats_add(s_send_rdma_bytes, rm->m_rdma_op->r_bytes); + else + rds_stats_add(s_recv_rdma_bytes, rm->m_rdma_op->r_bytes); + } + + /* If anyone waited for this message to get flushed out, wake + * them up now */ + rds_message_unmapped(rm); + + rds_message_put(rm); + send->s_rm = NULL; +} + +void rds_ib_send_init_ring(struct rds_ib_connection *ic) +{ + struct rds_ib_send_work *send; + u32 i; + + for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) { + struct ib_sge *sge; + + send->s_rm = NULL; + send->s_op = NULL; + + send->s_wr.wr_id = i; + send->s_wr.sg_list = send->s_sge; + send->s_wr.num_sge = 1; + send->s_wr.opcode = IB_WR_SEND; + send->s_wr.send_flags = 0; + send->s_wr.ex.imm_data = 0; + + sge = rds_ib_data_sge(ic, send->s_sge); + sge->lkey = ic->i_mr->lkey; + + sge = rds_ib_header_sge(ic, send->s_sge); + sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header)); + sge->length = sizeof(struct rds_header); + sge->lkey = ic->i_mr->lkey; + } +} + +void rds_ib_send_clear_ring(struct rds_ib_connection *ic) +{ + struct rds_ib_send_work *send; + u32 i; + + for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) { + if (send->s_wr.opcode == 0xdead) + continue; + if (send->s_rm) + rds_ib_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR); + if (send->s_op) + rds_ib_send_unmap_rdma(ic, send->s_op); + } +} + +/* + * The _oldest/_free ring operations here race cleanly with the alloc/unalloc + * operations performed in the send path. As the sender allocs and potentially + * unallocs the next free entry in the ring it doesn't alter which is + * the next to be freed, which is what this is concerned with. + */ +void rds_ib_send_cq_comp_handler(struct ib_cq *cq, void *context) +{ + struct rds_connection *conn = context; + struct rds_ib_connection *ic = conn->c_transport_data; + struct ib_wc wc; + struct rds_ib_send_work *send; + u32 completed; + u32 oldest; + u32 i = 0; + int ret; + + rdsdebug("cq %p conn %p\n", cq, conn); + rds_ib_stats_inc(s_ib_tx_cq_call); + ret = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); + if (ret) + rdsdebug("ib_req_notify_cq send failed: %d\n", ret); + + while (ib_poll_cq(cq, 1, &wc) > 0) { + rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n", + (unsigned long long)wc.wr_id, wc.status, wc.byte_len, + be32_to_cpu(wc.ex.imm_data)); + rds_ib_stats_inc(s_ib_tx_cq_event); + + if (wc.wr_id == RDS_IB_ACK_WR_ID) { + if (ic->i_ack_queued + HZ/2 < jiffies) + rds_ib_stats_inc(s_ib_tx_stalled); + rds_ib_ack_send_complete(ic); + continue; + } + + oldest = rds_ib_ring_oldest(&ic->i_send_ring); + + completed = rds_ib_ring_completed(&ic->i_send_ring, wc.wr_id, oldest); + + for (i = 0; i < completed; i++) { + send = &ic->i_sends[oldest]; + + /* In the error case, wc.opcode sometimes contains garbage */ + switch (send->s_wr.opcode) { + case IB_WR_SEND: + if (send->s_rm) + rds_ib_send_unmap_rm(ic, send, wc.status); + break; + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_READ: + /* Nothing to be done - the SG list will be unmapped + * when the SEND completes. */ + break; + default: + if (printk_ratelimit()) + printk(KERN_NOTICE + "RDS/IB: %s: unexpected opcode 0x%x in WR!\n", + __func__, send->s_wr.opcode); + break; + } + + send->s_wr.opcode = 0xdead; + send->s_wr.num_sge = 1; + if (send->s_queued + HZ/2 < jiffies) + rds_ib_stats_inc(s_ib_tx_stalled); + + /* If a RDMA operation produced an error, signal this right + * away. If we don't, the subsequent SEND that goes with this + * RDMA will be canceled with ERR_WFLUSH, and the application + * never learn that the RDMA failed. */ + if (unlikely(wc.status == IB_WC_REM_ACCESS_ERR && send->s_op)) { + struct rds_message *rm; + + rm = rds_send_get_message(conn, send->s_op); + if (rm) + rds_ib_send_rdma_complete(rm, wc.status); + } + + oldest = (oldest + 1) % ic->i_send_ring.w_nr; + } + + rds_ib_ring_free(&ic->i_send_ring, completed); + + if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) + || test_bit(0, &conn->c_map_queued)) + queue_delayed_work(rds_wq, &conn->c_send_w, 0); + + /* We expect errors as the qp is drained during shutdown */ + if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) { + rds_ib_conn_error(conn, + "send completion on %pI4 " + "had status %u, disconnecting and reconnecting\n", + &conn->c_faddr, wc.status); + } + } +} + +/* + * This is the main function for allocating credits when sending + * messages. + * + * Conceptually, we have two counters: + * - send credits: this tells us how many WRs we're allowed + * to submit without overruning the reciever's queue. For + * each SEND WR we post, we decrement this by one. + * + * - posted credits: this tells us how many WRs we recently + * posted to the receive queue. This value is transferred + * to the peer as a "credit update" in a RDS header field. + * Every time we transmit credits to the peer, we subtract + * the amount of transferred credits from this counter. + * + * It is essential that we avoid situations where both sides have + * exhausted their send credits, and are unable to send new credits + * to the peer. We achieve this by requiring that we send at least + * one credit update to the peer before exhausting our credits. + * When new credits arrive, we subtract one credit that is withheld + * until we've posted new buffers and are ready to transmit these + * credits (see rds_ib_send_add_credits below). + * + * The RDS send code is essentially single-threaded; rds_send_xmit + * grabs c_send_lock to ensure exclusive access to the send ring. + * However, the ACK sending code is independent and can race with + * message SENDs. + * + * In the send path, we need to update the counters for send credits + * and the counter of posted buffers atomically - when we use the + * last available credit, we cannot allow another thread to race us + * and grab the posted credits counter. Hence, we have to use a + * spinlock to protect the credit counter, or use atomics. + * + * Spinlocks shared between the send and the receive path are bad, + * because they create unnecessary delays. An early implementation + * using a spinlock showed a 5% degradation in throughput at some + * loads. + * + * This implementation avoids spinlocks completely, putting both + * counters into a single atomic, and updating that atomic using + * atomic_add (in the receive path, when receiving fresh credits), + * and using atomic_cmpxchg when updating the two counters. + */ +int rds_ib_send_grab_credits(struct rds_ib_connection *ic, + u32 wanted, u32 *adv_credits, int need_posted) +{ + unsigned int avail, posted, got = 0, advertise; + long oldval, newval; + + *adv_credits = 0; + if (!ic->i_flowctl) + return wanted; + +try_again: + advertise = 0; + oldval = newval = atomic_read(&ic->i_credits); + posted = IB_GET_POST_CREDITS(oldval); + avail = IB_GET_SEND_CREDITS(oldval); + + rdsdebug("rds_ib_send_grab_credits(%u): credits=%u posted=%u\n", + wanted, avail, posted); + + /* The last credit must be used to send a credit update. */ + if (avail && !posted) + avail--; + + if (avail < wanted) { + struct rds_connection *conn = ic->i_cm_id->context; + + /* Oops, there aren't that many credits left! */ + set_bit(RDS_LL_SEND_FULL, &conn->c_flags); + got = avail; + } else { + /* Sometimes you get what you want, lalala. */ + got = wanted; + } + newval -= IB_SET_SEND_CREDITS(got); + + /* + * If need_posted is non-zero, then the caller wants + * the posted regardless of whether any send credits are + * available. + */ + if (posted && (got || need_posted)) { + advertise = min_t(unsigned int, posted, RDS_MAX_ADV_CREDIT); + newval -= IB_SET_POST_CREDITS(advertise); + } + + /* Finally bill everything */ + if (atomic_cmpxchg(&ic->i_credits, oldval, newval) != oldval) + goto try_again; + + *adv_credits = advertise; + return got; +} + +void rds_ib_send_add_credits(struct rds_connection *conn, unsigned int credits) +{ + struct rds_ib_connection *ic = conn->c_transport_data; + + if (credits == 0) + return; + + rdsdebug("rds_ib_send_add_credits(%u): current=%u%s\n", + credits, + IB_GET_SEND_CREDITS(atomic_read(&ic->i_credits)), + test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ? ", ll_send_full" : ""); + + atomic_add(IB_SET_SEND_CREDITS(credits), &ic->i_credits); + if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags)) + queue_delayed_work(rds_wq, &conn->c_send_w, 0); + + WARN_ON(IB_GET_SEND_CREDITS(credits) >= 16384); + + rds_ib_stats_inc(s_ib_rx_credit_updates); +} + +void rds_ib_advertise_credits(struct rds_connection *conn, unsigned int posted) +{ + struct rds_ib_connection *ic = conn->c_transport_data; + + if (posted == 0) + return; + + atomic_add(IB_SET_POST_CREDITS(posted), &ic->i_credits); + + /* Decide whether to send an update to the peer now. + * If we would send a credit update for every single buffer we + * post, we would end up with an ACK storm (ACK arrives, + * consumes buffer, we refill the ring, send ACK to remote + * advertising the newly posted buffer... ad inf) + * + * Performance pretty much depends on how often we send + * credit updates - too frequent updates mean lots of ACKs. + * Too infrequent updates, and the peer will run out of + * credits and has to throttle. + * For the time being, 16 seems to be a good compromise. + */ + if (IB_GET_POST_CREDITS(atomic_read(&ic->i_credits)) >= 16) + set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); +} + +static inline void +rds_ib_xmit_populate_wr(struct rds_ib_connection *ic, + struct rds_ib_send_work *send, unsigned int pos, + unsigned long buffer, unsigned int length, + int send_flags) +{ + struct ib_sge *sge; + + WARN_ON(pos != send - ic->i_sends); + + send->s_wr.send_flags = send_flags; + send->s_wr.opcode = IB_WR_SEND; + send->s_wr.num_sge = 2; + send->s_wr.next = NULL; + send->s_queued = jiffies; + send->s_op = NULL; + + if (length != 0) { + sge = rds_ib_data_sge(ic, send->s_sge); + sge->addr = buffer; + sge->length = length; + sge->lkey = ic->i_mr->lkey; + + sge = rds_ib_header_sge(ic, send->s_sge); + } else { + /* We're sending a packet with no payload. There is only + * one SGE */ + send->s_wr.num_sge = 1; + sge = &send->s_sge[0]; + } + + sge->addr = ic->i_send_hdrs_dma + (pos * sizeof(struct rds_header)); + sge->length = sizeof(struct rds_header); + sge->lkey = ic->i_mr->lkey; +} + +/* + * This can be called multiple times for a given message. The first time + * we see a message we map its scatterlist into the IB device so that + * we can provide that mapped address to the IB scatter gather entries + * in the IB work requests. We translate the scatterlist into a series + * of work requests that fragment the message. These work requests complete + * in order so we pass ownership of the message to the completion handler + * once we send the final fragment. + * + * The RDS core uses the c_send_lock to only enter this function once + * per connection. This makes sure that the tx ring alloc/unalloc pairs + * don't get out of sync and confuse the ring. + */ +int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, + unsigned int hdr_off, unsigned int sg, unsigned int off) +{ + struct rds_ib_connection *ic = conn->c_transport_data; + struct ib_device *dev = ic->i_cm_id->device; + struct rds_ib_send_work *send = NULL; + struct rds_ib_send_work *first; + struct rds_ib_send_work *prev; + struct ib_send_wr *failed_wr; + struct scatterlist *scat; + u32 pos; + u32 i; + u32 work_alloc; + u32 credit_alloc; + u32 posted; + u32 adv_credits = 0; + int send_flags = 0; + int sent; + int ret; + int flow_controlled = 0; + + BUG_ON(off % RDS_FRAG_SIZE); + BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header)); + + /* FIXME we may overallocate here */ + if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0) + i = 1; + else + i = ceil(be32_to_cpu(rm->m_inc.i_hdr.h_len), RDS_FRAG_SIZE); + + work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos); + if (work_alloc == 0) { + set_bit(RDS_LL_SEND_FULL, &conn->c_flags); + rds_ib_stats_inc(s_ib_tx_ring_full); + ret = -ENOMEM; + goto out; + } + + credit_alloc = work_alloc; + if (ic->i_flowctl) { + credit_alloc = rds_ib_send_grab_credits(ic, work_alloc, &posted, 0); + adv_credits += posted; + if (credit_alloc < work_alloc) { + rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - credit_alloc); + work_alloc = credit_alloc; + flow_controlled++; + } + if (work_alloc == 0) { + rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); + rds_ib_stats_inc(s_ib_tx_throttle); + ret = -ENOMEM; + goto out; + } + } + + /* map the message the first time we see it */ + if (ic->i_rm == NULL) { + /* + printk(KERN_NOTICE "rds_ib_xmit prep msg dport=%u flags=0x%x len=%d\n", + be16_to_cpu(rm->m_inc.i_hdr.h_dport), + rm->m_inc.i_hdr.h_flags, + be32_to_cpu(rm->m_inc.i_hdr.h_len)); + */ + if (rm->m_nents) { + rm->m_count = ib_dma_map_sg(dev, + rm->m_sg, rm->m_nents, DMA_TO_DEVICE); + rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->m_count); + if (rm->m_count == 0) { + rds_ib_stats_inc(s_ib_tx_sg_mapping_failure); + rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); + ret = -ENOMEM; /* XXX ? */ + goto out; + } + } else { + rm->m_count = 0; + } + + ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs; + ic->i_unsignaled_bytes = rds_ib_sysctl_max_unsig_bytes; + rds_message_addref(rm); + ic->i_rm = rm; + + /* Finalize the header */ + if (test_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags)) + rm->m_inc.i_hdr.h_flags |= RDS_FLAG_ACK_REQUIRED; + if (test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) + rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED; + + /* If it has a RDMA op, tell the peer we did it. This is + * used by the peer to release use-once RDMA MRs. */ + if (rm->m_rdma_op) { + struct rds_ext_header_rdma ext_hdr; + + ext_hdr.h_rdma_rkey = cpu_to_be32(rm->m_rdma_op->r_key); + rds_message_add_extension(&rm->m_inc.i_hdr, + RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr)); + } + if (rm->m_rdma_cookie) { + rds_message_add_rdma_dest_extension(&rm->m_inc.i_hdr, + rds_rdma_cookie_key(rm->m_rdma_cookie), + rds_rdma_cookie_offset(rm->m_rdma_cookie)); + } + + /* Note - rds_ib_piggyb_ack clears the ACK_REQUIRED bit, so + * we should not do this unless we have a chance of at least + * sticking the header into the send ring. Which is why we + * should call rds_ib_ring_alloc first. */ + rm->m_inc.i_hdr.h_ack = cpu_to_be64(rds_ib_piggyb_ack(ic)); + rds_message_make_checksum(&rm->m_inc.i_hdr); + + /* + * Update adv_credits since we reset the ACK_REQUIRED bit. + */ + rds_ib_send_grab_credits(ic, 0, &posted, 1); + adv_credits += posted; + BUG_ON(adv_credits > 255); + } else if (ic->i_rm != rm) + BUG(); + + send = &ic->i_sends[pos]; + first = send; + prev = NULL; + scat = &rm->m_sg[sg]; + sent = 0; + i = 0; + + /* Sometimes you want to put a fence between an RDMA + * READ and the following SEND. + * We could either do this all the time + * or when requested by the user. Right now, we let + * the application choose. + */ + if (rm->m_rdma_op && rm->m_rdma_op->r_fence) + send_flags = IB_SEND_FENCE; + + /* + * We could be copying the header into the unused tail of the page. + * That would need to be changed in the future when those pages might + * be mapped userspace pages or page cache pages. So instead we always + * use a second sge and our long-lived ring of mapped headers. We send + * the header after the data so that the data payload can be aligned on + * the receiver. + */ + + /* handle a 0-len message */ + if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0) { + rds_ib_xmit_populate_wr(ic, send, pos, 0, 0, send_flags); + goto add_header; + } + + /* if there's data reference it with a chain of work reqs */ + for (; i < work_alloc && scat != &rm->m_sg[rm->m_count]; i++) { + unsigned int len; + + send = &ic->i_sends[pos]; + + len = min(RDS_FRAG_SIZE, ib_sg_dma_len(dev, scat) - off); + rds_ib_xmit_populate_wr(ic, send, pos, + ib_sg_dma_address(dev, scat) + off, len, + send_flags); + + /* + * We want to delay signaling completions just enough to get + * the batching benefits but not so much that we create dead time + * on the wire. + */ + if (ic->i_unsignaled_wrs-- == 0) { + ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs; + send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; + } + + ic->i_unsignaled_bytes -= len; + if (ic->i_unsignaled_bytes <= 0) { + ic->i_unsignaled_bytes = rds_ib_sysctl_max_unsig_bytes; + send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; + } + + /* + * Always signal the last one if we're stopping due to flow control. + */ + if (flow_controlled && i == (work_alloc-1)) + send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; + + rdsdebug("send %p wr %p num_sge %u next %p\n", send, + &send->s_wr, send->s_wr.num_sge, send->s_wr.next); + + sent += len; + off += len; + if (off == ib_sg_dma_len(dev, scat)) { + scat++; + off = 0; + } + +add_header: + /* Tack on the header after the data. The header SGE should already + * have been set up to point to the right header buffer. */ + memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header)); + + if (0) { + struct rds_header *hdr = &ic->i_send_hdrs[pos]; + + printk(KERN_NOTICE "send WR dport=%u flags=0x%x len=%d\n", + be16_to_cpu(hdr->h_dport), + hdr->h_flags, + be32_to_cpu(hdr->h_len)); + } + if (adv_credits) { + struct rds_header *hdr = &ic->i_send_hdrs[pos]; + + /* add credit and redo the header checksum */ + hdr->h_credit = adv_credits; + rds_message_make_checksum(hdr); + adv_credits = 0; + rds_ib_stats_inc(s_ib_tx_credit_updates); + } + + if (prev) + prev->s_wr.next = &send->s_wr; + prev = send; + + pos = (pos + 1) % ic->i_send_ring.w_nr; + } + + /* Account the RDS header in the number of bytes we sent, but just once. + * The caller has no concept of fragmentation. */ + if (hdr_off == 0) + sent += sizeof(struct rds_header); + + /* if we finished the message then send completion owns it */ + if (scat == &rm->m_sg[rm->m_count]) { + prev->s_rm = ic->i_rm; + prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; + ic->i_rm = NULL; + } + + if (i < work_alloc) { + rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i); + work_alloc = i; + } + if (ic->i_flowctl && i < credit_alloc) + rds_ib_send_add_credits(conn, credit_alloc - i); + + /* XXX need to worry about failed_wr and partial sends. */ + failed_wr = &first->s_wr; + ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); + rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic, + first, &first->s_wr, ret, failed_wr); + BUG_ON(failed_wr != &first->s_wr); + if (ret) { + printk(KERN_WARNING "RDS/IB: ib_post_send to %pI4 " + "returned %d\n", &conn->c_faddr, ret); + rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); + if (prev->s_rm) { + ic->i_rm = prev->s_rm; + prev->s_rm = NULL; + } + /* Finesse this later */ + BUG(); + goto out; + } + + ret = sent; +out: + BUG_ON(adv_credits); + return ret; +} + +int rds_ib_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op) +{ + struct rds_ib_connection *ic = conn->c_transport_data; + struct rds_ib_send_work *send = NULL; + struct rds_ib_send_work *first; + struct rds_ib_send_work *prev; + struct ib_send_wr *failed_wr; + struct rds_ib_device *rds_ibdev; + struct scatterlist *scat; + unsigned long len; + u64 remote_addr = op->r_remote_addr; + u32 pos; + u32 work_alloc; + u32 i; + u32 j; + int sent; + int ret; + int num_sge; + + rds_ibdev = ib_get_client_data(ic->i_cm_id->device, &rds_ib_client); + + /* map the message the first time we see it */ + if (!op->r_mapped) { + op->r_count = ib_dma_map_sg(ic->i_cm_id->device, + op->r_sg, op->r_nents, (op->r_write) ? + DMA_TO_DEVICE : DMA_FROM_DEVICE); + rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->r_count); + if (op->r_count == 0) { + rds_ib_stats_inc(s_ib_tx_sg_mapping_failure); + ret = -ENOMEM; /* XXX ? */ + goto out; + } + + op->r_mapped = 1; + } + + /* + * Instead of knowing how to return a partial rdma read/write we insist that there + * be enough work requests to send the entire message. + */ + i = ceil(op->r_count, rds_ibdev->max_sge); + + work_alloc = rds_ib_ring_alloc(&ic->i_send_ring, i, &pos); + if (work_alloc != i) { + rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); + rds_ib_stats_inc(s_ib_tx_ring_full); + ret = -ENOMEM; + goto out; + } + + send = &ic->i_sends[pos]; + first = send; + prev = NULL; + scat = &op->r_sg[0]; + sent = 0; + num_sge = op->r_count; + + for (i = 0; i < work_alloc && scat != &op->r_sg[op->r_count]; i++) { + send->s_wr.send_flags = 0; + send->s_queued = jiffies; + /* + * We want to delay signaling completions just enough to get + * the batching benefits but not so much that we create dead time on the wire. + */ + if (ic->i_unsignaled_wrs-- == 0) { + ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs; + send->s_wr.send_flags = IB_SEND_SIGNALED; + } + + send->s_wr.opcode = op->r_write ? IB_WR_RDMA_WRITE : IB_WR_RDMA_READ; + send->s_wr.wr.rdma.remote_addr = remote_addr; + send->s_wr.wr.rdma.rkey = op->r_key; + send->s_op = op; + + if (num_sge > rds_ibdev->max_sge) { + send->s_wr.num_sge = rds_ibdev->max_sge; + num_sge -= rds_ibdev->max_sge; + } else { + send->s_wr.num_sge = num_sge; + } + + send->s_wr.next = NULL; + + if (prev) + prev->s_wr.next = &send->s_wr; + + for (j = 0; j < send->s_wr.num_sge && scat != &op->r_sg[op->r_count]; j++) { + len = ib_sg_dma_len(ic->i_cm_id->device, scat); + send->s_sge[j].addr = + ib_sg_dma_address(ic->i_cm_id->device, scat); + send->s_sge[j].length = len; + send->s_sge[j].lkey = ic->i_mr->lkey; + + sent += len; + rdsdebug("ic %p sent %d remote_addr %llu\n", ic, sent, remote_addr); + + remote_addr += len; + scat++; + } + + rdsdebug("send %p wr %p num_sge %u next %p\n", send, + &send->s_wr, send->s_wr.num_sge, send->s_wr.next); + + prev = send; + if (++send == &ic->i_sends[ic->i_send_ring.w_nr]) + send = ic->i_sends; + } + + /* if we finished the message then send completion owns it */ + if (scat == &op->r_sg[op->r_count]) + prev->s_wr.send_flags = IB_SEND_SIGNALED; + + if (i < work_alloc) { + rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc - i); + work_alloc = i; + } + + failed_wr = &first->s_wr; + ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); + rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic, + first, &first->s_wr, ret, failed_wr); + BUG_ON(failed_wr != &first->s_wr); + if (ret) { + printk(KERN_WARNING "RDS/IB: rdma ib_post_send to %pI4 " + "returned %d\n", &conn->c_faddr, ret); + rds_ib_ring_unalloc(&ic->i_send_ring, work_alloc); + goto out; + } + + if (unlikely(failed_wr != &first->s_wr)) { + printk(KERN_WARNING "RDS/IB: ib_post_send() rc=%d, but failed_wqe updated!\n", ret); + BUG_ON(failed_wr != &first->s_wr); + } + + +out: + return ret; +} + +void rds_ib_xmit_complete(struct rds_connection *conn) +{ + struct rds_ib_connection *ic = conn->c_transport_data; + + /* We may have a pending ACK or window update we were unable + * to send previously (due to flow control). Try again. */ + rds_ib_attempt_ack(ic); +} -- cgit v1.2.3 From 1e23b3ee0e943060a2e677cd20ed666b6060e12d Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 24 Feb 2009 15:30:34 +0000 Subject: RDS/IB: Receive datagrams via IB Header parsing, ring refill. It puts the incoming data into an rds_incoming struct, which is passed up to rds-core. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/ib_recv.c | 869 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 869 insertions(+) create mode 100644 net/rds/ib_recv.c (limited to 'net') diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c new file mode 100644 index 000000000000..5061b5502162 --- /dev/null +++ b/net/rds/ib_recv.c @@ -0,0 +1,869 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include +#include + +#include "rds.h" +#include "ib.h" + +static struct kmem_cache *rds_ib_incoming_slab; +static struct kmem_cache *rds_ib_frag_slab; +static atomic_t rds_ib_allocation = ATOMIC_INIT(0); + +static void rds_ib_frag_drop_page(struct rds_page_frag *frag) +{ + rdsdebug("frag %p page %p\n", frag, frag->f_page); + __free_page(frag->f_page); + frag->f_page = NULL; +} + +static void rds_ib_frag_free(struct rds_page_frag *frag) +{ + rdsdebug("frag %p page %p\n", frag, frag->f_page); + BUG_ON(frag->f_page != NULL); + kmem_cache_free(rds_ib_frag_slab, frag); +} + +/* + * We map a page at a time. Its fragments are posted in order. This + * is called in fragment order as the fragments get send completion events. + * Only the last frag in the page performs the unmapping. + * + * It's OK for ring cleanup to call this in whatever order it likes because + * DMA is not in flight and so we can unmap while other ring entries still + * hold page references in their frags. + */ +static void rds_ib_recv_unmap_page(struct rds_ib_connection *ic, + struct rds_ib_recv_work *recv) +{ + struct rds_page_frag *frag = recv->r_frag; + + rdsdebug("recv %p frag %p page %p\n", recv, frag, frag->f_page); + if (frag->f_mapped) + ib_dma_unmap_page(ic->i_cm_id->device, + frag->f_mapped, + RDS_FRAG_SIZE, DMA_FROM_DEVICE); + frag->f_mapped = 0; +} + +void rds_ib_recv_init_ring(struct rds_ib_connection *ic) +{ + struct rds_ib_recv_work *recv; + u32 i; + + for (i = 0, recv = ic->i_recvs; i < ic->i_recv_ring.w_nr; i++, recv++) { + struct ib_sge *sge; + + recv->r_ibinc = NULL; + recv->r_frag = NULL; + + recv->r_wr.next = NULL; + recv->r_wr.wr_id = i; + recv->r_wr.sg_list = recv->r_sge; + recv->r_wr.num_sge = RDS_IB_RECV_SGE; + + sge = rds_ib_data_sge(ic, recv->r_sge); + sge->addr = 0; + sge->length = RDS_FRAG_SIZE; + sge->lkey = ic->i_mr->lkey; + + sge = rds_ib_header_sge(ic, recv->r_sge); + sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header)); + sge->length = sizeof(struct rds_header); + sge->lkey = ic->i_mr->lkey; + } +} + +static void rds_ib_recv_clear_one(struct rds_ib_connection *ic, + struct rds_ib_recv_work *recv) +{ + if (recv->r_ibinc) { + rds_inc_put(&recv->r_ibinc->ii_inc); + recv->r_ibinc = NULL; + } + if (recv->r_frag) { + rds_ib_recv_unmap_page(ic, recv); + if (recv->r_frag->f_page) + rds_ib_frag_drop_page(recv->r_frag); + rds_ib_frag_free(recv->r_frag); + recv->r_frag = NULL; + } +} + +void rds_ib_recv_clear_ring(struct rds_ib_connection *ic) +{ + u32 i; + + for (i = 0; i < ic->i_recv_ring.w_nr; i++) + rds_ib_recv_clear_one(ic, &ic->i_recvs[i]); + + if (ic->i_frag.f_page) + rds_ib_frag_drop_page(&ic->i_frag); +} + +static int rds_ib_recv_refill_one(struct rds_connection *conn, + struct rds_ib_recv_work *recv, + gfp_t kptr_gfp, gfp_t page_gfp) +{ + struct rds_ib_connection *ic = conn->c_transport_data; + dma_addr_t dma_addr; + struct ib_sge *sge; + int ret = -ENOMEM; + + if (recv->r_ibinc == NULL) { + if (atomic_read(&rds_ib_allocation) >= rds_ib_sysctl_max_recv_allocation) { + rds_ib_stats_inc(s_ib_rx_alloc_limit); + goto out; + } + recv->r_ibinc = kmem_cache_alloc(rds_ib_incoming_slab, + kptr_gfp); + if (recv->r_ibinc == NULL) + goto out; + atomic_inc(&rds_ib_allocation); + INIT_LIST_HEAD(&recv->r_ibinc->ii_frags); + rds_inc_init(&recv->r_ibinc->ii_inc, conn, conn->c_faddr); + } + + if (recv->r_frag == NULL) { + recv->r_frag = kmem_cache_alloc(rds_ib_frag_slab, kptr_gfp); + if (recv->r_frag == NULL) + goto out; + INIT_LIST_HEAD(&recv->r_frag->f_item); + recv->r_frag->f_page = NULL; + } + + if (ic->i_frag.f_page == NULL) { + ic->i_frag.f_page = alloc_page(page_gfp); + if (ic->i_frag.f_page == NULL) + goto out; + ic->i_frag.f_offset = 0; + } + + dma_addr = ib_dma_map_page(ic->i_cm_id->device, + ic->i_frag.f_page, + ic->i_frag.f_offset, + RDS_FRAG_SIZE, + DMA_FROM_DEVICE); + if (ib_dma_mapping_error(ic->i_cm_id->device, dma_addr)) + goto out; + + /* + * Once we get the RDS_PAGE_LAST_OFF frag then rds_ib_frag_unmap() + * must be called on this recv. This happens as completions hit + * in order or on connection shutdown. + */ + recv->r_frag->f_page = ic->i_frag.f_page; + recv->r_frag->f_offset = ic->i_frag.f_offset; + recv->r_frag->f_mapped = dma_addr; + + sge = rds_ib_data_sge(ic, recv->r_sge); + sge->addr = dma_addr; + sge->length = RDS_FRAG_SIZE; + + sge = rds_ib_header_sge(ic, recv->r_sge); + sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header); + sge->length = sizeof(struct rds_header); + + get_page(recv->r_frag->f_page); + + if (ic->i_frag.f_offset < RDS_PAGE_LAST_OFF) { + ic->i_frag.f_offset += RDS_FRAG_SIZE; + } else { + put_page(ic->i_frag.f_page); + ic->i_frag.f_page = NULL; + ic->i_frag.f_offset = 0; + } + + ret = 0; +out: + return ret; +} + +/* + * This tries to allocate and post unused work requests after making sure that + * they have all the allocations they need to queue received fragments into + * sockets. The i_recv_mutex is held here so that ring_alloc and _unalloc + * pairs don't go unmatched. + * + * -1 is returned if posting fails due to temporary resource exhaustion. + */ +int rds_ib_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp, + gfp_t page_gfp, int prefill) +{ + struct rds_ib_connection *ic = conn->c_transport_data; + struct rds_ib_recv_work *recv; + struct ib_recv_wr *failed_wr; + unsigned int posted = 0; + int ret = 0; + u32 pos; + + while ((prefill || rds_conn_up(conn)) + && rds_ib_ring_alloc(&ic->i_recv_ring, 1, &pos)) { + if (pos >= ic->i_recv_ring.w_nr) { + printk(KERN_NOTICE "Argh - ring alloc returned pos=%u\n", + pos); + ret = -EINVAL; + break; + } + + recv = &ic->i_recvs[pos]; + ret = rds_ib_recv_refill_one(conn, recv, kptr_gfp, page_gfp); + if (ret) { + ret = -1; + break; + } + + /* XXX when can this fail? */ + ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr); + rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv, + recv->r_ibinc, recv->r_frag->f_page, + (long) recv->r_frag->f_mapped, ret); + if (ret) { + rds_ib_conn_error(conn, "recv post on " + "%pI4 returned %d, disconnecting and " + "reconnecting\n", &conn->c_faddr, + ret); + ret = -1; + break; + } + + posted++; + } + + /* We're doing flow control - update the window. */ + if (ic->i_flowctl && posted) + rds_ib_advertise_credits(conn, posted); + + if (ret) + rds_ib_ring_unalloc(&ic->i_recv_ring, 1); + return ret; +} + +void rds_ib_inc_purge(struct rds_incoming *inc) +{ + struct rds_ib_incoming *ibinc; + struct rds_page_frag *frag; + struct rds_page_frag *pos; + + ibinc = container_of(inc, struct rds_ib_incoming, ii_inc); + rdsdebug("purging ibinc %p inc %p\n", ibinc, inc); + + list_for_each_entry_safe(frag, pos, &ibinc->ii_frags, f_item) { + list_del_init(&frag->f_item); + rds_ib_frag_drop_page(frag); + rds_ib_frag_free(frag); + } +} + +void rds_ib_inc_free(struct rds_incoming *inc) +{ + struct rds_ib_incoming *ibinc; + + ibinc = container_of(inc, struct rds_ib_incoming, ii_inc); + + rds_ib_inc_purge(inc); + rdsdebug("freeing ibinc %p inc %p\n", ibinc, inc); + BUG_ON(!list_empty(&ibinc->ii_frags)); + kmem_cache_free(rds_ib_incoming_slab, ibinc); + atomic_dec(&rds_ib_allocation); + BUG_ON(atomic_read(&rds_ib_allocation) < 0); +} + +int rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov, + size_t size) +{ + struct rds_ib_incoming *ibinc; + struct rds_page_frag *frag; + struct iovec *iov = first_iov; + unsigned long to_copy; + unsigned long frag_off = 0; + unsigned long iov_off = 0; + int copied = 0; + int ret; + u32 len; + + ibinc = container_of(inc, struct rds_ib_incoming, ii_inc); + frag = list_entry(ibinc->ii_frags.next, struct rds_page_frag, f_item); + len = be32_to_cpu(inc->i_hdr.h_len); + + while (copied < size && copied < len) { + if (frag_off == RDS_FRAG_SIZE) { + frag = list_entry(frag->f_item.next, + struct rds_page_frag, f_item); + frag_off = 0; + } + while (iov_off == iov->iov_len) { + iov_off = 0; + iov++; + } + + to_copy = min(iov->iov_len - iov_off, RDS_FRAG_SIZE - frag_off); + to_copy = min_t(size_t, to_copy, size - copied); + to_copy = min_t(unsigned long, to_copy, len - copied); + + rdsdebug("%lu bytes to user [%p, %zu] + %lu from frag " + "[%p, %lu] + %lu\n", + to_copy, iov->iov_base, iov->iov_len, iov_off, + frag->f_page, frag->f_offset, frag_off); + + /* XXX needs + offset for multiple recvs per page */ + ret = rds_page_copy_to_user(frag->f_page, + frag->f_offset + frag_off, + iov->iov_base + iov_off, + to_copy); + if (ret) { + copied = ret; + break; + } + + iov_off += to_copy; + frag_off += to_copy; + copied += to_copy; + } + + return copied; +} + +/* ic starts out kzalloc()ed */ +void rds_ib_recv_init_ack(struct rds_ib_connection *ic) +{ + struct ib_send_wr *wr = &ic->i_ack_wr; + struct ib_sge *sge = &ic->i_ack_sge; + + sge->addr = ic->i_ack_dma; + sge->length = sizeof(struct rds_header); + sge->lkey = ic->i_mr->lkey; + + wr->sg_list = sge; + wr->num_sge = 1; + wr->opcode = IB_WR_SEND; + wr->wr_id = RDS_IB_ACK_WR_ID; + wr->send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED; +} + +/* + * You'd think that with reliable IB connections you wouldn't need to ack + * messages that have been received. The problem is that IB hardware generates + * an ack message before it has DMAed the message into memory. This creates a + * potential message loss if the HCA is disabled for any reason between when it + * sends the ack and before the message is DMAed and processed. This is only a + * potential issue if another HCA is available for fail-over. + * + * When the remote host receives our ack they'll free the sent message from + * their send queue. To decrease the latency of this we always send an ack + * immediately after we've received messages. + * + * For simplicity, we only have one ack in flight at a time. This puts + * pressure on senders to have deep enough send queues to absorb the latency of + * a single ack frame being in flight. This might not be good enough. + * + * This is implemented by have a long-lived send_wr and sge which point to a + * statically allocated ack frame. This ack wr does not fall under the ring + * accounting that the tx and rx wrs do. The QP attribute specifically makes + * room for it beyond the ring size. Send completion notices its special + * wr_id and avoids working with the ring in that case. + */ +static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, + int ack_required) +{ + rds_ib_set_64bit(&ic->i_ack_next, seq); + if (ack_required) { + smp_mb__before_clear_bit(); + set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); + } +} + +static u64 rds_ib_get_ack(struct rds_ib_connection *ic) +{ + clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); + smp_mb__after_clear_bit(); + + return ic->i_ack_next; +} + +static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credits) +{ + struct rds_header *hdr = ic->i_ack; + struct ib_send_wr *failed_wr; + u64 seq; + int ret; + + seq = rds_ib_get_ack(ic); + + rdsdebug("send_ack: ic %p ack %llu\n", ic, (unsigned long long) seq); + rds_message_populate_header(hdr, 0, 0, 0); + hdr->h_ack = cpu_to_be64(seq); + hdr->h_credit = adv_credits; + rds_message_make_checksum(hdr); + ic->i_ack_queued = jiffies; + + ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, &failed_wr); + if (unlikely(ret)) { + /* Failed to send. Release the WR, and + * force another ACK. + */ + clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); + set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); + + rds_ib_stats_inc(s_ib_ack_send_failure); + /* Need to finesse this later. */ + BUG(); + } else + rds_ib_stats_inc(s_ib_ack_sent); +} + +/* + * There are 3 ways of getting acknowledgements to the peer: + * 1. We call rds_ib_attempt_ack from the recv completion handler + * to send an ACK-only frame. + * However, there can be only one such frame in the send queue + * at any time, so we may have to postpone it. + * 2. When another (data) packet is transmitted while there's + * an ACK in the queue, we piggyback the ACK sequence number + * on the data packet. + * 3. If the ACK WR is done sending, we get called from the + * send queue completion handler, and check whether there's + * another ACK pending (postponed because the WR was on the + * queue). If so, we transmit it. + * + * We maintain 2 variables: + * - i_ack_flags, which keeps track of whether the ACK WR + * is currently in the send queue or not (IB_ACK_IN_FLIGHT) + * - i_ack_next, which is the last sequence number we received + * + * Potentially, send queue and receive queue handlers can run concurrently. + * + * Reconnecting complicates this picture just slightly. When we + * reconnect, we may be seeing duplicate packets. The peer + * is retransmitting them, because it hasn't seen an ACK for + * them. It is important that we ACK these. + * + * ACK mitigation adds a header flag "ACK_REQUIRED"; any packet with + * this flag set *MUST* be acknowledged immediately. + */ + +/* + * When we get here, we're called from the recv queue handler. + * Check whether we ought to transmit an ACK. + */ +void rds_ib_attempt_ack(struct rds_ib_connection *ic) +{ + unsigned int adv_credits; + + if (!test_bit(IB_ACK_REQUESTED, &ic->i_ack_flags)) + return; + + if (test_and_set_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags)) { + rds_ib_stats_inc(s_ib_ack_send_delayed); + return; + } + + /* Can we get a send credit? */ + if (!rds_ib_send_grab_credits(ic, 1, &adv_credits, 0)) { + rds_ib_stats_inc(s_ib_tx_throttle); + clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); + return; + } + + clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); + rds_ib_send_ack(ic, adv_credits); +} + +/* + * We get here from the send completion handler, when the + * adapter tells us the ACK frame was sent. + */ +void rds_ib_ack_send_complete(struct rds_ib_connection *ic) +{ + clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); + rds_ib_attempt_ack(ic); +} + +/* + * This is called by the regular xmit code when it wants to piggyback + * an ACK on an outgoing frame. + */ +u64 rds_ib_piggyb_ack(struct rds_ib_connection *ic) +{ + if (test_and_clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags)) + rds_ib_stats_inc(s_ib_ack_send_piggybacked); + return rds_ib_get_ack(ic); +} + +/* + * It's kind of lame that we're copying from the posted receive pages into + * long-lived bitmaps. We could have posted the bitmaps and rdma written into + * them. But receiving new congestion bitmaps should be a *rare* event, so + * hopefully we won't need to invest that complexity in making it more + * efficient. By copying we can share a simpler core with TCP which has to + * copy. + */ +static void rds_ib_cong_recv(struct rds_connection *conn, + struct rds_ib_incoming *ibinc) +{ + struct rds_cong_map *map; + unsigned int map_off; + unsigned int map_page; + struct rds_page_frag *frag; + unsigned long frag_off; + unsigned long to_copy; + unsigned long copied; + uint64_t uncongested = 0; + void *addr; + + /* catch completely corrupt packets */ + if (be32_to_cpu(ibinc->ii_inc.i_hdr.h_len) != RDS_CONG_MAP_BYTES) + return; + + map = conn->c_fcong; + map_page = 0; + map_off = 0; + + frag = list_entry(ibinc->ii_frags.next, struct rds_page_frag, f_item); + frag_off = 0; + + copied = 0; + + while (copied < RDS_CONG_MAP_BYTES) { + uint64_t *src, *dst; + unsigned int k; + + to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off); + BUG_ON(to_copy & 7); /* Must be 64bit aligned. */ + + addr = kmap_atomic(frag->f_page, KM_SOFTIRQ0); + + src = addr + frag_off; + dst = (void *)map->m_page_addrs[map_page] + map_off; + for (k = 0; k < to_copy; k += 8) { + /* Record ports that became uncongested, ie + * bits that changed from 0 to 1. */ + uncongested |= ~(*src) & *dst; + *dst++ = *src++; + } + kunmap_atomic(addr, KM_SOFTIRQ0); + + copied += to_copy; + + map_off += to_copy; + if (map_off == PAGE_SIZE) { + map_off = 0; + map_page++; + } + + frag_off += to_copy; + if (frag_off == RDS_FRAG_SIZE) { + frag = list_entry(frag->f_item.next, + struct rds_page_frag, f_item); + frag_off = 0; + } + } + + /* the congestion map is in little endian order */ + uncongested = le64_to_cpu(uncongested); + + rds_cong_map_updated(map, uncongested); +} + +/* + * Rings are posted with all the allocations they'll need to queue the + * incoming message to the receiving socket so this can't fail. + * All fragments start with a header, so we can make sure we're not receiving + * garbage, and we can tell a small 8 byte fragment from an ACK frame. + */ +struct rds_ib_ack_state { + u64 ack_next; + u64 ack_recv; + unsigned int ack_required:1; + unsigned int ack_next_valid:1; + unsigned int ack_recv_valid:1; +}; + +static void rds_ib_process_recv(struct rds_connection *conn, + struct rds_ib_recv_work *recv, u32 byte_len, + struct rds_ib_ack_state *state) +{ + struct rds_ib_connection *ic = conn->c_transport_data; + struct rds_ib_incoming *ibinc = ic->i_ibinc; + struct rds_header *ihdr, *hdr; + + /* XXX shut down the connection if port 0,0 are seen? */ + + rdsdebug("ic %p ibinc %p recv %p byte len %u\n", ic, ibinc, recv, + byte_len); + + if (byte_len < sizeof(struct rds_header)) { + rds_ib_conn_error(conn, "incoming message " + "from %pI4 didn't inclue a " + "header, disconnecting and " + "reconnecting\n", + &conn->c_faddr); + return; + } + byte_len -= sizeof(struct rds_header); + + ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs]; + + /* Validate the checksum. */ + if (!rds_message_verify_checksum(ihdr)) { + rds_ib_conn_error(conn, "incoming message " + "from %pI4 has corrupted header - " + "forcing a reconnect\n", + &conn->c_faddr); + rds_stats_inc(s_recv_drop_bad_checksum); + return; + } + + /* Process the ACK sequence which comes with every packet */ + state->ack_recv = be64_to_cpu(ihdr->h_ack); + state->ack_recv_valid = 1; + + /* Process the credits update if there was one */ + if (ihdr->h_credit) + rds_ib_send_add_credits(conn, ihdr->h_credit); + + if (ihdr->h_sport == 0 && ihdr->h_dport == 0 && byte_len == 0) { + /* This is an ACK-only packet. The fact that it gets + * special treatment here is that historically, ACKs + * were rather special beasts. + */ + rds_ib_stats_inc(s_ib_ack_received); + + /* + * Usually the frags make their way on to incs and are then freed as + * the inc is freed. We don't go that route, so we have to drop the + * page ref ourselves. We can't just leave the page on the recv + * because that confuses the dma mapping of pages and each recv's use + * of a partial page. We can leave the frag, though, it will be + * reused. + * + * FIXME: Fold this into the code path below. + */ + rds_ib_frag_drop_page(recv->r_frag); + return; + } + + /* + * If we don't already have an inc on the connection then this + * fragment has a header and starts a message.. copy its header + * into the inc and save the inc so we can hang upcoming fragments + * off its list. + */ + if (ibinc == NULL) { + ibinc = recv->r_ibinc; + recv->r_ibinc = NULL; + ic->i_ibinc = ibinc; + + hdr = &ibinc->ii_inc.i_hdr; + memcpy(hdr, ihdr, sizeof(*hdr)); + ic->i_recv_data_rem = be32_to_cpu(hdr->h_len); + + rdsdebug("ic %p ibinc %p rem %u flag 0x%x\n", ic, ibinc, + ic->i_recv_data_rem, hdr->h_flags); + } else { + hdr = &ibinc->ii_inc.i_hdr; + /* We can't just use memcmp here; fragments of a + * single message may carry different ACKs */ + if (hdr->h_sequence != ihdr->h_sequence + || hdr->h_len != ihdr->h_len + || hdr->h_sport != ihdr->h_sport + || hdr->h_dport != ihdr->h_dport) { + rds_ib_conn_error(conn, + "fragment header mismatch; forcing reconnect\n"); + return; + } + } + + list_add_tail(&recv->r_frag->f_item, &ibinc->ii_frags); + recv->r_frag = NULL; + + if (ic->i_recv_data_rem > RDS_FRAG_SIZE) + ic->i_recv_data_rem -= RDS_FRAG_SIZE; + else { + ic->i_recv_data_rem = 0; + ic->i_ibinc = NULL; + + if (ibinc->ii_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP) + rds_ib_cong_recv(conn, ibinc); + else { + rds_recv_incoming(conn, conn->c_faddr, conn->c_laddr, + &ibinc->ii_inc, GFP_ATOMIC, + KM_SOFTIRQ0); + state->ack_next = be64_to_cpu(hdr->h_sequence); + state->ack_next_valid = 1; + } + + /* Evaluate the ACK_REQUIRED flag *after* we received + * the complete frame, and after bumping the next_rx + * sequence. */ + if (hdr->h_flags & RDS_FLAG_ACK_REQUIRED) { + rds_stats_inc(s_recv_ack_required); + state->ack_required = 1; + } + + rds_inc_put(&ibinc->ii_inc); + } +} + +/* + * Plucking the oldest entry from the ring can be done concurrently with + * the thread refilling the ring. Each ring operation is protected by + * spinlocks and the transient state of refilling doesn't change the + * recording of which entry is oldest. + * + * This relies on IB only calling one cq comp_handler for each cq so that + * there will only be one caller of rds_recv_incoming() per RDS connection. + */ +void rds_ib_recv_cq_comp_handler(struct ib_cq *cq, void *context) +{ + struct rds_connection *conn = context; + struct rds_ib_connection *ic = conn->c_transport_data; + struct ib_wc wc; + struct rds_ib_ack_state state = { 0, }; + struct rds_ib_recv_work *recv; + + rdsdebug("conn %p cq %p\n", conn, cq); + + rds_ib_stats_inc(s_ib_rx_cq_call); + + ib_req_notify_cq(cq, IB_CQ_SOLICITED); + + while (ib_poll_cq(cq, 1, &wc) > 0) { + rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n", + (unsigned long long)wc.wr_id, wc.status, wc.byte_len, + be32_to_cpu(wc.ex.imm_data)); + rds_ib_stats_inc(s_ib_rx_cq_event); + + recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)]; + + rds_ib_recv_unmap_page(ic, recv); + + /* + * Also process recvs in connecting state because it is possible + * to get a recv completion _before_ the rdmacm ESTABLISHED + * event is processed. + */ + if (rds_conn_up(conn) || rds_conn_connecting(conn)) { + /* We expect errors as the qp is drained during shutdown */ + if (wc.status == IB_WC_SUCCESS) { + rds_ib_process_recv(conn, recv, wc.byte_len, &state); + } else { + rds_ib_conn_error(conn, "recv completion on " + "%pI4 had status %u, disconnecting and " + "reconnecting\n", &conn->c_faddr, + wc.status); + } + } + + rds_ib_ring_free(&ic->i_recv_ring, 1); + } + + if (state.ack_next_valid) + rds_ib_set_ack(ic, state.ack_next, state.ack_required); + if (state.ack_recv_valid && state.ack_recv > ic->i_ack_recv) { + rds_send_drop_acked(conn, state.ack_recv, NULL); + ic->i_ack_recv = state.ack_recv; + } + if (rds_conn_up(conn)) + rds_ib_attempt_ack(ic); + + /* If we ever end up with a really empty receive ring, we're + * in deep trouble, as the sender will definitely see RNR + * timeouts. */ + if (rds_ib_ring_empty(&ic->i_recv_ring)) + rds_ib_stats_inc(s_ib_rx_ring_empty); + + /* + * If the ring is running low, then schedule the thread to refill. + */ + if (rds_ib_ring_low(&ic->i_recv_ring)) + queue_delayed_work(rds_wq, &conn->c_recv_w, 0); +} + +int rds_ib_recv(struct rds_connection *conn) +{ + struct rds_ib_connection *ic = conn->c_transport_data; + int ret = 0; + + rdsdebug("conn %p\n", conn); + + /* + * If we get a temporary posting failure in this context then + * we're really low and we want the caller to back off for a bit. + */ + mutex_lock(&ic->i_recv_mutex); + if (rds_ib_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 0)) + ret = -ENOMEM; + else + rds_ib_stats_inc(s_ib_rx_refill_from_thread); + mutex_unlock(&ic->i_recv_mutex); + + if (rds_conn_up(conn)) + rds_ib_attempt_ack(ic); + + return ret; +} + +int __init rds_ib_recv_init(void) +{ + struct sysinfo si; + int ret = -ENOMEM; + + /* Default to 30% of all available RAM for recv memory */ + si_meminfo(&si); + rds_ib_sysctl_max_recv_allocation = si.totalram / 3 * PAGE_SIZE / RDS_FRAG_SIZE; + + rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming", + sizeof(struct rds_ib_incoming), + 0, 0, NULL); + if (rds_ib_incoming_slab == NULL) + goto out; + + rds_ib_frag_slab = kmem_cache_create("rds_ib_frag", + sizeof(struct rds_page_frag), + 0, 0, NULL); + if (rds_ib_frag_slab == NULL) + kmem_cache_destroy(rds_ib_incoming_slab); + else + ret = 0; +out: + return ret; +} + +void rds_ib_recv_exit(void) +{ + kmem_cache_destroy(rds_ib_incoming_slab); + kmem_cache_destroy(rds_ib_frag_slab); +} -- cgit v1.2.3 From e6babe4cc4ce48577d743cc0de19a214f2527956 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 24 Feb 2009 15:30:35 +0000 Subject: RDS/IB: Stats and sysctls IB-specific stats and sysctls. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/ib_stats.c | 95 ++++++++++++++++++++++++++++++++++++ net/rds/ib_sysctl.c | 137 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 232 insertions(+) create mode 100644 net/rds/ib_stats.c create mode 100644 net/rds/ib_sysctl.c (limited to 'net') diff --git a/net/rds/ib_stats.c b/net/rds/ib_stats.c new file mode 100644 index 000000000000..02e3e3d50d4a --- /dev/null +++ b/net/rds/ib_stats.c @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include + +#include "rds.h" +#include "ib.h" + +DEFINE_PER_CPU(struct rds_ib_statistics, rds_ib_stats) ____cacheline_aligned; + +static char *rds_ib_stat_names[] = { + "ib_connect_raced", + "ib_listen_closed_stale", + "ib_tx_cq_call", + "ib_tx_cq_event", + "ib_tx_ring_full", + "ib_tx_throttle", + "ib_tx_sg_mapping_failure", + "ib_tx_stalled", + "ib_tx_credit_updates", + "ib_rx_cq_call", + "ib_rx_cq_event", + "ib_rx_ring_empty", + "ib_rx_refill_from_cq", + "ib_rx_refill_from_thread", + "ib_rx_alloc_limit", + "ib_rx_credit_updates", + "ib_ack_sent", + "ib_ack_send_failure", + "ib_ack_send_delayed", + "ib_ack_send_piggybacked", + "ib_ack_received", + "ib_rdma_mr_alloc", + "ib_rdma_mr_free", + "ib_rdma_mr_used", + "ib_rdma_mr_pool_flush", + "ib_rdma_mr_pool_wait", + "ib_rdma_mr_pool_depleted", +}; + +unsigned int rds_ib_stats_info_copy(struct rds_info_iterator *iter, + unsigned int avail) +{ + struct rds_ib_statistics stats = {0, }; + uint64_t *src; + uint64_t *sum; + size_t i; + int cpu; + + if (avail < ARRAY_SIZE(rds_ib_stat_names)) + goto out; + + for_each_online_cpu(cpu) { + src = (uint64_t *)&(per_cpu(rds_ib_stats, cpu)); + sum = (uint64_t *)&stats; + for (i = 0; i < sizeof(stats) / sizeof(uint64_t); i++) + *(sum++) += *(src++); + } + + rds_stats_info_copy(iter, (uint64_t *)&stats, rds_ib_stat_names, + ARRAY_SIZE(rds_ib_stat_names)); +out: + return ARRAY_SIZE(rds_ib_stat_names); +} diff --git a/net/rds/ib_sysctl.c b/net/rds/ib_sysctl.c new file mode 100644 index 000000000000..d87830db93a0 --- /dev/null +++ b/net/rds/ib_sysctl.c @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include + +#include "ib.h" + +static struct ctl_table_header *rds_ib_sysctl_hdr; + +unsigned long rds_ib_sysctl_max_send_wr = RDS_IB_DEFAULT_SEND_WR; +unsigned long rds_ib_sysctl_max_recv_wr = RDS_IB_DEFAULT_RECV_WR; +unsigned long rds_ib_sysctl_max_recv_allocation = (128 * 1024 * 1024) / RDS_FRAG_SIZE; +static unsigned long rds_ib_sysctl_max_wr_min = 1; +/* hardware will fail CQ creation long before this */ +static unsigned long rds_ib_sysctl_max_wr_max = (u32)~0; + +unsigned long rds_ib_sysctl_max_unsig_wrs = 16; +static unsigned long rds_ib_sysctl_max_unsig_wr_min = 1; +static unsigned long rds_ib_sysctl_max_unsig_wr_max = 64; + +unsigned long rds_ib_sysctl_max_unsig_bytes = (16 << 20); +static unsigned long rds_ib_sysctl_max_unsig_bytes_min = 1; +static unsigned long rds_ib_sysctl_max_unsig_bytes_max = ~0UL; + +unsigned int rds_ib_sysctl_flow_control = 1; + +ctl_table rds_ib_sysctl_table[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "max_send_wr", + .data = &rds_ib_sysctl_max_send_wr, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_doulongvec_minmax, + .extra1 = &rds_ib_sysctl_max_wr_min, + .extra2 = &rds_ib_sysctl_max_wr_max, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "max_recv_wr", + .data = &rds_ib_sysctl_max_recv_wr, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_doulongvec_minmax, + .extra1 = &rds_ib_sysctl_max_wr_min, + .extra2 = &rds_ib_sysctl_max_wr_max, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "max_unsignaled_wr", + .data = &rds_ib_sysctl_max_unsig_wrs, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_doulongvec_minmax, + .extra1 = &rds_ib_sysctl_max_unsig_wr_min, + .extra2 = &rds_ib_sysctl_max_unsig_wr_max, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "max_unsignaled_bytes", + .data = &rds_ib_sysctl_max_unsig_bytes, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_doulongvec_minmax, + .extra1 = &rds_ib_sysctl_max_unsig_bytes_min, + .extra2 = &rds_ib_sysctl_max_unsig_bytes_max, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "max_recv_allocation", + .data = &rds_ib_sysctl_max_recv_allocation, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_doulongvec_minmax, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "flow_control", + .data = &rds_ib_sysctl_flow_control, + .maxlen = sizeof(rds_ib_sysctl_flow_control), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { .ctl_name = 0} +}; + +static struct ctl_path rds_ib_sysctl_path[] = { + { .procname = "net", .ctl_name = CTL_NET, }, + { .procname = "rds", .ctl_name = CTL_UNNUMBERED, }, + { .procname = "ib", .ctl_name = CTL_UNNUMBERED, }, + { } +}; + +void rds_ib_sysctl_exit(void) +{ + if (rds_ib_sysctl_hdr) + unregister_sysctl_table(rds_ib_sysctl_hdr); +} + +int __init rds_ib_sysctl_init(void) +{ + rds_ib_sysctl_hdr = register_sysctl_paths(rds_ib_sysctl_path, rds_ib_sysctl_table); + if (rds_ib_sysctl_hdr == NULL) + return -ENOMEM; + return 0; +} -- cgit v1.2.3 From fcd8b7c0ecf792dd824b2edcd63cb2c08563c340 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 24 Feb 2009 15:30:36 +0000 Subject: RDS: Add iWARP support Support for iWARP NICs is implemented as a separate RDS transport from IB. The code, however, is very similar to IB (it was forked, basically.) so let's keep it in one changeset. The reason for this duplicationis that despite its similarity to IB, there are a number of places where it has different semantics. iwarp zcopy support is still under development, and giving it its own sandbox ensures that IB code isn't disrupted while iwarp changes. Over time these transports will re-converge. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/iw.c | 333 ++++++++++++++++++ net/rds/iw.h | 395 +++++++++++++++++++++ net/rds/iw_cm.c | 750 ++++++++++++++++++++++++++++++++++++++++ net/rds/iw_rdma.c | 888 +++++++++++++++++++++++++++++++++++++++++++++++ net/rds/iw_recv.c | 869 ++++++++++++++++++++++++++++++++++++++++++++++ net/rds/iw_ring.c | 169 +++++++++ net/rds/iw_send.c | 975 ++++++++++++++++++++++++++++++++++++++++++++++++++++ net/rds/iw_stats.c | 95 +++++ net/rds/iw_sysctl.c | 137 ++++++++ 9 files changed, 4611 insertions(+) create mode 100644 net/rds/iw.c create mode 100644 net/rds/iw.h create mode 100644 net/rds/iw_cm.c create mode 100644 net/rds/iw_rdma.c create mode 100644 net/rds/iw_recv.c create mode 100644 net/rds/iw_ring.c create mode 100644 net/rds/iw_send.c create mode 100644 net/rds/iw_stats.c create mode 100644 net/rds/iw_sysctl.c (limited to 'net') diff --git a/net/rds/iw.c b/net/rds/iw.c new file mode 100644 index 000000000000..1b56905c4c08 --- /dev/null +++ b/net/rds/iw.c @@ -0,0 +1,333 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include +#include +#include +#include +#include + +#include "rds.h" +#include "iw.h" + +unsigned int fastreg_pool_size = RDS_FASTREG_POOL_SIZE; +unsigned int fastreg_message_size = RDS_FASTREG_SIZE + 1; /* +1 allows for unaligned MRs */ + +module_param(fastreg_pool_size, int, 0444); +MODULE_PARM_DESC(fastreg_pool_size, " Max number of fastreg MRs per device"); +module_param(fastreg_message_size, int, 0444); +MODULE_PARM_DESC(fastreg_message_size, " Max size of a RDMA transfer (fastreg MRs)"); + +struct list_head rds_iw_devices; + +DEFINE_SPINLOCK(iw_nodev_conns_lock); +LIST_HEAD(iw_nodev_conns); + +void rds_iw_add_one(struct ib_device *device) +{ + struct rds_iw_device *rds_iwdev; + struct ib_device_attr *dev_attr; + + /* Only handle iwarp devices */ + if (device->node_type != RDMA_NODE_RNIC) + return; + + dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL); + if (!dev_attr) + return; + + if (ib_query_device(device, dev_attr)) { + rdsdebug("Query device failed for %s\n", device->name); + goto free_attr; + } + + rds_iwdev = kmalloc(sizeof *rds_iwdev, GFP_KERNEL); + if (!rds_iwdev) + goto free_attr; + + spin_lock_init(&rds_iwdev->spinlock); + + rds_iwdev->dma_local_lkey = !!(dev_attr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY); + rds_iwdev->max_wrs = dev_attr->max_qp_wr; + rds_iwdev->max_sge = min(dev_attr->max_sge, RDS_IW_MAX_SGE); + + rds_iwdev->page_shift = max(PAGE_SHIFT, ffs(dev_attr->page_size_cap) - 1); + + rds_iwdev->dev = device; + rds_iwdev->pd = ib_alloc_pd(device); + if (IS_ERR(rds_iwdev->pd)) + goto free_dev; + + if (!rds_iwdev->dma_local_lkey) { + if (device->node_type != RDMA_NODE_RNIC) { + rds_iwdev->mr = ib_get_dma_mr(rds_iwdev->pd, + IB_ACCESS_LOCAL_WRITE); + } else { + rds_iwdev->mr = ib_get_dma_mr(rds_iwdev->pd, + IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_LOCAL_WRITE); + } + if (IS_ERR(rds_iwdev->mr)) + goto err_pd; + } else + rds_iwdev->mr = NULL; + + rds_iwdev->mr_pool = rds_iw_create_mr_pool(rds_iwdev); + if (IS_ERR(rds_iwdev->mr_pool)) { + rds_iwdev->mr_pool = NULL; + goto err_mr; + } + + INIT_LIST_HEAD(&rds_iwdev->cm_id_list); + INIT_LIST_HEAD(&rds_iwdev->conn_list); + list_add_tail(&rds_iwdev->list, &rds_iw_devices); + + ib_set_client_data(device, &rds_iw_client, rds_iwdev); + + goto free_attr; + +err_mr: + if (rds_iwdev->mr) + ib_dereg_mr(rds_iwdev->mr); +err_pd: + ib_dealloc_pd(rds_iwdev->pd); +free_dev: + kfree(rds_iwdev); +free_attr: + kfree(dev_attr); +} + +void rds_iw_remove_one(struct ib_device *device) +{ + struct rds_iw_device *rds_iwdev; + struct rds_iw_cm_id *i_cm_id, *next; + + rds_iwdev = ib_get_client_data(device, &rds_iw_client); + if (!rds_iwdev) + return; + + spin_lock_irq(&rds_iwdev->spinlock); + list_for_each_entry_safe(i_cm_id, next, &rds_iwdev->cm_id_list, list) { + list_del(&i_cm_id->list); + kfree(i_cm_id); + } + spin_unlock_irq(&rds_iwdev->spinlock); + + rds_iw_remove_conns(rds_iwdev); + + if (rds_iwdev->mr_pool) + rds_iw_destroy_mr_pool(rds_iwdev->mr_pool); + + if (rds_iwdev->mr) + ib_dereg_mr(rds_iwdev->mr); + + while (ib_dealloc_pd(rds_iwdev->pd)) { + rdsdebug("Failed to dealloc pd %p\n", rds_iwdev->pd); + msleep(1); + } + + list_del(&rds_iwdev->list); + kfree(rds_iwdev); +} + +struct ib_client rds_iw_client = { + .name = "rds_iw", + .add = rds_iw_add_one, + .remove = rds_iw_remove_one +}; + +static int rds_iw_conn_info_visitor(struct rds_connection *conn, + void *buffer) +{ + struct rds_info_rdma_connection *iinfo = buffer; + struct rds_iw_connection *ic; + + /* We will only ever look at IB transports */ + if (conn->c_trans != &rds_iw_transport) + return 0; + + iinfo->src_addr = conn->c_laddr; + iinfo->dst_addr = conn->c_faddr; + + memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid)); + memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid)); + if (rds_conn_state(conn) == RDS_CONN_UP) { + struct rds_iw_device *rds_iwdev; + struct rdma_dev_addr *dev_addr; + + ic = conn->c_transport_data; + dev_addr = &ic->i_cm_id->route.addr.dev_addr; + + ib_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid); + ib_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid); + + rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client); + iinfo->max_send_wr = ic->i_send_ring.w_nr; + iinfo->max_recv_wr = ic->i_recv_ring.w_nr; + iinfo->max_send_sge = rds_iwdev->max_sge; + rds_iw_get_mr_info(rds_iwdev, iinfo); + } + return 1; +} + +static void rds_iw_ic_info(struct socket *sock, unsigned int len, + struct rds_info_iterator *iter, + struct rds_info_lengths *lens) +{ + rds_for_each_conn_info(sock, len, iter, lens, + rds_iw_conn_info_visitor, + sizeof(struct rds_info_rdma_connection)); +} + + +/* + * Early RDS/IB was built to only bind to an address if there is an IPoIB + * device with that address set. + * + * If it were me, I'd advocate for something more flexible. Sending and + * receiving should be device-agnostic. Transports would try and maintain + * connections between peers who have messages queued. Userspace would be + * allowed to influence which paths have priority. We could call userspace + * asserting this policy "routing". + */ +static int rds_iw_laddr_check(__be32 addr) +{ + int ret; + struct rdma_cm_id *cm_id; + struct sockaddr_in sin; + + /* Create a CMA ID and try to bind it. This catches both + * IB and iWARP capable NICs. + */ + cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP); + if (!cm_id) + return -EADDRNOTAVAIL; + + memset(&sin, 0, sizeof(sin)); + sin.sin_family = AF_INET; + sin.sin_addr.s_addr = addr; + + /* rdma_bind_addr will only succeed for IB & iWARP devices */ + ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin); + /* due to this, we will claim to support IB devices unless we + check node_type. */ + if (ret || cm_id->device->node_type != RDMA_NODE_RNIC) + ret = -EADDRNOTAVAIL; + + rdsdebug("addr %pI4 ret %d node type %d\n", + &addr, ret, + cm_id->device ? cm_id->device->node_type : -1); + + rdma_destroy_id(cm_id); + + return ret; +} + +void rds_iw_exit(void) +{ + rds_info_deregister_func(RDS_INFO_IWARP_CONNECTIONS, rds_iw_ic_info); + rds_iw_remove_nodev_conns(); + ib_unregister_client(&rds_iw_client); + rds_iw_sysctl_exit(); + rds_iw_recv_exit(); + rds_trans_unregister(&rds_iw_transport); +} + +struct rds_transport rds_iw_transport = { + .laddr_check = rds_iw_laddr_check, + .xmit_complete = rds_iw_xmit_complete, + .xmit = rds_iw_xmit, + .xmit_cong_map = NULL, + .xmit_rdma = rds_iw_xmit_rdma, + .recv = rds_iw_recv, + .conn_alloc = rds_iw_conn_alloc, + .conn_free = rds_iw_conn_free, + .conn_connect = rds_iw_conn_connect, + .conn_shutdown = rds_iw_conn_shutdown, + .inc_copy_to_user = rds_iw_inc_copy_to_user, + .inc_purge = rds_iw_inc_purge, + .inc_free = rds_iw_inc_free, + .cm_initiate_connect = rds_iw_cm_initiate_connect, + .cm_handle_connect = rds_iw_cm_handle_connect, + .cm_connect_complete = rds_iw_cm_connect_complete, + .stats_info_copy = rds_iw_stats_info_copy, + .exit = rds_iw_exit, + .get_mr = rds_iw_get_mr, + .sync_mr = rds_iw_sync_mr, + .free_mr = rds_iw_free_mr, + .flush_mrs = rds_iw_flush_mrs, + .t_owner = THIS_MODULE, + .t_name = "iwarp", + .t_prefer_loopback = 1, +}; + +int __init rds_iw_init(void) +{ + int ret; + + INIT_LIST_HEAD(&rds_iw_devices); + + ret = ib_register_client(&rds_iw_client); + if (ret) + goto out; + + ret = rds_iw_sysctl_init(); + if (ret) + goto out_ibreg; + + ret = rds_iw_recv_init(); + if (ret) + goto out_sysctl; + + ret = rds_trans_register(&rds_iw_transport); + if (ret) + goto out_recv; + + rds_info_register_func(RDS_INFO_IWARP_CONNECTIONS, rds_iw_ic_info); + + goto out; + +out_recv: + rds_iw_recv_exit(); +out_sysctl: + rds_iw_sysctl_exit(); +out_ibreg: + ib_unregister_client(&rds_iw_client); +out: + return ret; +} + +MODULE_LICENSE("GPL"); + diff --git a/net/rds/iw.h b/net/rds/iw.h new file mode 100644 index 000000000000..0ddda34f2a1c --- /dev/null +++ b/net/rds/iw.h @@ -0,0 +1,395 @@ +#ifndef _RDS_IW_H +#define _RDS_IW_H + +#include +#include +#include "rds.h" +#include "rdma_transport.h" + +#define RDS_FASTREG_SIZE 20 +#define RDS_FASTREG_POOL_SIZE 2048 + +#define RDS_IW_MAX_SGE 8 +#define RDS_IW_RECV_SGE 2 + +#define RDS_IW_DEFAULT_RECV_WR 1024 +#define RDS_IW_DEFAULT_SEND_WR 256 + +#define RDS_IW_SUPPORTED_PROTOCOLS 0x00000003 /* minor versions supported */ + +extern struct list_head rds_iw_devices; + +/* + * IB posts RDS_FRAG_SIZE fragments of pages to the receive queues to + * try and minimize the amount of memory tied up both the device and + * socket receive queues. + */ +/* page offset of the final full frag that fits in the page */ +#define RDS_PAGE_LAST_OFF (((PAGE_SIZE / RDS_FRAG_SIZE) - 1) * RDS_FRAG_SIZE) +struct rds_page_frag { + struct list_head f_item; + struct page *f_page; + unsigned long f_offset; + dma_addr_t f_mapped; +}; + +struct rds_iw_incoming { + struct list_head ii_frags; + struct rds_incoming ii_inc; +}; + +struct rds_iw_connect_private { + /* Add new fields at the end, and don't permute existing fields. */ + __be32 dp_saddr; + __be32 dp_daddr; + u8 dp_protocol_major; + u8 dp_protocol_minor; + __be16 dp_protocol_minor_mask; /* bitmask */ + __be32 dp_reserved1; + __be64 dp_ack_seq; + __be32 dp_credit; /* non-zero enables flow ctl */ +}; + +struct rds_iw_scatterlist { + struct scatterlist *list; + unsigned int len; + int dma_len; + unsigned int dma_npages; + unsigned int bytes; +}; + +struct rds_iw_mapping { + spinlock_t m_lock; /* protect the mapping struct */ + struct list_head m_list; + struct rds_iw_mr *m_mr; + uint32_t m_rkey; + struct rds_iw_scatterlist m_sg; +}; + +struct rds_iw_send_work { + struct rds_message *s_rm; + + /* We should really put these into a union: */ + struct rds_rdma_op *s_op; + struct rds_iw_mapping *s_mapping; + struct ib_mr *s_mr; + struct ib_fast_reg_page_list *s_page_list; + unsigned char s_remap_count; + + struct ib_send_wr s_wr; + struct ib_sge s_sge[RDS_IW_MAX_SGE]; + unsigned long s_queued; +}; + +struct rds_iw_recv_work { + struct rds_iw_incoming *r_iwinc; + struct rds_page_frag *r_frag; + struct ib_recv_wr r_wr; + struct ib_sge r_sge[2]; +}; + +struct rds_iw_work_ring { + u32 w_nr; + u32 w_alloc_ptr; + u32 w_alloc_ctr; + u32 w_free_ptr; + atomic_t w_free_ctr; +}; + +struct rds_iw_device; + +struct rds_iw_connection { + + struct list_head iw_node; + struct rds_iw_device *rds_iwdev; + struct rds_connection *conn; + + /* alphabet soup, IBTA style */ + struct rdma_cm_id *i_cm_id; + struct ib_pd *i_pd; + struct ib_mr *i_mr; + struct ib_cq *i_send_cq; + struct ib_cq *i_recv_cq; + + /* tx */ + struct rds_iw_work_ring i_send_ring; + struct rds_message *i_rm; + struct rds_header *i_send_hdrs; + u64 i_send_hdrs_dma; + struct rds_iw_send_work *i_sends; + + /* rx */ + struct mutex i_recv_mutex; + struct rds_iw_work_ring i_recv_ring; + struct rds_iw_incoming *i_iwinc; + u32 i_recv_data_rem; + struct rds_header *i_recv_hdrs; + u64 i_recv_hdrs_dma; + struct rds_iw_recv_work *i_recvs; + struct rds_page_frag i_frag; + u64 i_ack_recv; /* last ACK received */ + + /* sending acks */ + unsigned long i_ack_flags; + u64 i_ack_next; /* next ACK to send */ + struct rds_header *i_ack; + struct ib_send_wr i_ack_wr; + struct ib_sge i_ack_sge; + u64 i_ack_dma; + unsigned long i_ack_queued; + + /* Flow control related information + * + * Our algorithm uses a pair variables that we need to access + * atomically - one for the send credits, and one posted + * recv credits we need to transfer to remote. + * Rather than protect them using a slow spinlock, we put both into + * a single atomic_t and update it using cmpxchg + */ + atomic_t i_credits; + + /* Protocol version specific information */ + unsigned int i_flowctl:1; /* enable/disable flow ctl */ + unsigned int i_dma_local_lkey:1; + unsigned int i_fastreg_posted:1; /* fastreg posted on this connection */ + /* Batched completions */ + unsigned int i_unsignaled_wrs; + long i_unsignaled_bytes; +}; + +/* This assumes that atomic_t is at least 32 bits */ +#define IB_GET_SEND_CREDITS(v) ((v) & 0xffff) +#define IB_GET_POST_CREDITS(v) ((v) >> 16) +#define IB_SET_SEND_CREDITS(v) ((v) & 0xffff) +#define IB_SET_POST_CREDITS(v) ((v) << 16) + +struct rds_iw_cm_id { + struct list_head list; + struct rdma_cm_id *cm_id; +}; + +struct rds_iw_device { + struct list_head list; + struct list_head cm_id_list; + struct list_head conn_list; + struct ib_device *dev; + struct ib_pd *pd; + struct ib_mr *mr; + struct rds_iw_mr_pool *mr_pool; + int page_shift; + int max_sge; + unsigned int max_wrs; + unsigned int dma_local_lkey:1; + spinlock_t spinlock; /* protect the above */ +}; + +/* bits for i_ack_flags */ +#define IB_ACK_IN_FLIGHT 0 +#define IB_ACK_REQUESTED 1 + +/* Magic WR_ID for ACKs */ +#define RDS_IW_ACK_WR_ID ((u64)0xffffffffffffffffULL) +#define RDS_IW_FAST_REG_WR_ID ((u64)0xefefefefefefefefULL) +#define RDS_IW_LOCAL_INV_WR_ID ((u64)0xdfdfdfdfdfdfdfdfULL) + +struct rds_iw_statistics { + uint64_t s_iw_connect_raced; + uint64_t s_iw_listen_closed_stale; + uint64_t s_iw_tx_cq_call; + uint64_t s_iw_tx_cq_event; + uint64_t s_iw_tx_ring_full; + uint64_t s_iw_tx_throttle; + uint64_t s_iw_tx_sg_mapping_failure; + uint64_t s_iw_tx_stalled; + uint64_t s_iw_tx_credit_updates; + uint64_t s_iw_rx_cq_call; + uint64_t s_iw_rx_cq_event; + uint64_t s_iw_rx_ring_empty; + uint64_t s_iw_rx_refill_from_cq; + uint64_t s_iw_rx_refill_from_thread; + uint64_t s_iw_rx_alloc_limit; + uint64_t s_iw_rx_credit_updates; + uint64_t s_iw_ack_sent; + uint64_t s_iw_ack_send_failure; + uint64_t s_iw_ack_send_delayed; + uint64_t s_iw_ack_send_piggybacked; + uint64_t s_iw_ack_received; + uint64_t s_iw_rdma_mr_alloc; + uint64_t s_iw_rdma_mr_free; + uint64_t s_iw_rdma_mr_used; + uint64_t s_iw_rdma_mr_pool_flush; + uint64_t s_iw_rdma_mr_pool_wait; + uint64_t s_iw_rdma_mr_pool_depleted; +}; + +extern struct workqueue_struct *rds_iw_wq; + +/* + * Fake ib_dma_sync_sg_for_{cpu,device} as long as ib_verbs.h + * doesn't define it. + */ +static inline void rds_iw_dma_sync_sg_for_cpu(struct ib_device *dev, + struct scatterlist *sg, unsigned int sg_dma_len, int direction) +{ + unsigned int i; + + for (i = 0; i < sg_dma_len; ++i) { + ib_dma_sync_single_for_cpu(dev, + ib_sg_dma_address(dev, &sg[i]), + ib_sg_dma_len(dev, &sg[i]), + direction); + } +} +#define ib_dma_sync_sg_for_cpu rds_iw_dma_sync_sg_for_cpu + +static inline void rds_iw_dma_sync_sg_for_device(struct ib_device *dev, + struct scatterlist *sg, unsigned int sg_dma_len, int direction) +{ + unsigned int i; + + for (i = 0; i < sg_dma_len; ++i) { + ib_dma_sync_single_for_device(dev, + ib_sg_dma_address(dev, &sg[i]), + ib_sg_dma_len(dev, &sg[i]), + direction); + } +} +#define ib_dma_sync_sg_for_device rds_iw_dma_sync_sg_for_device + +static inline u32 rds_iw_local_dma_lkey(struct rds_iw_connection *ic) +{ + return ic->i_dma_local_lkey ? ic->i_cm_id->device->local_dma_lkey : ic->i_mr->lkey; +} + +/* ib.c */ +extern struct rds_transport rds_iw_transport; +extern void rds_iw_add_one(struct ib_device *device); +extern void rds_iw_remove_one(struct ib_device *device); +extern struct ib_client rds_iw_client; + +extern unsigned int fastreg_pool_size; +extern unsigned int fastreg_message_size; + +extern spinlock_t iw_nodev_conns_lock; +extern struct list_head iw_nodev_conns; + +/* ib_cm.c */ +int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp); +void rds_iw_conn_free(void *arg); +int rds_iw_conn_connect(struct rds_connection *conn); +void rds_iw_conn_shutdown(struct rds_connection *conn); +void rds_iw_state_change(struct sock *sk); +int __init rds_iw_listen_init(void); +void rds_iw_listen_stop(void); +void __rds_iw_conn_error(struct rds_connection *conn, const char *, ...); +int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id, + struct rdma_cm_event *event); +int rds_iw_cm_initiate_connect(struct rdma_cm_id *cm_id); +void rds_iw_cm_connect_complete(struct rds_connection *conn, + struct rdma_cm_event *event); + + +#define rds_iw_conn_error(conn, fmt...) \ + __rds_iw_conn_error(conn, KERN_WARNING "RDS/IW: " fmt) + +/* ib_rdma.c */ +int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id); +int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn); +void rds_iw_remove_nodev_conns(void); +void rds_iw_remove_conns(struct rds_iw_device *rds_iwdev); +struct rds_iw_mr_pool *rds_iw_create_mr_pool(struct rds_iw_device *); +void rds_iw_get_mr_info(struct rds_iw_device *rds_iwdev, struct rds_info_rdma_connection *iinfo); +void rds_iw_destroy_mr_pool(struct rds_iw_mr_pool *); +void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents, + struct rds_sock *rs, u32 *key_ret); +void rds_iw_sync_mr(void *trans_private, int dir); +void rds_iw_free_mr(void *trans_private, int invalidate); +void rds_iw_flush_mrs(void); +void rds_iw_remove_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id); + +/* ib_recv.c */ +int __init rds_iw_recv_init(void); +void rds_iw_recv_exit(void); +int rds_iw_recv(struct rds_connection *conn); +int rds_iw_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp, + gfp_t page_gfp, int prefill); +void rds_iw_inc_purge(struct rds_incoming *inc); +void rds_iw_inc_free(struct rds_incoming *inc); +int rds_iw_inc_copy_to_user(struct rds_incoming *inc, struct iovec *iov, + size_t size); +void rds_iw_recv_cq_comp_handler(struct ib_cq *cq, void *context); +void rds_iw_recv_init_ring(struct rds_iw_connection *ic); +void rds_iw_recv_clear_ring(struct rds_iw_connection *ic); +void rds_iw_recv_init_ack(struct rds_iw_connection *ic); +void rds_iw_attempt_ack(struct rds_iw_connection *ic); +void rds_iw_ack_send_complete(struct rds_iw_connection *ic); +u64 rds_iw_piggyb_ack(struct rds_iw_connection *ic); + +/* ib_ring.c */ +void rds_iw_ring_init(struct rds_iw_work_ring *ring, u32 nr); +void rds_iw_ring_resize(struct rds_iw_work_ring *ring, u32 nr); +u32 rds_iw_ring_alloc(struct rds_iw_work_ring *ring, u32 val, u32 *pos); +void rds_iw_ring_free(struct rds_iw_work_ring *ring, u32 val); +void rds_iw_ring_unalloc(struct rds_iw_work_ring *ring, u32 val); +int rds_iw_ring_empty(struct rds_iw_work_ring *ring); +int rds_iw_ring_low(struct rds_iw_work_ring *ring); +u32 rds_iw_ring_oldest(struct rds_iw_work_ring *ring); +u32 rds_iw_ring_completed(struct rds_iw_work_ring *ring, u32 wr_id, u32 oldest); +extern wait_queue_head_t rds_iw_ring_empty_wait; + +/* ib_send.c */ +void rds_iw_xmit_complete(struct rds_connection *conn); +int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, + unsigned int hdr_off, unsigned int sg, unsigned int off); +void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context); +void rds_iw_send_init_ring(struct rds_iw_connection *ic); +void rds_iw_send_clear_ring(struct rds_iw_connection *ic); +int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op); +void rds_iw_send_add_credits(struct rds_connection *conn, unsigned int credits); +void rds_iw_advertise_credits(struct rds_connection *conn, unsigned int posted); +int rds_iw_send_grab_credits(struct rds_iw_connection *ic, u32 wanted, + u32 *adv_credits, int need_posted); + +/* ib_stats.c */ +DECLARE_PER_CPU(struct rds_iw_statistics, rds_iw_stats); +#define rds_iw_stats_inc(member) rds_stats_inc_which(rds_iw_stats, member) +unsigned int rds_iw_stats_info_copy(struct rds_info_iterator *iter, + unsigned int avail); + +/* ib_sysctl.c */ +int __init rds_iw_sysctl_init(void); +void rds_iw_sysctl_exit(void); +extern unsigned long rds_iw_sysctl_max_send_wr; +extern unsigned long rds_iw_sysctl_max_recv_wr; +extern unsigned long rds_iw_sysctl_max_unsig_wrs; +extern unsigned long rds_iw_sysctl_max_unsig_bytes; +extern unsigned long rds_iw_sysctl_max_recv_allocation; +extern unsigned int rds_iw_sysctl_flow_control; +extern ctl_table rds_iw_sysctl_table[]; + +/* + * Helper functions for getting/setting the header and data SGEs in + * RDS packets (not RDMA) + */ +static inline struct ib_sge * +rds_iw_header_sge(struct rds_iw_connection *ic, struct ib_sge *sge) +{ + return &sge[0]; +} + +static inline struct ib_sge * +rds_iw_data_sge(struct rds_iw_connection *ic, struct ib_sge *sge) +{ + return &sge[1]; +} + +static inline void rds_iw_set_64bit(u64 *ptr, u64 val) +{ +#if BITS_PER_LONG == 64 + *ptr = val; +#else + set_64bit(ptr, val); +#endif +} + +#endif diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c new file mode 100644 index 000000000000..57ecb3d4b8a5 --- /dev/null +++ b/net/rds/iw_cm.c @@ -0,0 +1,750 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include + +#include "rds.h" +#include "iw.h" + +/* + * Set the selected protocol version + */ +static void rds_iw_set_protocol(struct rds_connection *conn, unsigned int version) +{ + conn->c_version = version; +} + +/* + * Set up flow control + */ +static void rds_iw_set_flow_control(struct rds_connection *conn, u32 credits) +{ + struct rds_iw_connection *ic = conn->c_transport_data; + + if (rds_iw_sysctl_flow_control && credits != 0) { + /* We're doing flow control */ + ic->i_flowctl = 1; + rds_iw_send_add_credits(conn, credits); + } else { + ic->i_flowctl = 0; + } +} + +/* + * Connection established. + * We get here for both outgoing and incoming connection. + */ +void rds_iw_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_event *event) +{ + const struct rds_iw_connect_private *dp = NULL; + struct rds_iw_connection *ic = conn->c_transport_data; + struct rds_iw_device *rds_iwdev; + int err; + + if (event->param.conn.private_data_len) { + dp = event->param.conn.private_data; + + rds_iw_set_protocol(conn, + RDS_PROTOCOL(dp->dp_protocol_major, + dp->dp_protocol_minor)); + rds_iw_set_flow_control(conn, be32_to_cpu(dp->dp_credit)); + } + + /* update ib_device with this local ipaddr & conn */ + rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client); + err = rds_iw_update_cm_id(rds_iwdev, ic->i_cm_id); + if (err) + printk(KERN_ERR "rds_iw_update_ipaddr failed (%d)\n", err); + err = rds_iw_add_conn(rds_iwdev, conn); + if (err) + printk(KERN_ERR "rds_iw_add_conn failed (%d)\n", err); + + /* If the peer gave us the last packet it saw, process this as if + * we had received a regular ACK. */ + if (dp && dp->dp_ack_seq) + rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL); + + printk(KERN_NOTICE "RDS/IW: connected to %pI4<->%pI4 version %u.%u%s\n", + &conn->c_laddr, &conn->c_faddr, + RDS_PROTOCOL_MAJOR(conn->c_version), + RDS_PROTOCOL_MINOR(conn->c_version), + ic->i_flowctl ? ", flow control" : ""); + + rds_connect_complete(conn); +} + +static void rds_iw_cm_fill_conn_param(struct rds_connection *conn, + struct rdma_conn_param *conn_param, + struct rds_iw_connect_private *dp, + u32 protocol_version) +{ + struct rds_iw_connection *ic = conn->c_transport_data; + + memset(conn_param, 0, sizeof(struct rdma_conn_param)); + /* XXX tune these? */ + conn_param->responder_resources = 1; + conn_param->initiator_depth = 1; + + if (dp) { + memset(dp, 0, sizeof(*dp)); + dp->dp_saddr = conn->c_laddr; + dp->dp_daddr = conn->c_faddr; + dp->dp_protocol_major = RDS_PROTOCOL_MAJOR(protocol_version); + dp->dp_protocol_minor = RDS_PROTOCOL_MINOR(protocol_version); + dp->dp_protocol_minor_mask = cpu_to_be16(RDS_IW_SUPPORTED_PROTOCOLS); + dp->dp_ack_seq = rds_iw_piggyb_ack(ic); + + /* Advertise flow control */ + if (ic->i_flowctl) { + unsigned int credits; + + credits = IB_GET_POST_CREDITS(atomic_read(&ic->i_credits)); + dp->dp_credit = cpu_to_be32(credits); + atomic_sub(IB_SET_POST_CREDITS(credits), &ic->i_credits); + } + + conn_param->private_data = dp; + conn_param->private_data_len = sizeof(*dp); + } +} + +static void rds_iw_cq_event_handler(struct ib_event *event, void *data) +{ + rdsdebug("event %u data %p\n", event->event, data); +} + +static void rds_iw_qp_event_handler(struct ib_event *event, void *data) +{ + struct rds_connection *conn = data; + struct rds_iw_connection *ic = conn->c_transport_data; + + rdsdebug("conn %p ic %p event %u\n", conn, ic, event->event); + + switch (event->event) { + case IB_EVENT_COMM_EST: + rdma_notify(ic->i_cm_id, IB_EVENT_COMM_EST); + break; + case IB_EVENT_QP_REQ_ERR: + case IB_EVENT_QP_FATAL: + default: + rds_iw_conn_error(conn, "RDS/IW: Fatal QP Event %u - connection %pI4->%pI4...reconnecting\n", + event->event, &conn->c_laddr, + &conn->c_faddr); + break; + } +} + +/* + * Create a QP + */ +static int rds_iw_init_qp_attrs(struct ib_qp_init_attr *attr, + struct rds_iw_device *rds_iwdev, + struct rds_iw_work_ring *send_ring, + void (*send_cq_handler)(struct ib_cq *, void *), + struct rds_iw_work_ring *recv_ring, + void (*recv_cq_handler)(struct ib_cq *, void *), + void *context) +{ + struct ib_device *dev = rds_iwdev->dev; + unsigned int send_size, recv_size; + int ret; + + /* The offset of 1 is to accomodate the additional ACK WR. */ + send_size = min_t(unsigned int, rds_iwdev->max_wrs, rds_iw_sysctl_max_send_wr + 1); + recv_size = min_t(unsigned int, rds_iwdev->max_wrs, rds_iw_sysctl_max_recv_wr + 1); + rds_iw_ring_resize(send_ring, send_size - 1); + rds_iw_ring_resize(recv_ring, recv_size - 1); + + memset(attr, 0, sizeof(*attr)); + attr->event_handler = rds_iw_qp_event_handler; + attr->qp_context = context; + attr->cap.max_send_wr = send_size; + attr->cap.max_recv_wr = recv_size; + attr->cap.max_send_sge = rds_iwdev->max_sge; + attr->cap.max_recv_sge = RDS_IW_RECV_SGE; + attr->sq_sig_type = IB_SIGNAL_REQ_WR; + attr->qp_type = IB_QPT_RC; + + attr->send_cq = ib_create_cq(dev, send_cq_handler, + rds_iw_cq_event_handler, + context, send_size, 0); + if (IS_ERR(attr->send_cq)) { + ret = PTR_ERR(attr->send_cq); + attr->send_cq = NULL; + rdsdebug("ib_create_cq send failed: %d\n", ret); + goto out; + } + + attr->recv_cq = ib_create_cq(dev, recv_cq_handler, + rds_iw_cq_event_handler, + context, recv_size, 0); + if (IS_ERR(attr->recv_cq)) { + ret = PTR_ERR(attr->recv_cq); + attr->recv_cq = NULL; + rdsdebug("ib_create_cq send failed: %d\n", ret); + goto out; + } + + ret = ib_req_notify_cq(attr->send_cq, IB_CQ_NEXT_COMP); + if (ret) { + rdsdebug("ib_req_notify_cq send failed: %d\n", ret); + goto out; + } + + ret = ib_req_notify_cq(attr->recv_cq, IB_CQ_SOLICITED); + if (ret) { + rdsdebug("ib_req_notify_cq recv failed: %d\n", ret); + goto out; + } + +out: + if (ret) { + if (attr->send_cq) + ib_destroy_cq(attr->send_cq); + if (attr->recv_cq) + ib_destroy_cq(attr->recv_cq); + } + return ret; +} + +/* + * This needs to be very careful to not leave IS_ERR pointers around for + * cleanup to trip over. + */ +static int rds_iw_setup_qp(struct rds_connection *conn) +{ + struct rds_iw_connection *ic = conn->c_transport_data; + struct ib_device *dev = ic->i_cm_id->device; + struct ib_qp_init_attr attr; + struct rds_iw_device *rds_iwdev; + int ret; + + /* rds_iw_add_one creates a rds_iw_device object per IB device, + * and allocates a protection domain, memory range and MR pool + * for each. If that fails for any reason, it will not register + * the rds_iwdev at all. + */ + rds_iwdev = ib_get_client_data(dev, &rds_iw_client); + if (rds_iwdev == NULL) { + if (printk_ratelimit()) + printk(KERN_NOTICE "RDS/IW: No client_data for device %s\n", + dev->name); + return -EOPNOTSUPP; + } + + /* Protection domain and memory range */ + ic->i_pd = rds_iwdev->pd; + ic->i_mr = rds_iwdev->mr; + + ret = rds_iw_init_qp_attrs(&attr, rds_iwdev, + &ic->i_send_ring, rds_iw_send_cq_comp_handler, + &ic->i_recv_ring, rds_iw_recv_cq_comp_handler, + conn); + if (ret < 0) + goto out; + + ic->i_send_cq = attr.send_cq; + ic->i_recv_cq = attr.recv_cq; + + /* + * XXX this can fail if max_*_wr is too large? Are we supposed + * to back off until we get a value that the hardware can support? + */ + ret = rdma_create_qp(ic->i_cm_id, ic->i_pd, &attr); + if (ret) { + rdsdebug("rdma_create_qp failed: %d\n", ret); + goto out; + } + + ic->i_send_hdrs = ib_dma_alloc_coherent(dev, + ic->i_send_ring.w_nr * + sizeof(struct rds_header), + &ic->i_send_hdrs_dma, GFP_KERNEL); + if (ic->i_send_hdrs == NULL) { + ret = -ENOMEM; + rdsdebug("ib_dma_alloc_coherent send failed\n"); + goto out; + } + + ic->i_recv_hdrs = ib_dma_alloc_coherent(dev, + ic->i_recv_ring.w_nr * + sizeof(struct rds_header), + &ic->i_recv_hdrs_dma, GFP_KERNEL); + if (ic->i_recv_hdrs == NULL) { + ret = -ENOMEM; + rdsdebug("ib_dma_alloc_coherent recv failed\n"); + goto out; + } + + ic->i_ack = ib_dma_alloc_coherent(dev, sizeof(struct rds_header), + &ic->i_ack_dma, GFP_KERNEL); + if (ic->i_ack == NULL) { + ret = -ENOMEM; + rdsdebug("ib_dma_alloc_coherent ack failed\n"); + goto out; + } + + ic->i_sends = vmalloc(ic->i_send_ring.w_nr * sizeof(struct rds_iw_send_work)); + if (ic->i_sends == NULL) { + ret = -ENOMEM; + rdsdebug("send allocation failed\n"); + goto out; + } + rds_iw_send_init_ring(ic); + + ic->i_recvs = vmalloc(ic->i_recv_ring.w_nr * sizeof(struct rds_iw_recv_work)); + if (ic->i_recvs == NULL) { + ret = -ENOMEM; + rdsdebug("recv allocation failed\n"); + goto out; + } + + rds_iw_recv_init_ring(ic); + rds_iw_recv_init_ack(ic); + + /* Post receive buffers - as a side effect, this will update + * the posted credit count. */ + rds_iw_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 1); + + rdsdebug("conn %p pd %p mr %p cq %p %p\n", conn, ic->i_pd, ic->i_mr, + ic->i_send_cq, ic->i_recv_cq); + +out: + return ret; +} + +static u32 rds_iw_protocol_compatible(const struct rds_iw_connect_private *dp) +{ + u16 common; + u32 version = 0; + + /* rdma_cm private data is odd - when there is any private data in the + * request, we will be given a pretty large buffer without telling us the + * original size. The only way to tell the difference is by looking at + * the contents, which are initialized to zero. + * If the protocol version fields aren't set, this is a connection attempt + * from an older version. This could could be 3.0 or 2.0 - we can't tell. + * We really should have changed this for OFED 1.3 :-( */ + if (dp->dp_protocol_major == 0) + return RDS_PROTOCOL_3_0; + + common = be16_to_cpu(dp->dp_protocol_minor_mask) & RDS_IW_SUPPORTED_PROTOCOLS; + if (dp->dp_protocol_major == 3 && common) { + version = RDS_PROTOCOL_3_0; + while ((common >>= 1) != 0) + version++; + } else if (printk_ratelimit()) { + printk(KERN_NOTICE "RDS: Connection from %pI4 using " + "incompatible protocol version %u.%u\n", + &dp->dp_saddr, + dp->dp_protocol_major, + dp->dp_protocol_minor); + } + return version; +} + +int rds_iw_cm_handle_connect(struct rdma_cm_id *cm_id, + struct rdma_cm_event *event) +{ + const struct rds_iw_connect_private *dp = event->param.conn.private_data; + struct rds_iw_connect_private dp_rep; + struct rds_connection *conn = NULL; + struct rds_iw_connection *ic = NULL; + struct rdma_conn_param conn_param; + struct rds_iw_device *rds_iwdev; + u32 version; + int err, destroy = 1; + + /* Check whether the remote protocol version matches ours. */ + version = rds_iw_protocol_compatible(dp); + if (!version) + goto out; + + rdsdebug("saddr %pI4 daddr %pI4 RDSv%u.%u\n", + &dp->dp_saddr, &dp->dp_daddr, + RDS_PROTOCOL_MAJOR(version), RDS_PROTOCOL_MINOR(version)); + + conn = rds_conn_create(dp->dp_daddr, dp->dp_saddr, &rds_iw_transport, + GFP_KERNEL); + if (IS_ERR(conn)) { + rdsdebug("rds_conn_create failed (%ld)\n", PTR_ERR(conn)); + conn = NULL; + goto out; + } + + /* + * The connection request may occur while the + * previous connection exist, e.g. in case of failover. + * But as connections may be initiated simultaneously + * by both hosts, we have a random backoff mechanism - + * see the comment above rds_queue_reconnect() + */ + mutex_lock(&conn->c_cm_lock); + if (!rds_conn_transition(conn, RDS_CONN_DOWN, RDS_CONN_CONNECTING)) { + if (rds_conn_state(conn) == RDS_CONN_UP) { + rdsdebug("incoming connect while connecting\n"); + rds_conn_drop(conn); + rds_iw_stats_inc(s_iw_listen_closed_stale); + } else + if (rds_conn_state(conn) == RDS_CONN_CONNECTING) { + /* Wait and see - our connect may still be succeeding */ + rds_iw_stats_inc(s_iw_connect_raced); + } + mutex_unlock(&conn->c_cm_lock); + goto out; + } + + ic = conn->c_transport_data; + + rds_iw_set_protocol(conn, version); + rds_iw_set_flow_control(conn, be32_to_cpu(dp->dp_credit)); + + /* If the peer gave us the last packet it saw, process this as if + * we had received a regular ACK. */ + if (dp->dp_ack_seq) + rds_send_drop_acked(conn, be64_to_cpu(dp->dp_ack_seq), NULL); + + BUG_ON(cm_id->context); + BUG_ON(ic->i_cm_id); + + ic->i_cm_id = cm_id; + cm_id->context = conn; + + rds_iwdev = ib_get_client_data(cm_id->device, &rds_iw_client); + ic->i_dma_local_lkey = rds_iwdev->dma_local_lkey; + + /* We got halfway through setting up the ib_connection, if we + * fail now, we have to take the long route out of this mess. */ + destroy = 0; + + err = rds_iw_setup_qp(conn); + if (err) { + rds_iw_conn_error(conn, "rds_iw_setup_qp failed (%d)\n", err); + goto out; + } + + rds_iw_cm_fill_conn_param(conn, &conn_param, &dp_rep, version); + + /* rdma_accept() calls rdma_reject() internally if it fails */ + err = rdma_accept(cm_id, &conn_param); + mutex_unlock(&conn->c_cm_lock); + if (err) { + rds_iw_conn_error(conn, "rdma_accept failed (%d)\n", err); + goto out; + } + + return 0; + +out: + rdma_reject(cm_id, NULL, 0); + return destroy; +} + + +int rds_iw_cm_initiate_connect(struct rdma_cm_id *cm_id) +{ + struct rds_connection *conn = cm_id->context; + struct rds_iw_connection *ic = conn->c_transport_data; + struct rdma_conn_param conn_param; + struct rds_iw_connect_private dp; + int ret; + + /* If the peer doesn't do protocol negotiation, we must + * default to RDSv3.0 */ + rds_iw_set_protocol(conn, RDS_PROTOCOL_3_0); + ic->i_flowctl = rds_iw_sysctl_flow_control; /* advertise flow control */ + + ret = rds_iw_setup_qp(conn); + if (ret) { + rds_iw_conn_error(conn, "rds_iw_setup_qp failed (%d)\n", ret); + goto out; + } + + rds_iw_cm_fill_conn_param(conn, &conn_param, &dp, RDS_PROTOCOL_VERSION); + + ret = rdma_connect(cm_id, &conn_param); + if (ret) + rds_iw_conn_error(conn, "rdma_connect failed (%d)\n", ret); + +out: + /* Beware - returning non-zero tells the rdma_cm to destroy + * the cm_id. We should certainly not do it as long as we still + * "own" the cm_id. */ + if (ret) { + struct rds_iw_connection *ic = conn->c_transport_data; + + if (ic->i_cm_id == cm_id) + ret = 0; + } + return ret; +} + +int rds_iw_conn_connect(struct rds_connection *conn) +{ + struct rds_iw_connection *ic = conn->c_transport_data; + struct rds_iw_device *rds_iwdev; + struct sockaddr_in src, dest; + int ret; + + /* XXX I wonder what affect the port space has */ + /* delegate cm event handler to rdma_transport */ + ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn, + RDMA_PS_TCP); + if (IS_ERR(ic->i_cm_id)) { + ret = PTR_ERR(ic->i_cm_id); + ic->i_cm_id = NULL; + rdsdebug("rdma_create_id() failed: %d\n", ret); + goto out; + } + + rdsdebug("created cm id %p for conn %p\n", ic->i_cm_id, conn); + + src.sin_family = AF_INET; + src.sin_addr.s_addr = (__force u32)conn->c_laddr; + src.sin_port = (__force u16)htons(0); + + /* First, bind to the local address and device. */ + ret = rdma_bind_addr(ic->i_cm_id, (struct sockaddr *) &src); + if (ret) { + rdsdebug("rdma_bind_addr(%pI4) failed: %d\n", + &conn->c_laddr, ret); + rdma_destroy_id(ic->i_cm_id); + ic->i_cm_id = NULL; + goto out; + } + + rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client); + ic->i_dma_local_lkey = rds_iwdev->dma_local_lkey; + + dest.sin_family = AF_INET; + dest.sin_addr.s_addr = (__force u32)conn->c_faddr; + dest.sin_port = (__force u16)htons(RDS_PORT); + + ret = rdma_resolve_addr(ic->i_cm_id, (struct sockaddr *)&src, + (struct sockaddr *)&dest, + RDS_RDMA_RESOLVE_TIMEOUT_MS); + if (ret) { + rdsdebug("addr resolve failed for cm id %p: %d\n", ic->i_cm_id, + ret); + rdma_destroy_id(ic->i_cm_id); + ic->i_cm_id = NULL; + } + +out: + return ret; +} + +/* + * This is so careful about only cleaning up resources that were built up + * so that it can be called at any point during startup. In fact it + * can be called multiple times for a given connection. + */ +void rds_iw_conn_shutdown(struct rds_connection *conn) +{ + struct rds_iw_connection *ic = conn->c_transport_data; + int err = 0; + struct ib_qp_attr qp_attr; + + rdsdebug("cm %p pd %p cq %p %p qp %p\n", ic->i_cm_id, + ic->i_pd, ic->i_send_cq, ic->i_recv_cq, + ic->i_cm_id ? ic->i_cm_id->qp : NULL); + + if (ic->i_cm_id) { + struct ib_device *dev = ic->i_cm_id->device; + + rdsdebug("disconnecting cm %p\n", ic->i_cm_id); + err = rdma_disconnect(ic->i_cm_id); + if (err) { + /* Actually this may happen quite frequently, when + * an outgoing connect raced with an incoming connect. + */ + rdsdebug("rds_iw_conn_shutdown: failed to disconnect," + " cm: %p err %d\n", ic->i_cm_id, err); + } + + if (ic->i_cm_id->qp) { + qp_attr.qp_state = IB_QPS_ERR; + ib_modify_qp(ic->i_cm_id->qp, &qp_attr, IB_QP_STATE); + } + + wait_event(rds_iw_ring_empty_wait, + rds_iw_ring_empty(&ic->i_send_ring) && + rds_iw_ring_empty(&ic->i_recv_ring)); + + if (ic->i_send_hdrs) + ib_dma_free_coherent(dev, + ic->i_send_ring.w_nr * + sizeof(struct rds_header), + ic->i_send_hdrs, + ic->i_send_hdrs_dma); + + if (ic->i_recv_hdrs) + ib_dma_free_coherent(dev, + ic->i_recv_ring.w_nr * + sizeof(struct rds_header), + ic->i_recv_hdrs, + ic->i_recv_hdrs_dma); + + if (ic->i_ack) + ib_dma_free_coherent(dev, sizeof(struct rds_header), + ic->i_ack, ic->i_ack_dma); + + if (ic->i_sends) + rds_iw_send_clear_ring(ic); + if (ic->i_recvs) + rds_iw_recv_clear_ring(ic); + + if (ic->i_cm_id->qp) + rdma_destroy_qp(ic->i_cm_id); + if (ic->i_send_cq) + ib_destroy_cq(ic->i_send_cq); + if (ic->i_recv_cq) + ib_destroy_cq(ic->i_recv_cq); + + /* + * If associated with an rds_iw_device: + * Move connection back to the nodev list. + * Remove cm_id from the device cm_id list. + */ + if (ic->rds_iwdev) { + + spin_lock_irq(&ic->rds_iwdev->spinlock); + BUG_ON(list_empty(&ic->iw_node)); + list_del(&ic->iw_node); + spin_unlock_irq(&ic->rds_iwdev->spinlock); + + spin_lock_irq(&iw_nodev_conns_lock); + list_add_tail(&ic->iw_node, &iw_nodev_conns); + spin_unlock_irq(&iw_nodev_conns_lock); + rds_iw_remove_cm_id(ic->rds_iwdev, ic->i_cm_id); + ic->rds_iwdev = NULL; + } + + rdma_destroy_id(ic->i_cm_id); + + ic->i_cm_id = NULL; + ic->i_pd = NULL; + ic->i_mr = NULL; + ic->i_send_cq = NULL; + ic->i_recv_cq = NULL; + ic->i_send_hdrs = NULL; + ic->i_recv_hdrs = NULL; + ic->i_ack = NULL; + } + BUG_ON(ic->rds_iwdev); + + /* Clear pending transmit */ + if (ic->i_rm) { + rds_message_put(ic->i_rm); + ic->i_rm = NULL; + } + + /* Clear the ACK state */ + clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); + rds_iw_set_64bit(&ic->i_ack_next, 0); + ic->i_ack_recv = 0; + + /* Clear flow control state */ + ic->i_flowctl = 0; + atomic_set(&ic->i_credits, 0); + + rds_iw_ring_init(&ic->i_send_ring, rds_iw_sysctl_max_send_wr); + rds_iw_ring_init(&ic->i_recv_ring, rds_iw_sysctl_max_recv_wr); + + if (ic->i_iwinc) { + rds_inc_put(&ic->i_iwinc->ii_inc); + ic->i_iwinc = NULL; + } + + vfree(ic->i_sends); + ic->i_sends = NULL; + vfree(ic->i_recvs); + ic->i_recvs = NULL; + rdsdebug("shutdown complete\n"); +} + +int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp) +{ + struct rds_iw_connection *ic; + unsigned long flags; + + /* XXX too lazy? */ + ic = kzalloc(sizeof(struct rds_iw_connection), GFP_KERNEL); + if (ic == NULL) + return -ENOMEM; + + INIT_LIST_HEAD(&ic->iw_node); + mutex_init(&ic->i_recv_mutex); + + /* + * rds_iw_conn_shutdown() waits for these to be emptied so they + * must be initialized before it can be called. + */ + rds_iw_ring_init(&ic->i_send_ring, rds_iw_sysctl_max_send_wr); + rds_iw_ring_init(&ic->i_recv_ring, rds_iw_sysctl_max_recv_wr); + + ic->conn = conn; + conn->c_transport_data = ic; + + spin_lock_irqsave(&iw_nodev_conns_lock, flags); + list_add_tail(&ic->iw_node, &iw_nodev_conns); + spin_unlock_irqrestore(&iw_nodev_conns_lock, flags); + + + rdsdebug("conn %p conn ic %p\n", conn, conn->c_transport_data); + return 0; +} + +void rds_iw_conn_free(void *arg) +{ + struct rds_iw_connection *ic = arg; + rdsdebug("ic %p\n", ic); + list_del(&ic->iw_node); + kfree(ic); +} + +/* + * An error occurred on the connection + */ +void +__rds_iw_conn_error(struct rds_connection *conn, const char *fmt, ...) +{ + va_list ap; + + rds_conn_drop(conn); + + va_start(ap, fmt); + vprintk(fmt, ap); + va_end(ap); +} diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c new file mode 100644 index 000000000000..1c02a8f952d0 --- /dev/null +++ b/net/rds/iw_rdma.c @@ -0,0 +1,888 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include + +#include "rds.h" +#include "rdma.h" +#include "iw.h" + + +/* + * This is stored as mr->r_trans_private. + */ +struct rds_iw_mr { + struct rds_iw_device *device; + struct rds_iw_mr_pool *pool; + struct rdma_cm_id *cm_id; + + struct ib_mr *mr; + struct ib_fast_reg_page_list *page_list; + + struct rds_iw_mapping mapping; + unsigned char remap_count; +}; + +/* + * Our own little MR pool + */ +struct rds_iw_mr_pool { + struct rds_iw_device *device; /* back ptr to the device that owns us */ + + struct mutex flush_lock; /* serialize fmr invalidate */ + struct work_struct flush_worker; /* flush worker */ + + spinlock_t list_lock; /* protect variables below */ + atomic_t item_count; /* total # of MRs */ + atomic_t dirty_count; /* # dirty of MRs */ + struct list_head dirty_list; /* dirty mappings */ + struct list_head clean_list; /* unused & unamapped MRs */ + atomic_t free_pinned; /* memory pinned by free MRs */ + unsigned long max_message_size; /* in pages */ + unsigned long max_items; + unsigned long max_items_soft; + unsigned long max_free_pinned; + int max_pages; +}; + +static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all); +static void rds_iw_mr_pool_flush_worker(struct work_struct *work); +static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); +static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool, + struct rds_iw_mr *ibmr, + struct scatterlist *sg, unsigned int nents); +static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); +static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool, + struct list_head *unmap_list, + struct list_head *kill_list); +static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool, struct rds_iw_mr *ibmr); + +static int rds_iw_get_device(struct rds_sock *rs, struct rds_iw_device **rds_iwdev, struct rdma_cm_id **cm_id) +{ + struct rds_iw_device *iwdev; + struct rds_iw_cm_id *i_cm_id; + + *rds_iwdev = NULL; + *cm_id = NULL; + + list_for_each_entry(iwdev, &rds_iw_devices, list) { + spin_lock_irq(&iwdev->spinlock); + list_for_each_entry(i_cm_id, &iwdev->cm_id_list, list) { + struct sockaddr_in *src_addr, *dst_addr; + + src_addr = (struct sockaddr_in *)&i_cm_id->cm_id->route.addr.src_addr; + dst_addr = (struct sockaddr_in *)&i_cm_id->cm_id->route.addr.dst_addr; + + rdsdebug("local ipaddr = %x port %d, " + "remote ipaddr = %x port %d" + "..looking for %x port %d, " + "remote ipaddr = %x port %d\n", + src_addr->sin_addr.s_addr, + src_addr->sin_port, + dst_addr->sin_addr.s_addr, + dst_addr->sin_port, + rs->rs_bound_addr, + rs->rs_bound_port, + rs->rs_conn_addr, + rs->rs_conn_port); +#ifdef WORKING_TUPLE_DETECTION + if (src_addr->sin_addr.s_addr == rs->rs_bound_addr && + src_addr->sin_port == rs->rs_bound_port && + dst_addr->sin_addr.s_addr == rs->rs_conn_addr && + dst_addr->sin_port == rs->rs_conn_port) { +#else + /* FIXME - needs to compare the local and remote + * ipaddr/port tuple, but the ipaddr is the only + * available infomation in the rds_sock (as the rest are + * zero'ed. It doesn't appear to be properly populated + * during connection setup... + */ + if (src_addr->sin_addr.s_addr == rs->rs_bound_addr) { +#endif + spin_unlock_irq(&iwdev->spinlock); + *rds_iwdev = iwdev; + *cm_id = i_cm_id->cm_id; + return 0; + } + } + spin_unlock_irq(&iwdev->spinlock); + } + + return 1; +} + +static int rds_iw_add_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id) +{ + struct rds_iw_cm_id *i_cm_id; + + i_cm_id = kmalloc(sizeof *i_cm_id, GFP_KERNEL); + if (!i_cm_id) + return -ENOMEM; + + i_cm_id->cm_id = cm_id; + + spin_lock_irq(&rds_iwdev->spinlock); + list_add_tail(&i_cm_id->list, &rds_iwdev->cm_id_list); + spin_unlock_irq(&rds_iwdev->spinlock); + + return 0; +} + +void rds_iw_remove_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id) +{ + struct rds_iw_cm_id *i_cm_id; + + spin_lock_irq(&rds_iwdev->spinlock); + list_for_each_entry(i_cm_id, &rds_iwdev->cm_id_list, list) { + if (i_cm_id->cm_id == cm_id) { + list_del(&i_cm_id->list); + kfree(i_cm_id); + break; + } + } + spin_unlock_irq(&rds_iwdev->spinlock); +} + + +int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id) +{ + struct sockaddr_in *src_addr, *dst_addr; + struct rds_iw_device *rds_iwdev_old; + struct rds_sock rs; + struct rdma_cm_id *pcm_id; + int rc; + + src_addr = (struct sockaddr_in *)&cm_id->route.addr.src_addr; + dst_addr = (struct sockaddr_in *)&cm_id->route.addr.dst_addr; + + rs.rs_bound_addr = src_addr->sin_addr.s_addr; + rs.rs_bound_port = src_addr->sin_port; + rs.rs_conn_addr = dst_addr->sin_addr.s_addr; + rs.rs_conn_port = dst_addr->sin_port; + + rc = rds_iw_get_device(&rs, &rds_iwdev_old, &pcm_id); + if (rc) + rds_iw_remove_cm_id(rds_iwdev, cm_id); + + return rds_iw_add_cm_id(rds_iwdev, cm_id); +} + +int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn) +{ + struct rds_iw_connection *ic = conn->c_transport_data; + + /* conn was previously on the nodev_conns_list */ + spin_lock_irq(&iw_nodev_conns_lock); + BUG_ON(list_empty(&iw_nodev_conns)); + BUG_ON(list_empty(&ic->iw_node)); + list_del(&ic->iw_node); + spin_unlock_irq(&iw_nodev_conns_lock); + + spin_lock_irq(&rds_iwdev->spinlock); + list_add_tail(&ic->iw_node, &rds_iwdev->conn_list); + spin_unlock_irq(&rds_iwdev->spinlock); + + ic->rds_iwdev = rds_iwdev; + + return 0; +} + +void rds_iw_remove_nodev_conns(void) +{ + struct rds_iw_connection *ic, *_ic; + LIST_HEAD(tmp_list); + + /* avoid calling conn_destroy with irqs off */ + spin_lock_irq(&iw_nodev_conns_lock); + list_splice(&iw_nodev_conns, &tmp_list); + INIT_LIST_HEAD(&iw_nodev_conns); + spin_unlock_irq(&iw_nodev_conns_lock); + + list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) { + if (ic->conn->c_passive) + rds_conn_destroy(ic->conn->c_passive); + rds_conn_destroy(ic->conn); + } +} + +void rds_iw_remove_conns(struct rds_iw_device *rds_iwdev) +{ + struct rds_iw_connection *ic, *_ic; + LIST_HEAD(tmp_list); + + /* avoid calling conn_destroy with irqs off */ + spin_lock_irq(&rds_iwdev->spinlock); + list_splice(&rds_iwdev->conn_list, &tmp_list); + INIT_LIST_HEAD(&rds_iwdev->conn_list); + spin_unlock_irq(&rds_iwdev->spinlock); + + list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) { + if (ic->conn->c_passive) + rds_conn_destroy(ic->conn->c_passive); + rds_conn_destroy(ic->conn); + } +} + +static void rds_iw_set_scatterlist(struct rds_iw_scatterlist *sg, + struct scatterlist *list, unsigned int sg_len) +{ + sg->list = list; + sg->len = sg_len; + sg->dma_len = 0; + sg->dma_npages = 0; + sg->bytes = 0; +} + +static u64 *rds_iw_map_scatterlist(struct rds_iw_device *rds_iwdev, + struct rds_iw_scatterlist *sg, + unsigned int dma_page_shift) +{ + struct ib_device *dev = rds_iwdev->dev; + u64 *dma_pages = NULL; + u64 dma_mask; + unsigned int dma_page_size; + int i, j, ret; + + dma_page_size = 1 << dma_page_shift; + dma_mask = dma_page_size - 1; + + WARN_ON(sg->dma_len); + + sg->dma_len = ib_dma_map_sg(dev, sg->list, sg->len, DMA_BIDIRECTIONAL); + if (unlikely(!sg->dma_len)) { + printk(KERN_WARNING "RDS/IW: dma_map_sg failed!\n"); + return ERR_PTR(-EBUSY); + } + + sg->bytes = 0; + sg->dma_npages = 0; + + ret = -EINVAL; + for (i = 0; i < sg->dma_len; ++i) { + unsigned int dma_len = ib_sg_dma_len(dev, &sg->list[i]); + u64 dma_addr = ib_sg_dma_address(dev, &sg->list[i]); + u64 end_addr; + + sg->bytes += dma_len; + + end_addr = dma_addr + dma_len; + if (dma_addr & dma_mask) { + if (i > 0) + goto out_unmap; + dma_addr &= ~dma_mask; + } + if (end_addr & dma_mask) { + if (i < sg->dma_len - 1) + goto out_unmap; + end_addr = (end_addr + dma_mask) & ~dma_mask; + } + + sg->dma_npages += (end_addr - dma_addr) >> dma_page_shift; + } + + /* Now gather the dma addrs into one list */ + if (sg->dma_npages > fastreg_message_size) + goto out_unmap; + + dma_pages = kmalloc(sizeof(u64) * sg->dma_npages, GFP_ATOMIC); + if (!dma_pages) { + ret = -ENOMEM; + goto out_unmap; + } + + for (i = j = 0; i < sg->dma_len; ++i) { + unsigned int dma_len = ib_sg_dma_len(dev, &sg->list[i]); + u64 dma_addr = ib_sg_dma_address(dev, &sg->list[i]); + u64 end_addr; + + end_addr = dma_addr + dma_len; + dma_addr &= ~dma_mask; + for (; dma_addr < end_addr; dma_addr += dma_page_size) + dma_pages[j++] = dma_addr; + BUG_ON(j > sg->dma_npages); + } + + return dma_pages; + +out_unmap: + ib_dma_unmap_sg(rds_iwdev->dev, sg->list, sg->len, DMA_BIDIRECTIONAL); + sg->dma_len = 0; + kfree(dma_pages); + return ERR_PTR(ret); +} + + +struct rds_iw_mr_pool *rds_iw_create_mr_pool(struct rds_iw_device *rds_iwdev) +{ + struct rds_iw_mr_pool *pool; + + pool = kzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) { + printk(KERN_WARNING "RDS/IW: rds_iw_create_mr_pool alloc error\n"); + return ERR_PTR(-ENOMEM); + } + + pool->device = rds_iwdev; + INIT_LIST_HEAD(&pool->dirty_list); + INIT_LIST_HEAD(&pool->clean_list); + mutex_init(&pool->flush_lock); + spin_lock_init(&pool->list_lock); + INIT_WORK(&pool->flush_worker, rds_iw_mr_pool_flush_worker); + + pool->max_message_size = fastreg_message_size; + pool->max_items = fastreg_pool_size; + pool->max_free_pinned = pool->max_items * pool->max_message_size / 4; + pool->max_pages = fastreg_message_size; + + /* We never allow more than max_items MRs to be allocated. + * When we exceed more than max_items_soft, we start freeing + * items more aggressively. + * Make sure that max_items > max_items_soft > max_items / 2 + */ + pool->max_items_soft = pool->max_items * 3 / 4; + + return pool; +} + +void rds_iw_get_mr_info(struct rds_iw_device *rds_iwdev, struct rds_info_rdma_connection *iinfo) +{ + struct rds_iw_mr_pool *pool = rds_iwdev->mr_pool; + + iinfo->rdma_mr_max = pool->max_items; + iinfo->rdma_mr_size = pool->max_pages; +} + +void rds_iw_destroy_mr_pool(struct rds_iw_mr_pool *pool) +{ + flush_workqueue(rds_wq); + rds_iw_flush_mr_pool(pool, 1); + BUG_ON(atomic_read(&pool->item_count)); + BUG_ON(atomic_read(&pool->free_pinned)); + kfree(pool); +} + +static inline struct rds_iw_mr *rds_iw_reuse_fmr(struct rds_iw_mr_pool *pool) +{ + struct rds_iw_mr *ibmr = NULL; + unsigned long flags; + + spin_lock_irqsave(&pool->list_lock, flags); + if (!list_empty(&pool->clean_list)) { + ibmr = list_entry(pool->clean_list.next, struct rds_iw_mr, mapping.m_list); + list_del_init(&ibmr->mapping.m_list); + } + spin_unlock_irqrestore(&pool->list_lock, flags); + + return ibmr; +} + +static struct rds_iw_mr *rds_iw_alloc_mr(struct rds_iw_device *rds_iwdev) +{ + struct rds_iw_mr_pool *pool = rds_iwdev->mr_pool; + struct rds_iw_mr *ibmr = NULL; + int err = 0, iter = 0; + + while (1) { + ibmr = rds_iw_reuse_fmr(pool); + if (ibmr) + return ibmr; + + /* No clean MRs - now we have the choice of either + * allocating a fresh MR up to the limit imposed by the + * driver, or flush any dirty unused MRs. + * We try to avoid stalling in the send path if possible, + * so we allocate as long as we're allowed to. + * + * We're fussy with enforcing the FMR limit, though. If the driver + * tells us we can't use more than N fmrs, we shouldn't start + * arguing with it */ + if (atomic_inc_return(&pool->item_count) <= pool->max_items) + break; + + atomic_dec(&pool->item_count); + + if (++iter > 2) { + rds_iw_stats_inc(s_iw_rdma_mr_pool_depleted); + return ERR_PTR(-EAGAIN); + } + + /* We do have some empty MRs. Flush them out. */ + rds_iw_stats_inc(s_iw_rdma_mr_pool_wait); + rds_iw_flush_mr_pool(pool, 0); + } + + ibmr = kzalloc(sizeof(*ibmr), GFP_KERNEL); + if (!ibmr) { + err = -ENOMEM; + goto out_no_cigar; + } + + spin_lock_init(&ibmr->mapping.m_lock); + INIT_LIST_HEAD(&ibmr->mapping.m_list); + ibmr->mapping.m_mr = ibmr; + + err = rds_iw_init_fastreg(pool, ibmr); + if (err) + goto out_no_cigar; + + rds_iw_stats_inc(s_iw_rdma_mr_alloc); + return ibmr; + +out_no_cigar: + if (ibmr) { + rds_iw_destroy_fastreg(pool, ibmr); + kfree(ibmr); + } + atomic_dec(&pool->item_count); + return ERR_PTR(err); +} + +void rds_iw_sync_mr(void *trans_private, int direction) +{ + struct rds_iw_mr *ibmr = trans_private; + struct rds_iw_device *rds_iwdev = ibmr->device; + + switch (direction) { + case DMA_FROM_DEVICE: + ib_dma_sync_sg_for_cpu(rds_iwdev->dev, ibmr->mapping.m_sg.list, + ibmr->mapping.m_sg.dma_len, DMA_BIDIRECTIONAL); + break; + case DMA_TO_DEVICE: + ib_dma_sync_sg_for_device(rds_iwdev->dev, ibmr->mapping.m_sg.list, + ibmr->mapping.m_sg.dma_len, DMA_BIDIRECTIONAL); + break; + } +} + +static inline unsigned int rds_iw_flush_goal(struct rds_iw_mr_pool *pool, int free_all) +{ + unsigned int item_count; + + item_count = atomic_read(&pool->item_count); + if (free_all) + return item_count; + + return 0; +} + +/* + * Flush our pool of MRs. + * At a minimum, all currently unused MRs are unmapped. + * If the number of MRs allocated exceeds the limit, we also try + * to free as many MRs as needed to get back to this limit. + */ +static int rds_iw_flush_mr_pool(struct rds_iw_mr_pool *pool, int free_all) +{ + struct rds_iw_mr *ibmr, *next; + LIST_HEAD(unmap_list); + LIST_HEAD(kill_list); + unsigned long flags; + unsigned int nfreed = 0, ncleaned = 0, free_goal; + int ret = 0; + + rds_iw_stats_inc(s_iw_rdma_mr_pool_flush); + + mutex_lock(&pool->flush_lock); + + spin_lock_irqsave(&pool->list_lock, flags); + /* Get the list of all mappings to be destroyed */ + list_splice_init(&pool->dirty_list, &unmap_list); + if (free_all) + list_splice_init(&pool->clean_list, &kill_list); + spin_unlock_irqrestore(&pool->list_lock, flags); + + free_goal = rds_iw_flush_goal(pool, free_all); + + /* Batched invalidate of dirty MRs. + * For FMR based MRs, the mappings on the unmap list are + * actually members of an ibmr (ibmr->mapping). They either + * migrate to the kill_list, or have been cleaned and should be + * moved to the clean_list. + * For fastregs, they will be dynamically allocated, and + * will be destroyed by the unmap function. + */ + if (!list_empty(&unmap_list)) { + ncleaned = rds_iw_unmap_fastreg_list(pool, &unmap_list, &kill_list); + /* If we've been asked to destroy all MRs, move those + * that were simply cleaned to the kill list */ + if (free_all) + list_splice_init(&unmap_list, &kill_list); + } + + /* Destroy any MRs that are past their best before date */ + list_for_each_entry_safe(ibmr, next, &kill_list, mapping.m_list) { + rds_iw_stats_inc(s_iw_rdma_mr_free); + list_del(&ibmr->mapping.m_list); + rds_iw_destroy_fastreg(pool, ibmr); + kfree(ibmr); + nfreed++; + } + + /* Anything that remains are laundered ibmrs, which we can add + * back to the clean list. */ + if (!list_empty(&unmap_list)) { + spin_lock_irqsave(&pool->list_lock, flags); + list_splice(&unmap_list, &pool->clean_list); + spin_unlock_irqrestore(&pool->list_lock, flags); + } + + atomic_sub(ncleaned, &pool->dirty_count); + atomic_sub(nfreed, &pool->item_count); + + mutex_unlock(&pool->flush_lock); + return ret; +} + +static void rds_iw_mr_pool_flush_worker(struct work_struct *work) +{ + struct rds_iw_mr_pool *pool = container_of(work, struct rds_iw_mr_pool, flush_worker); + + rds_iw_flush_mr_pool(pool, 0); +} + +void rds_iw_free_mr(void *trans_private, int invalidate) +{ + struct rds_iw_mr *ibmr = trans_private; + struct rds_iw_mr_pool *pool = ibmr->device->mr_pool; + + rdsdebug("RDS/IW: free_mr nents %u\n", ibmr->mapping.m_sg.len); + if (!pool) + return; + + /* Return it to the pool's free list */ + rds_iw_free_fastreg(pool, ibmr); + + /* If we've pinned too many pages, request a flush */ + if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned + || atomic_read(&pool->dirty_count) >= pool->max_items / 10) + queue_work(rds_wq, &pool->flush_worker); + + if (invalidate) { + if (likely(!in_interrupt())) { + rds_iw_flush_mr_pool(pool, 0); + } else { + /* We get here if the user created a MR marked + * as use_once and invalidate at the same time. */ + queue_work(rds_wq, &pool->flush_worker); + } + } +} + +void rds_iw_flush_mrs(void) +{ + struct rds_iw_device *rds_iwdev; + + list_for_each_entry(rds_iwdev, &rds_iw_devices, list) { + struct rds_iw_mr_pool *pool = rds_iwdev->mr_pool; + + if (pool) + rds_iw_flush_mr_pool(pool, 0); + } +} + +void *rds_iw_get_mr(struct scatterlist *sg, unsigned long nents, + struct rds_sock *rs, u32 *key_ret) +{ + struct rds_iw_device *rds_iwdev; + struct rds_iw_mr *ibmr = NULL; + struct rdma_cm_id *cm_id; + int ret; + + ret = rds_iw_get_device(rs, &rds_iwdev, &cm_id); + if (ret || !cm_id) { + ret = -ENODEV; + goto out; + } + + if (!rds_iwdev->mr_pool) { + ret = -ENODEV; + goto out; + } + + ibmr = rds_iw_alloc_mr(rds_iwdev); + if (IS_ERR(ibmr)) + return ibmr; + + ibmr->cm_id = cm_id; + ibmr->device = rds_iwdev; + + ret = rds_iw_map_fastreg(rds_iwdev->mr_pool, ibmr, sg, nents); + if (ret == 0) + *key_ret = ibmr->mr->rkey; + else + printk(KERN_WARNING "RDS/IW: failed to map mr (errno=%d)\n", ret); + +out: + if (ret) { + if (ibmr) + rds_iw_free_mr(ibmr, 0); + ibmr = ERR_PTR(ret); + } + return ibmr; +} + +/* + * iWARP fastreg handling + * + * The life cycle of a fastreg registration is a bit different from + * FMRs. + * The idea behind fastreg is to have one MR, to which we bind different + * mappings over time. To avoid stalling on the expensive map and invalidate + * operations, these operations are pipelined on the same send queue on + * which we want to send the message containing the r_key. + * + * This creates a bit of a problem for us, as we do not have the destination + * IP in GET_MR, so the connection must be setup prior to the GET_MR call for + * RDMA to be correctly setup. If a fastreg request is present, rds_iw_xmit + * will try to queue a LOCAL_INV (if needed) and a FAST_REG_MR work request + * before queuing the SEND. When completions for these arrive, they are + * dispatched to the MR has a bit set showing that RDMa can be performed. + * + * There is another interesting aspect that's related to invalidation. + * The application can request that a mapping is invalidated in FREE_MR. + * The expectation there is that this invalidation step includes ALL + * PREVIOUSLY FREED MRs. + */ +static int rds_iw_init_fastreg(struct rds_iw_mr_pool *pool, + struct rds_iw_mr *ibmr) +{ + struct rds_iw_device *rds_iwdev = pool->device; + struct ib_fast_reg_page_list *page_list = NULL; + struct ib_mr *mr; + int err; + + mr = ib_alloc_fast_reg_mr(rds_iwdev->pd, pool->max_message_size); + if (IS_ERR(mr)) { + err = PTR_ERR(mr); + + printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_mr failed (err=%d)\n", err); + return err; + } + + /* FIXME - this is overkill, but mapping->m_sg.dma_len/mapping->m_sg.dma_npages + * is not filled in. + */ + page_list = ib_alloc_fast_reg_page_list(rds_iwdev->dev, pool->max_message_size); + if (IS_ERR(page_list)) { + err = PTR_ERR(page_list); + + printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_page_list failed (err=%d)\n", err); + ib_dereg_mr(mr); + return err; + } + + ibmr->page_list = page_list; + ibmr->mr = mr; + return 0; +} + +static int rds_iw_rdma_build_fastreg(struct rds_iw_mapping *mapping) +{ + struct rds_iw_mr *ibmr = mapping->m_mr; + struct ib_send_wr f_wr, *failed_wr; + int ret; + + /* + * Perform a WR for the fast_reg_mr. Each individual page + * in the sg list is added to the fast reg page list and placed + * inside the fast_reg_mr WR. The key used is a rolling 8bit + * counter, which should guarantee uniqueness. + */ + ib_update_fast_reg_key(ibmr->mr, ibmr->remap_count++); + mapping->m_rkey = ibmr->mr->rkey; + + memset(&f_wr, 0, sizeof(f_wr)); + f_wr.wr_id = RDS_IW_FAST_REG_WR_ID; + f_wr.opcode = IB_WR_FAST_REG_MR; + f_wr.wr.fast_reg.length = mapping->m_sg.bytes; + f_wr.wr.fast_reg.rkey = mapping->m_rkey; + f_wr.wr.fast_reg.page_list = ibmr->page_list; + f_wr.wr.fast_reg.page_list_len = mapping->m_sg.dma_len; + f_wr.wr.fast_reg.page_shift = ibmr->device->page_shift; + f_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE; + f_wr.wr.fast_reg.iova_start = 0; + f_wr.send_flags = IB_SEND_SIGNALED; + + failed_wr = &f_wr; + ret = ib_post_send(ibmr->cm_id->qp, &f_wr, &failed_wr); + BUG_ON(failed_wr != &f_wr); + if (ret && printk_ratelimit()) + printk(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n", + __func__, __LINE__, ret); + return ret; +} + +static int rds_iw_rdma_fastreg_inv(struct rds_iw_mr *ibmr) +{ + struct ib_send_wr s_wr, *failed_wr; + int ret = 0; + + if (!ibmr->cm_id->qp || !ibmr->mr) + goto out; + + memset(&s_wr, 0, sizeof(s_wr)); + s_wr.wr_id = RDS_IW_LOCAL_INV_WR_ID; + s_wr.opcode = IB_WR_LOCAL_INV; + s_wr.ex.invalidate_rkey = ibmr->mr->rkey; + s_wr.send_flags = IB_SEND_SIGNALED; + + failed_wr = &s_wr; + ret = ib_post_send(ibmr->cm_id->qp, &s_wr, &failed_wr); + if (ret && printk_ratelimit()) { + printk(KERN_WARNING "RDS/IW: %s:%d ib_post_send returned %d\n", + __func__, __LINE__, ret); + goto out; + } +out: + return ret; +} + +static int rds_iw_map_fastreg(struct rds_iw_mr_pool *pool, + struct rds_iw_mr *ibmr, + struct scatterlist *sg, + unsigned int sg_len) +{ + struct rds_iw_device *rds_iwdev = pool->device; + struct rds_iw_mapping *mapping = &ibmr->mapping; + u64 *dma_pages; + int i, ret = 0; + + rds_iw_set_scatterlist(&mapping->m_sg, sg, sg_len); + + dma_pages = rds_iw_map_scatterlist(rds_iwdev, + &mapping->m_sg, + rds_iwdev->page_shift); + if (IS_ERR(dma_pages)) { + ret = PTR_ERR(dma_pages); + dma_pages = NULL; + goto out; + } + + if (mapping->m_sg.dma_len > pool->max_message_size) { + ret = -EMSGSIZE; + goto out; + } + + for (i = 0; i < mapping->m_sg.dma_npages; ++i) + ibmr->page_list->page_list[i] = dma_pages[i]; + + ret = rds_iw_rdma_build_fastreg(mapping); + if (ret) + goto out; + + rds_iw_stats_inc(s_iw_rdma_mr_used); + +out: + kfree(dma_pages); + + return ret; +} + +/* + * "Free" a fastreg MR. + */ +static void rds_iw_free_fastreg(struct rds_iw_mr_pool *pool, + struct rds_iw_mr *ibmr) +{ + unsigned long flags; + int ret; + + if (!ibmr->mapping.m_sg.dma_len) + return; + + ret = rds_iw_rdma_fastreg_inv(ibmr); + if (ret) + return; + + /* Try to post the LOCAL_INV WR to the queue. */ + spin_lock_irqsave(&pool->list_lock, flags); + + list_add_tail(&ibmr->mapping.m_list, &pool->dirty_list); + atomic_add(ibmr->mapping.m_sg.len, &pool->free_pinned); + atomic_inc(&pool->dirty_count); + + spin_unlock_irqrestore(&pool->list_lock, flags); +} + +static unsigned int rds_iw_unmap_fastreg_list(struct rds_iw_mr_pool *pool, + struct list_head *unmap_list, + struct list_head *kill_list) +{ + struct rds_iw_mapping *mapping, *next; + unsigned int ncleaned = 0; + LIST_HEAD(laundered); + + /* Batched invalidation of fastreg MRs. + * Why do we do it this way, even though we could pipeline unmap + * and remap? The reason is the application semantics - when the + * application requests an invalidation of MRs, it expects all + * previously released R_Keys to become invalid. + * + * If we implement MR reuse naively, we risk memory corruption + * (this has actually been observed). So the default behavior + * requires that a MR goes through an explicit unmap operation before + * we can reuse it again. + * + * We could probably improve on this a little, by allowing immediate + * reuse of a MR on the same socket (eg you could add small + * cache of unused MRs to strct rds_socket - GET_MR could grab one + * of these without requiring an explicit invalidate). + */ + while (!list_empty(unmap_list)) { + unsigned long flags; + + spin_lock_irqsave(&pool->list_lock, flags); + list_for_each_entry_safe(mapping, next, unmap_list, m_list) { + list_move(&mapping->m_list, &laundered); + ncleaned++; + } + spin_unlock_irqrestore(&pool->list_lock, flags); + } + + /* Move all laundered mappings back to the unmap list. + * We do not kill any WRs right now - it doesn't seem the + * fastreg API has a max_remap limit. */ + list_splice_init(&laundered, unmap_list); + + return ncleaned; +} + +static void rds_iw_destroy_fastreg(struct rds_iw_mr_pool *pool, + struct rds_iw_mr *ibmr) +{ + if (ibmr->page_list) + ib_free_fast_reg_page_list(ibmr->page_list); + if (ibmr->mr) + ib_dereg_mr(ibmr->mr); +} diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c new file mode 100644 index 000000000000..a1931f0027a2 --- /dev/null +++ b/net/rds/iw_recv.c @@ -0,0 +1,869 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include +#include + +#include "rds.h" +#include "iw.h" + +static struct kmem_cache *rds_iw_incoming_slab; +static struct kmem_cache *rds_iw_frag_slab; +static atomic_t rds_iw_allocation = ATOMIC_INIT(0); + +static void rds_iw_frag_drop_page(struct rds_page_frag *frag) +{ + rdsdebug("frag %p page %p\n", frag, frag->f_page); + __free_page(frag->f_page); + frag->f_page = NULL; +} + +static void rds_iw_frag_free(struct rds_page_frag *frag) +{ + rdsdebug("frag %p page %p\n", frag, frag->f_page); + BUG_ON(frag->f_page != NULL); + kmem_cache_free(rds_iw_frag_slab, frag); +} + +/* + * We map a page at a time. Its fragments are posted in order. This + * is called in fragment order as the fragments get send completion events. + * Only the last frag in the page performs the unmapping. + * + * It's OK for ring cleanup to call this in whatever order it likes because + * DMA is not in flight and so we can unmap while other ring entries still + * hold page references in their frags. + */ +static void rds_iw_recv_unmap_page(struct rds_iw_connection *ic, + struct rds_iw_recv_work *recv) +{ + struct rds_page_frag *frag = recv->r_frag; + + rdsdebug("recv %p frag %p page %p\n", recv, frag, frag->f_page); + if (frag->f_mapped) + ib_dma_unmap_page(ic->i_cm_id->device, + frag->f_mapped, + RDS_FRAG_SIZE, DMA_FROM_DEVICE); + frag->f_mapped = 0; +} + +void rds_iw_recv_init_ring(struct rds_iw_connection *ic) +{ + struct rds_iw_recv_work *recv; + u32 i; + + for (i = 0, recv = ic->i_recvs; i < ic->i_recv_ring.w_nr; i++, recv++) { + struct ib_sge *sge; + + recv->r_iwinc = NULL; + recv->r_frag = NULL; + + recv->r_wr.next = NULL; + recv->r_wr.wr_id = i; + recv->r_wr.sg_list = recv->r_sge; + recv->r_wr.num_sge = RDS_IW_RECV_SGE; + + sge = rds_iw_data_sge(ic, recv->r_sge); + sge->addr = 0; + sge->length = RDS_FRAG_SIZE; + sge->lkey = 0; + + sge = rds_iw_header_sge(ic, recv->r_sge); + sge->addr = ic->i_recv_hdrs_dma + (i * sizeof(struct rds_header)); + sge->length = sizeof(struct rds_header); + sge->lkey = 0; + } +} + +static void rds_iw_recv_clear_one(struct rds_iw_connection *ic, + struct rds_iw_recv_work *recv) +{ + if (recv->r_iwinc) { + rds_inc_put(&recv->r_iwinc->ii_inc); + recv->r_iwinc = NULL; + } + if (recv->r_frag) { + rds_iw_recv_unmap_page(ic, recv); + if (recv->r_frag->f_page) + rds_iw_frag_drop_page(recv->r_frag); + rds_iw_frag_free(recv->r_frag); + recv->r_frag = NULL; + } +} + +void rds_iw_recv_clear_ring(struct rds_iw_connection *ic) +{ + u32 i; + + for (i = 0; i < ic->i_recv_ring.w_nr; i++) + rds_iw_recv_clear_one(ic, &ic->i_recvs[i]); + + if (ic->i_frag.f_page) + rds_iw_frag_drop_page(&ic->i_frag); +} + +static int rds_iw_recv_refill_one(struct rds_connection *conn, + struct rds_iw_recv_work *recv, + gfp_t kptr_gfp, gfp_t page_gfp) +{ + struct rds_iw_connection *ic = conn->c_transport_data; + dma_addr_t dma_addr; + struct ib_sge *sge; + int ret = -ENOMEM; + + if (recv->r_iwinc == NULL) { + if (atomic_read(&rds_iw_allocation) >= rds_iw_sysctl_max_recv_allocation) { + rds_iw_stats_inc(s_iw_rx_alloc_limit); + goto out; + } + recv->r_iwinc = kmem_cache_alloc(rds_iw_incoming_slab, + kptr_gfp); + if (recv->r_iwinc == NULL) + goto out; + atomic_inc(&rds_iw_allocation); + INIT_LIST_HEAD(&recv->r_iwinc->ii_frags); + rds_inc_init(&recv->r_iwinc->ii_inc, conn, conn->c_faddr); + } + + if (recv->r_frag == NULL) { + recv->r_frag = kmem_cache_alloc(rds_iw_frag_slab, kptr_gfp); + if (recv->r_frag == NULL) + goto out; + INIT_LIST_HEAD(&recv->r_frag->f_item); + recv->r_frag->f_page = NULL; + } + + if (ic->i_frag.f_page == NULL) { + ic->i_frag.f_page = alloc_page(page_gfp); + if (ic->i_frag.f_page == NULL) + goto out; + ic->i_frag.f_offset = 0; + } + + dma_addr = ib_dma_map_page(ic->i_cm_id->device, + ic->i_frag.f_page, + ic->i_frag.f_offset, + RDS_FRAG_SIZE, + DMA_FROM_DEVICE); + if (ib_dma_mapping_error(ic->i_cm_id->device, dma_addr)) + goto out; + + /* + * Once we get the RDS_PAGE_LAST_OFF frag then rds_iw_frag_unmap() + * must be called on this recv. This happens as completions hit + * in order or on connection shutdown. + */ + recv->r_frag->f_page = ic->i_frag.f_page; + recv->r_frag->f_offset = ic->i_frag.f_offset; + recv->r_frag->f_mapped = dma_addr; + + sge = rds_iw_data_sge(ic, recv->r_sge); + sge->addr = dma_addr; + sge->length = RDS_FRAG_SIZE; + + sge = rds_iw_header_sge(ic, recv->r_sge); + sge->addr = ic->i_recv_hdrs_dma + (recv - ic->i_recvs) * sizeof(struct rds_header); + sge->length = sizeof(struct rds_header); + + get_page(recv->r_frag->f_page); + + if (ic->i_frag.f_offset < RDS_PAGE_LAST_OFF) { + ic->i_frag.f_offset += RDS_FRAG_SIZE; + } else { + put_page(ic->i_frag.f_page); + ic->i_frag.f_page = NULL; + ic->i_frag.f_offset = 0; + } + + ret = 0; +out: + return ret; +} + +/* + * This tries to allocate and post unused work requests after making sure that + * they have all the allocations they need to queue received fragments into + * sockets. The i_recv_mutex is held here so that ring_alloc and _unalloc + * pairs don't go unmatched. + * + * -1 is returned if posting fails due to temporary resource exhaustion. + */ +int rds_iw_recv_refill(struct rds_connection *conn, gfp_t kptr_gfp, + gfp_t page_gfp, int prefill) +{ + struct rds_iw_connection *ic = conn->c_transport_data; + struct rds_iw_recv_work *recv; + struct ib_recv_wr *failed_wr; + unsigned int posted = 0; + int ret = 0; + u32 pos; + + while ((prefill || rds_conn_up(conn)) + && rds_iw_ring_alloc(&ic->i_recv_ring, 1, &pos)) { + if (pos >= ic->i_recv_ring.w_nr) { + printk(KERN_NOTICE "Argh - ring alloc returned pos=%u\n", + pos); + ret = -EINVAL; + break; + } + + recv = &ic->i_recvs[pos]; + ret = rds_iw_recv_refill_one(conn, recv, kptr_gfp, page_gfp); + if (ret) { + ret = -1; + break; + } + + /* XXX when can this fail? */ + ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr); + rdsdebug("recv %p iwinc %p page %p addr %lu ret %d\n", recv, + recv->r_iwinc, recv->r_frag->f_page, + (long) recv->r_frag->f_mapped, ret); + if (ret) { + rds_iw_conn_error(conn, "recv post on " + "%pI4 returned %d, disconnecting and " + "reconnecting\n", &conn->c_faddr, + ret); + ret = -1; + break; + } + + posted++; + } + + /* We're doing flow control - update the window. */ + if (ic->i_flowctl && posted) + rds_iw_advertise_credits(conn, posted); + + if (ret) + rds_iw_ring_unalloc(&ic->i_recv_ring, 1); + return ret; +} + +void rds_iw_inc_purge(struct rds_incoming *inc) +{ + struct rds_iw_incoming *iwinc; + struct rds_page_frag *frag; + struct rds_page_frag *pos; + + iwinc = container_of(inc, struct rds_iw_incoming, ii_inc); + rdsdebug("purging iwinc %p inc %p\n", iwinc, inc); + + list_for_each_entry_safe(frag, pos, &iwinc->ii_frags, f_item) { + list_del_init(&frag->f_item); + rds_iw_frag_drop_page(frag); + rds_iw_frag_free(frag); + } +} + +void rds_iw_inc_free(struct rds_incoming *inc) +{ + struct rds_iw_incoming *iwinc; + + iwinc = container_of(inc, struct rds_iw_incoming, ii_inc); + + rds_iw_inc_purge(inc); + rdsdebug("freeing iwinc %p inc %p\n", iwinc, inc); + BUG_ON(!list_empty(&iwinc->ii_frags)); + kmem_cache_free(rds_iw_incoming_slab, iwinc); + atomic_dec(&rds_iw_allocation); + BUG_ON(atomic_read(&rds_iw_allocation) < 0); +} + +int rds_iw_inc_copy_to_user(struct rds_incoming *inc, struct iovec *first_iov, + size_t size) +{ + struct rds_iw_incoming *iwinc; + struct rds_page_frag *frag; + struct iovec *iov = first_iov; + unsigned long to_copy; + unsigned long frag_off = 0; + unsigned long iov_off = 0; + int copied = 0; + int ret; + u32 len; + + iwinc = container_of(inc, struct rds_iw_incoming, ii_inc); + frag = list_entry(iwinc->ii_frags.next, struct rds_page_frag, f_item); + len = be32_to_cpu(inc->i_hdr.h_len); + + while (copied < size && copied < len) { + if (frag_off == RDS_FRAG_SIZE) { + frag = list_entry(frag->f_item.next, + struct rds_page_frag, f_item); + frag_off = 0; + } + while (iov_off == iov->iov_len) { + iov_off = 0; + iov++; + } + + to_copy = min(iov->iov_len - iov_off, RDS_FRAG_SIZE - frag_off); + to_copy = min_t(size_t, to_copy, size - copied); + to_copy = min_t(unsigned long, to_copy, len - copied); + + rdsdebug("%lu bytes to user [%p, %zu] + %lu from frag " + "[%p, %lu] + %lu\n", + to_copy, iov->iov_base, iov->iov_len, iov_off, + frag->f_page, frag->f_offset, frag_off); + + /* XXX needs + offset for multiple recvs per page */ + ret = rds_page_copy_to_user(frag->f_page, + frag->f_offset + frag_off, + iov->iov_base + iov_off, + to_copy); + if (ret) { + copied = ret; + break; + } + + iov_off += to_copy; + frag_off += to_copy; + copied += to_copy; + } + + return copied; +} + +/* ic starts out kzalloc()ed */ +void rds_iw_recv_init_ack(struct rds_iw_connection *ic) +{ + struct ib_send_wr *wr = &ic->i_ack_wr; + struct ib_sge *sge = &ic->i_ack_sge; + + sge->addr = ic->i_ack_dma; + sge->length = sizeof(struct rds_header); + sge->lkey = rds_iw_local_dma_lkey(ic); + + wr->sg_list = sge; + wr->num_sge = 1; + wr->opcode = IB_WR_SEND; + wr->wr_id = RDS_IW_ACK_WR_ID; + wr->send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED; +} + +/* + * You'd think that with reliable IB connections you wouldn't need to ack + * messages that have been received. The problem is that IB hardware generates + * an ack message before it has DMAed the message into memory. This creates a + * potential message loss if the HCA is disabled for any reason between when it + * sends the ack and before the message is DMAed and processed. This is only a + * potential issue if another HCA is available for fail-over. + * + * When the remote host receives our ack they'll free the sent message from + * their send queue. To decrease the latency of this we always send an ack + * immediately after we've received messages. + * + * For simplicity, we only have one ack in flight at a time. This puts + * pressure on senders to have deep enough send queues to absorb the latency of + * a single ack frame being in flight. This might not be good enough. + * + * This is implemented by have a long-lived send_wr and sge which point to a + * statically allocated ack frame. This ack wr does not fall under the ring + * accounting that the tx and rx wrs do. The QP attribute specifically makes + * room for it beyond the ring size. Send completion notices its special + * wr_id and avoids working with the ring in that case. + */ +static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq, + int ack_required) +{ + rds_iw_set_64bit(&ic->i_ack_next, seq); + if (ack_required) { + smp_mb__before_clear_bit(); + set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); + } +} + +static u64 rds_iw_get_ack(struct rds_iw_connection *ic) +{ + clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); + smp_mb__after_clear_bit(); + + return ic->i_ack_next; +} + +static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credits) +{ + struct rds_header *hdr = ic->i_ack; + struct ib_send_wr *failed_wr; + u64 seq; + int ret; + + seq = rds_iw_get_ack(ic); + + rdsdebug("send_ack: ic %p ack %llu\n", ic, (unsigned long long) seq); + rds_message_populate_header(hdr, 0, 0, 0); + hdr->h_ack = cpu_to_be64(seq); + hdr->h_credit = adv_credits; + rds_message_make_checksum(hdr); + ic->i_ack_queued = jiffies; + + ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, &failed_wr); + if (unlikely(ret)) { + /* Failed to send. Release the WR, and + * force another ACK. + */ + clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); + set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); + + rds_iw_stats_inc(s_iw_ack_send_failure); + /* Need to finesse this later. */ + BUG(); + } else + rds_iw_stats_inc(s_iw_ack_sent); +} + +/* + * There are 3 ways of getting acknowledgements to the peer: + * 1. We call rds_iw_attempt_ack from the recv completion handler + * to send an ACK-only frame. + * However, there can be only one such frame in the send queue + * at any time, so we may have to postpone it. + * 2. When another (data) packet is transmitted while there's + * an ACK in the queue, we piggyback the ACK sequence number + * on the data packet. + * 3. If the ACK WR is done sending, we get called from the + * send queue completion handler, and check whether there's + * another ACK pending (postponed because the WR was on the + * queue). If so, we transmit it. + * + * We maintain 2 variables: + * - i_ack_flags, which keeps track of whether the ACK WR + * is currently in the send queue or not (IB_ACK_IN_FLIGHT) + * - i_ack_next, which is the last sequence number we received + * + * Potentially, send queue and receive queue handlers can run concurrently. + * + * Reconnecting complicates this picture just slightly. When we + * reconnect, we may be seeing duplicate packets. The peer + * is retransmitting them, because it hasn't seen an ACK for + * them. It is important that we ACK these. + * + * ACK mitigation adds a header flag "ACK_REQUIRED"; any packet with + * this flag set *MUST* be acknowledged immediately. + */ + +/* + * When we get here, we're called from the recv queue handler. + * Check whether we ought to transmit an ACK. + */ +void rds_iw_attempt_ack(struct rds_iw_connection *ic) +{ + unsigned int adv_credits; + + if (!test_bit(IB_ACK_REQUESTED, &ic->i_ack_flags)) + return; + + if (test_and_set_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags)) { + rds_iw_stats_inc(s_iw_ack_send_delayed); + return; + } + + /* Can we get a send credit? */ + if (!rds_iw_send_grab_credits(ic, 1, &adv_credits, 0)) { + rds_iw_stats_inc(s_iw_tx_throttle); + clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); + return; + } + + clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); + rds_iw_send_ack(ic, adv_credits); +} + +/* + * We get here from the send completion handler, when the + * adapter tells us the ACK frame was sent. + */ +void rds_iw_ack_send_complete(struct rds_iw_connection *ic) +{ + clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); + rds_iw_attempt_ack(ic); +} + +/* + * This is called by the regular xmit code when it wants to piggyback + * an ACK on an outgoing frame. + */ +u64 rds_iw_piggyb_ack(struct rds_iw_connection *ic) +{ + if (test_and_clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags)) + rds_iw_stats_inc(s_iw_ack_send_piggybacked); + return rds_iw_get_ack(ic); +} + +/* + * It's kind of lame that we're copying from the posted receive pages into + * long-lived bitmaps. We could have posted the bitmaps and rdma written into + * them. But receiving new congestion bitmaps should be a *rare* event, so + * hopefully we won't need to invest that complexity in making it more + * efficient. By copying we can share a simpler core with TCP which has to + * copy. + */ +static void rds_iw_cong_recv(struct rds_connection *conn, + struct rds_iw_incoming *iwinc) +{ + struct rds_cong_map *map; + unsigned int map_off; + unsigned int map_page; + struct rds_page_frag *frag; + unsigned long frag_off; + unsigned long to_copy; + unsigned long copied; + uint64_t uncongested = 0; + void *addr; + + /* catch completely corrupt packets */ + if (be32_to_cpu(iwinc->ii_inc.i_hdr.h_len) != RDS_CONG_MAP_BYTES) + return; + + map = conn->c_fcong; + map_page = 0; + map_off = 0; + + frag = list_entry(iwinc->ii_frags.next, struct rds_page_frag, f_item); + frag_off = 0; + + copied = 0; + + while (copied < RDS_CONG_MAP_BYTES) { + uint64_t *src, *dst; + unsigned int k; + + to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off); + BUG_ON(to_copy & 7); /* Must be 64bit aligned. */ + + addr = kmap_atomic(frag->f_page, KM_SOFTIRQ0); + + src = addr + frag_off; + dst = (void *)map->m_page_addrs[map_page] + map_off; + for (k = 0; k < to_copy; k += 8) { + /* Record ports that became uncongested, ie + * bits that changed from 0 to 1. */ + uncongested |= ~(*src) & *dst; + *dst++ = *src++; + } + kunmap_atomic(addr, KM_SOFTIRQ0); + + copied += to_copy; + + map_off += to_copy; + if (map_off == PAGE_SIZE) { + map_off = 0; + map_page++; + } + + frag_off += to_copy; + if (frag_off == RDS_FRAG_SIZE) { + frag = list_entry(frag->f_item.next, + struct rds_page_frag, f_item); + frag_off = 0; + } + } + + /* the congestion map is in little endian order */ + uncongested = le64_to_cpu(uncongested); + + rds_cong_map_updated(map, uncongested); +} + +/* + * Rings are posted with all the allocations they'll need to queue the + * incoming message to the receiving socket so this can't fail. + * All fragments start with a header, so we can make sure we're not receiving + * garbage, and we can tell a small 8 byte fragment from an ACK frame. + */ +struct rds_iw_ack_state { + u64 ack_next; + u64 ack_recv; + unsigned int ack_required:1; + unsigned int ack_next_valid:1; + unsigned int ack_recv_valid:1; +}; + +static void rds_iw_process_recv(struct rds_connection *conn, + struct rds_iw_recv_work *recv, u32 byte_len, + struct rds_iw_ack_state *state) +{ + struct rds_iw_connection *ic = conn->c_transport_data; + struct rds_iw_incoming *iwinc = ic->i_iwinc; + struct rds_header *ihdr, *hdr; + + /* XXX shut down the connection if port 0,0 are seen? */ + + rdsdebug("ic %p iwinc %p recv %p byte len %u\n", ic, iwinc, recv, + byte_len); + + if (byte_len < sizeof(struct rds_header)) { + rds_iw_conn_error(conn, "incoming message " + "from %pI4 didn't inclue a " + "header, disconnecting and " + "reconnecting\n", + &conn->c_faddr); + return; + } + byte_len -= sizeof(struct rds_header); + + ihdr = &ic->i_recv_hdrs[recv - ic->i_recvs]; + + /* Validate the checksum. */ + if (!rds_message_verify_checksum(ihdr)) { + rds_iw_conn_error(conn, "incoming message " + "from %pI4 has corrupted header - " + "forcing a reconnect\n", + &conn->c_faddr); + rds_stats_inc(s_recv_drop_bad_checksum); + return; + } + + /* Process the ACK sequence which comes with every packet */ + state->ack_recv = be64_to_cpu(ihdr->h_ack); + state->ack_recv_valid = 1; + + /* Process the credits update if there was one */ + if (ihdr->h_credit) + rds_iw_send_add_credits(conn, ihdr->h_credit); + + if (ihdr->h_sport == 0 && ihdr->h_dport == 0 && byte_len == 0) { + /* This is an ACK-only packet. The fact that it gets + * special treatment here is that historically, ACKs + * were rather special beasts. + */ + rds_iw_stats_inc(s_iw_ack_received); + + /* + * Usually the frags make their way on to incs and are then freed as + * the inc is freed. We don't go that route, so we have to drop the + * page ref ourselves. We can't just leave the page on the recv + * because that confuses the dma mapping of pages and each recv's use + * of a partial page. We can leave the frag, though, it will be + * reused. + * + * FIXME: Fold this into the code path below. + */ + rds_iw_frag_drop_page(recv->r_frag); + return; + } + + /* + * If we don't already have an inc on the connection then this + * fragment has a header and starts a message.. copy its header + * into the inc and save the inc so we can hang upcoming fragments + * off its list. + */ + if (iwinc == NULL) { + iwinc = recv->r_iwinc; + recv->r_iwinc = NULL; + ic->i_iwinc = iwinc; + + hdr = &iwinc->ii_inc.i_hdr; + memcpy(hdr, ihdr, sizeof(*hdr)); + ic->i_recv_data_rem = be32_to_cpu(hdr->h_len); + + rdsdebug("ic %p iwinc %p rem %u flag 0x%x\n", ic, iwinc, + ic->i_recv_data_rem, hdr->h_flags); + } else { + hdr = &iwinc->ii_inc.i_hdr; + /* We can't just use memcmp here; fragments of a + * single message may carry different ACKs */ + if (hdr->h_sequence != ihdr->h_sequence + || hdr->h_len != ihdr->h_len + || hdr->h_sport != ihdr->h_sport + || hdr->h_dport != ihdr->h_dport) { + rds_iw_conn_error(conn, + "fragment header mismatch; forcing reconnect\n"); + return; + } + } + + list_add_tail(&recv->r_frag->f_item, &iwinc->ii_frags); + recv->r_frag = NULL; + + if (ic->i_recv_data_rem > RDS_FRAG_SIZE) + ic->i_recv_data_rem -= RDS_FRAG_SIZE; + else { + ic->i_recv_data_rem = 0; + ic->i_iwinc = NULL; + + if (iwinc->ii_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP) + rds_iw_cong_recv(conn, iwinc); + else { + rds_recv_incoming(conn, conn->c_faddr, conn->c_laddr, + &iwinc->ii_inc, GFP_ATOMIC, + KM_SOFTIRQ0); + state->ack_next = be64_to_cpu(hdr->h_sequence); + state->ack_next_valid = 1; + } + + /* Evaluate the ACK_REQUIRED flag *after* we received + * the complete frame, and after bumping the next_rx + * sequence. */ + if (hdr->h_flags & RDS_FLAG_ACK_REQUIRED) { + rds_stats_inc(s_recv_ack_required); + state->ack_required = 1; + } + + rds_inc_put(&iwinc->ii_inc); + } +} + +/* + * Plucking the oldest entry from the ring can be done concurrently with + * the thread refilling the ring. Each ring operation is protected by + * spinlocks and the transient state of refilling doesn't change the + * recording of which entry is oldest. + * + * This relies on IB only calling one cq comp_handler for each cq so that + * there will only be one caller of rds_recv_incoming() per RDS connection. + */ +void rds_iw_recv_cq_comp_handler(struct ib_cq *cq, void *context) +{ + struct rds_connection *conn = context; + struct rds_iw_connection *ic = conn->c_transport_data; + struct ib_wc wc; + struct rds_iw_ack_state state = { 0, }; + struct rds_iw_recv_work *recv; + + rdsdebug("conn %p cq %p\n", conn, cq); + + rds_iw_stats_inc(s_iw_rx_cq_call); + + ib_req_notify_cq(cq, IB_CQ_SOLICITED); + + while (ib_poll_cq(cq, 1, &wc) > 0) { + rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n", + (unsigned long long)wc.wr_id, wc.status, wc.byte_len, + be32_to_cpu(wc.ex.imm_data)); + rds_iw_stats_inc(s_iw_rx_cq_event); + + recv = &ic->i_recvs[rds_iw_ring_oldest(&ic->i_recv_ring)]; + + rds_iw_recv_unmap_page(ic, recv); + + /* + * Also process recvs in connecting state because it is possible + * to get a recv completion _before_ the rdmacm ESTABLISHED + * event is processed. + */ + if (rds_conn_up(conn) || rds_conn_connecting(conn)) { + /* We expect errors as the qp is drained during shutdown */ + if (wc.status == IB_WC_SUCCESS) { + rds_iw_process_recv(conn, recv, wc.byte_len, &state); + } else { + rds_iw_conn_error(conn, "recv completion on " + "%pI4 had status %u, disconnecting and " + "reconnecting\n", &conn->c_faddr, + wc.status); + } + } + + rds_iw_ring_free(&ic->i_recv_ring, 1); + } + + if (state.ack_next_valid) + rds_iw_set_ack(ic, state.ack_next, state.ack_required); + if (state.ack_recv_valid && state.ack_recv > ic->i_ack_recv) { + rds_send_drop_acked(conn, state.ack_recv, NULL); + ic->i_ack_recv = state.ack_recv; + } + if (rds_conn_up(conn)) + rds_iw_attempt_ack(ic); + + /* If we ever end up with a really empty receive ring, we're + * in deep trouble, as the sender will definitely see RNR + * timeouts. */ + if (rds_iw_ring_empty(&ic->i_recv_ring)) + rds_iw_stats_inc(s_iw_rx_ring_empty); + + /* + * If the ring is running low, then schedule the thread to refill. + */ + if (rds_iw_ring_low(&ic->i_recv_ring)) + queue_delayed_work(rds_wq, &conn->c_recv_w, 0); +} + +int rds_iw_recv(struct rds_connection *conn) +{ + struct rds_iw_connection *ic = conn->c_transport_data; + int ret = 0; + + rdsdebug("conn %p\n", conn); + + /* + * If we get a temporary posting failure in this context then + * we're really low and we want the caller to back off for a bit. + */ + mutex_lock(&ic->i_recv_mutex); + if (rds_iw_recv_refill(conn, GFP_KERNEL, GFP_HIGHUSER, 0)) + ret = -ENOMEM; + else + rds_iw_stats_inc(s_iw_rx_refill_from_thread); + mutex_unlock(&ic->i_recv_mutex); + + if (rds_conn_up(conn)) + rds_iw_attempt_ack(ic); + + return ret; +} + +int __init rds_iw_recv_init(void) +{ + struct sysinfo si; + int ret = -ENOMEM; + + /* Default to 30% of all available RAM for recv memory */ + si_meminfo(&si); + rds_iw_sysctl_max_recv_allocation = si.totalram / 3 * PAGE_SIZE / RDS_FRAG_SIZE; + + rds_iw_incoming_slab = kmem_cache_create("rds_iw_incoming", + sizeof(struct rds_iw_incoming), + 0, 0, NULL); + if (rds_iw_incoming_slab == NULL) + goto out; + + rds_iw_frag_slab = kmem_cache_create("rds_iw_frag", + sizeof(struct rds_page_frag), + 0, 0, NULL); + if (rds_iw_frag_slab == NULL) + kmem_cache_destroy(rds_iw_incoming_slab); + else + ret = 0; +out: + return ret; +} + +void rds_iw_recv_exit(void) +{ + kmem_cache_destroy(rds_iw_incoming_slab); + kmem_cache_destroy(rds_iw_frag_slab); +} diff --git a/net/rds/iw_ring.c b/net/rds/iw_ring.c new file mode 100644 index 000000000000..d422d4b5deef --- /dev/null +++ b/net/rds/iw_ring.c @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include + +#include "rds.h" +#include "iw.h" + +/* + * Locking for IB rings. + * We assume that allocation is always protected by a mutex + * in the caller (this is a valid assumption for the current + * implementation). + * + * Freeing always happens in an interrupt, and hence only + * races with allocations, but not with other free()s. + * + * The interaction between allocation and freeing is that + * the alloc code has to determine the number of free entries. + * To this end, we maintain two counters; an allocation counter + * and a free counter. Both are allowed to run freely, and wrap + * around. + * The number of used entries is always (alloc_ctr - free_ctr) % NR. + * + * The current implementation makes free_ctr atomic. When the + * caller finds an allocation fails, it should set an "alloc fail" + * bit and retry the allocation. The "alloc fail" bit essentially tells + * the CQ completion handlers to wake it up after freeing some + * more entries. + */ + +/* + * This only happens on shutdown. + */ +DECLARE_WAIT_QUEUE_HEAD(rds_iw_ring_empty_wait); + +void rds_iw_ring_init(struct rds_iw_work_ring *ring, u32 nr) +{ + memset(ring, 0, sizeof(*ring)); + ring->w_nr = nr; + rdsdebug("ring %p nr %u\n", ring, ring->w_nr); +} + +static inline u32 __rds_iw_ring_used(struct rds_iw_work_ring *ring) +{ + u32 diff; + + /* This assumes that atomic_t has at least as many bits as u32 */ + diff = ring->w_alloc_ctr - (u32) atomic_read(&ring->w_free_ctr); + BUG_ON(diff > ring->w_nr); + + return diff; +} + +void rds_iw_ring_resize(struct rds_iw_work_ring *ring, u32 nr) +{ + /* We only ever get called from the connection setup code, + * prior to creating the QP. */ + BUG_ON(__rds_iw_ring_used(ring)); + ring->w_nr = nr; +} + +static int __rds_iw_ring_empty(struct rds_iw_work_ring *ring) +{ + return __rds_iw_ring_used(ring) == 0; +} + +u32 rds_iw_ring_alloc(struct rds_iw_work_ring *ring, u32 val, u32 *pos) +{ + u32 ret = 0, avail; + + avail = ring->w_nr - __rds_iw_ring_used(ring); + + rdsdebug("ring %p val %u next %u free %u\n", ring, val, + ring->w_alloc_ptr, avail); + + if (val && avail) { + ret = min(val, avail); + *pos = ring->w_alloc_ptr; + + ring->w_alloc_ptr = (ring->w_alloc_ptr + ret) % ring->w_nr; + ring->w_alloc_ctr += ret; + } + + return ret; +} + +void rds_iw_ring_free(struct rds_iw_work_ring *ring, u32 val) +{ + ring->w_free_ptr = (ring->w_free_ptr + val) % ring->w_nr; + atomic_add(val, &ring->w_free_ctr); + + if (__rds_iw_ring_empty(ring) && + waitqueue_active(&rds_iw_ring_empty_wait)) + wake_up(&rds_iw_ring_empty_wait); +} + +void rds_iw_ring_unalloc(struct rds_iw_work_ring *ring, u32 val) +{ + ring->w_alloc_ptr = (ring->w_alloc_ptr - val) % ring->w_nr; + ring->w_alloc_ctr -= val; +} + +int rds_iw_ring_empty(struct rds_iw_work_ring *ring) +{ + return __rds_iw_ring_empty(ring); +} + +int rds_iw_ring_low(struct rds_iw_work_ring *ring) +{ + return __rds_iw_ring_used(ring) <= (ring->w_nr >> 2); +} + + +/* + * returns the oldest alloced ring entry. This will be the next one + * freed. This can't be called if there are none allocated. + */ +u32 rds_iw_ring_oldest(struct rds_iw_work_ring *ring) +{ + return ring->w_free_ptr; +} + +/* + * returns the number of completed work requests. + */ + +u32 rds_iw_ring_completed(struct rds_iw_work_ring *ring, u32 wr_id, u32 oldest) +{ + u32 ret; + + if (oldest <= (unsigned long long)wr_id) + ret = (unsigned long long)wr_id - oldest + 1; + else + ret = ring->w_nr - oldest + (unsigned long long)wr_id + 1; + + rdsdebug("ring %p ret %u wr_id %u oldest %u\n", ring, ret, + wr_id, oldest); + return ret; +} diff --git a/net/rds/iw_send.c b/net/rds/iw_send.c new file mode 100644 index 000000000000..22dd38ffd608 --- /dev/null +++ b/net/rds/iw_send.c @@ -0,0 +1,975 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include +#include + +#include "rds.h" +#include "rdma.h" +#include "iw.h" + +static void rds_iw_send_rdma_complete(struct rds_message *rm, + int wc_status) +{ + int notify_status; + + switch (wc_status) { + case IB_WC_WR_FLUSH_ERR: + return; + + case IB_WC_SUCCESS: + notify_status = RDS_RDMA_SUCCESS; + break; + + case IB_WC_REM_ACCESS_ERR: + notify_status = RDS_RDMA_REMOTE_ERROR; + break; + + default: + notify_status = RDS_RDMA_OTHER_ERROR; + break; + } + rds_rdma_send_complete(rm, notify_status); +} + +static void rds_iw_send_unmap_rdma(struct rds_iw_connection *ic, + struct rds_rdma_op *op) +{ + if (op->r_mapped) { + ib_dma_unmap_sg(ic->i_cm_id->device, + op->r_sg, op->r_nents, + op->r_write ? DMA_TO_DEVICE : DMA_FROM_DEVICE); + op->r_mapped = 0; + } +} + +static void rds_iw_send_unmap_rm(struct rds_iw_connection *ic, + struct rds_iw_send_work *send, + int wc_status) +{ + struct rds_message *rm = send->s_rm; + + rdsdebug("ic %p send %p rm %p\n", ic, send, rm); + + ib_dma_unmap_sg(ic->i_cm_id->device, + rm->m_sg, rm->m_nents, + DMA_TO_DEVICE); + + if (rm->m_rdma_op != NULL) { + rds_iw_send_unmap_rdma(ic, rm->m_rdma_op); + + /* If the user asked for a completion notification on this + * message, we can implement three different semantics: + * 1. Notify when we received the ACK on the RDS message + * that was queued with the RDMA. This provides reliable + * notification of RDMA status at the expense of a one-way + * packet delay. + * 2. Notify when the IB stack gives us the completion event for + * the RDMA operation. + * 3. Notify when the IB stack gives us the completion event for + * the accompanying RDS messages. + * Here, we implement approach #3. To implement approach #2, + * call rds_rdma_send_complete from the cq_handler. To implement #1, + * don't call rds_rdma_send_complete at all, and fall back to the notify + * handling in the ACK processing code. + * + * Note: There's no need to explicitly sync any RDMA buffers using + * ib_dma_sync_sg_for_cpu - the completion for the RDMA + * operation itself unmapped the RDMA buffers, which takes care + * of synching. + */ + rds_iw_send_rdma_complete(rm, wc_status); + + if (rm->m_rdma_op->r_write) + rds_stats_add(s_send_rdma_bytes, rm->m_rdma_op->r_bytes); + else + rds_stats_add(s_recv_rdma_bytes, rm->m_rdma_op->r_bytes); + } + + /* If anyone waited for this message to get flushed out, wake + * them up now */ + rds_message_unmapped(rm); + + rds_message_put(rm); + send->s_rm = NULL; +} + +void rds_iw_send_init_ring(struct rds_iw_connection *ic) +{ + struct rds_iw_send_work *send; + u32 i; + + for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) { + struct ib_sge *sge; + + send->s_rm = NULL; + send->s_op = NULL; + send->s_mapping = NULL; + + send->s_wr.next = NULL; + send->s_wr.wr_id = i; + send->s_wr.sg_list = send->s_sge; + send->s_wr.num_sge = 1; + send->s_wr.opcode = IB_WR_SEND; + send->s_wr.send_flags = 0; + send->s_wr.ex.imm_data = 0; + + sge = rds_iw_data_sge(ic, send->s_sge); + sge->lkey = 0; + + sge = rds_iw_header_sge(ic, send->s_sge); + sge->addr = ic->i_send_hdrs_dma + (i * sizeof(struct rds_header)); + sge->length = sizeof(struct rds_header); + sge->lkey = 0; + + send->s_mr = ib_alloc_fast_reg_mr(ic->i_pd, fastreg_message_size); + if (IS_ERR(send->s_mr)) { + printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_mr failed\n"); + break; + } + + send->s_page_list = ib_alloc_fast_reg_page_list( + ic->i_cm_id->device, fastreg_message_size); + if (IS_ERR(send->s_page_list)) { + printk(KERN_WARNING "RDS/IW: ib_alloc_fast_reg_page_list failed\n"); + break; + } + } +} + +void rds_iw_send_clear_ring(struct rds_iw_connection *ic) +{ + struct rds_iw_send_work *send; + u32 i; + + for (i = 0, send = ic->i_sends; i < ic->i_send_ring.w_nr; i++, send++) { + BUG_ON(!send->s_mr); + ib_dereg_mr(send->s_mr); + BUG_ON(!send->s_page_list); + ib_free_fast_reg_page_list(send->s_page_list); + if (send->s_wr.opcode == 0xdead) + continue; + if (send->s_rm) + rds_iw_send_unmap_rm(ic, send, IB_WC_WR_FLUSH_ERR); + if (send->s_op) + rds_iw_send_unmap_rdma(ic, send->s_op); + } +} + +/* + * The _oldest/_free ring operations here race cleanly with the alloc/unalloc + * operations performed in the send path. As the sender allocs and potentially + * unallocs the next free entry in the ring it doesn't alter which is + * the next to be freed, which is what this is concerned with. + */ +void rds_iw_send_cq_comp_handler(struct ib_cq *cq, void *context) +{ + struct rds_connection *conn = context; + struct rds_iw_connection *ic = conn->c_transport_data; + struct ib_wc wc; + struct rds_iw_send_work *send; + u32 completed; + u32 oldest; + u32 i; + int ret; + + rdsdebug("cq %p conn %p\n", cq, conn); + rds_iw_stats_inc(s_iw_tx_cq_call); + ret = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP); + if (ret) + rdsdebug("ib_req_notify_cq send failed: %d\n", ret); + + while (ib_poll_cq(cq, 1, &wc) > 0) { + rdsdebug("wc wr_id 0x%llx status %u byte_len %u imm_data %u\n", + (unsigned long long)wc.wr_id, wc.status, wc.byte_len, + be32_to_cpu(wc.ex.imm_data)); + rds_iw_stats_inc(s_iw_tx_cq_event); + + if (wc.status != IB_WC_SUCCESS) { + printk(KERN_ERR "WC Error: status = %d opcode = %d\n", wc.status, wc.opcode); + break; + } + + if (wc.opcode == IB_WC_LOCAL_INV && wc.wr_id == RDS_IW_LOCAL_INV_WR_ID) { + ic->i_fastreg_posted = 0; + continue; + } + + if (wc.opcode == IB_WC_FAST_REG_MR && wc.wr_id == RDS_IW_FAST_REG_WR_ID) { + ic->i_fastreg_posted = 1; + continue; + } + + if (wc.wr_id == RDS_IW_ACK_WR_ID) { + if (ic->i_ack_queued + HZ/2 < jiffies) + rds_iw_stats_inc(s_iw_tx_stalled); + rds_iw_ack_send_complete(ic); + continue; + } + + oldest = rds_iw_ring_oldest(&ic->i_send_ring); + + completed = rds_iw_ring_completed(&ic->i_send_ring, wc.wr_id, oldest); + + for (i = 0; i < completed; i++) { + send = &ic->i_sends[oldest]; + + /* In the error case, wc.opcode sometimes contains garbage */ + switch (send->s_wr.opcode) { + case IB_WR_SEND: + if (send->s_rm) + rds_iw_send_unmap_rm(ic, send, wc.status); + break; + case IB_WR_FAST_REG_MR: + case IB_WR_RDMA_WRITE: + case IB_WR_RDMA_READ: + case IB_WR_RDMA_READ_WITH_INV: + /* Nothing to be done - the SG list will be unmapped + * when the SEND completes. */ + break; + default: + if (printk_ratelimit()) + printk(KERN_NOTICE + "RDS/IW: %s: unexpected opcode 0x%x in WR!\n", + __func__, send->s_wr.opcode); + break; + } + + send->s_wr.opcode = 0xdead; + send->s_wr.num_sge = 1; + if (send->s_queued + HZ/2 < jiffies) + rds_iw_stats_inc(s_iw_tx_stalled); + + /* If a RDMA operation produced an error, signal this right + * away. If we don't, the subsequent SEND that goes with this + * RDMA will be canceled with ERR_WFLUSH, and the application + * never learn that the RDMA failed. */ + if (unlikely(wc.status == IB_WC_REM_ACCESS_ERR && send->s_op)) { + struct rds_message *rm; + + rm = rds_send_get_message(conn, send->s_op); + if (rm) + rds_iw_send_rdma_complete(rm, wc.status); + } + + oldest = (oldest + 1) % ic->i_send_ring.w_nr; + } + + rds_iw_ring_free(&ic->i_send_ring, completed); + + if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags) + || test_bit(0, &conn->c_map_queued)) + queue_delayed_work(rds_wq, &conn->c_send_w, 0); + + /* We expect errors as the qp is drained during shutdown */ + if (wc.status != IB_WC_SUCCESS && rds_conn_up(conn)) { + rds_iw_conn_error(conn, + "send completion on %pI4 " + "had status %u, disconnecting and reconnecting\n", + &conn->c_faddr, wc.status); + } + } +} + +/* + * This is the main function for allocating credits when sending + * messages. + * + * Conceptually, we have two counters: + * - send credits: this tells us how many WRs we're allowed + * to submit without overruning the reciever's queue. For + * each SEND WR we post, we decrement this by one. + * + * - posted credits: this tells us how many WRs we recently + * posted to the receive queue. This value is transferred + * to the peer as a "credit update" in a RDS header field. + * Every time we transmit credits to the peer, we subtract + * the amount of transferred credits from this counter. + * + * It is essential that we avoid situations where both sides have + * exhausted their send credits, and are unable to send new credits + * to the peer. We achieve this by requiring that we send at least + * one credit update to the peer before exhausting our credits. + * When new credits arrive, we subtract one credit that is withheld + * until we've posted new buffers and are ready to transmit these + * credits (see rds_iw_send_add_credits below). + * + * The RDS send code is essentially single-threaded; rds_send_xmit + * grabs c_send_lock to ensure exclusive access to the send ring. + * However, the ACK sending code is independent and can race with + * message SENDs. + * + * In the send path, we need to update the counters for send credits + * and the counter of posted buffers atomically - when we use the + * last available credit, we cannot allow another thread to race us + * and grab the posted credits counter. Hence, we have to use a + * spinlock to protect the credit counter, or use atomics. + * + * Spinlocks shared between the send and the receive path are bad, + * because they create unnecessary delays. An early implementation + * using a spinlock showed a 5% degradation in throughput at some + * loads. + * + * This implementation avoids spinlocks completely, putting both + * counters into a single atomic, and updating that atomic using + * atomic_add (in the receive path, when receiving fresh credits), + * and using atomic_cmpxchg when updating the two counters. + */ +int rds_iw_send_grab_credits(struct rds_iw_connection *ic, + u32 wanted, u32 *adv_credits, int need_posted) +{ + unsigned int avail, posted, got = 0, advertise; + long oldval, newval; + + *adv_credits = 0; + if (!ic->i_flowctl) + return wanted; + +try_again: + advertise = 0; + oldval = newval = atomic_read(&ic->i_credits); + posted = IB_GET_POST_CREDITS(oldval); + avail = IB_GET_SEND_CREDITS(oldval); + + rdsdebug("rds_iw_send_grab_credits(%u): credits=%u posted=%u\n", + wanted, avail, posted); + + /* The last credit must be used to send a credit update. */ + if (avail && !posted) + avail--; + + if (avail < wanted) { + struct rds_connection *conn = ic->i_cm_id->context; + + /* Oops, there aren't that many credits left! */ + set_bit(RDS_LL_SEND_FULL, &conn->c_flags); + got = avail; + } else { + /* Sometimes you get what you want, lalala. */ + got = wanted; + } + newval -= IB_SET_SEND_CREDITS(got); + + /* + * If need_posted is non-zero, then the caller wants + * the posted regardless of whether any send credits are + * available. + */ + if (posted && (got || need_posted)) { + advertise = min_t(unsigned int, posted, RDS_MAX_ADV_CREDIT); + newval -= IB_SET_POST_CREDITS(advertise); + } + + /* Finally bill everything */ + if (atomic_cmpxchg(&ic->i_credits, oldval, newval) != oldval) + goto try_again; + + *adv_credits = advertise; + return got; +} + +void rds_iw_send_add_credits(struct rds_connection *conn, unsigned int credits) +{ + struct rds_iw_connection *ic = conn->c_transport_data; + + if (credits == 0) + return; + + rdsdebug("rds_iw_send_add_credits(%u): current=%u%s\n", + credits, + IB_GET_SEND_CREDITS(atomic_read(&ic->i_credits)), + test_bit(RDS_LL_SEND_FULL, &conn->c_flags) ? ", ll_send_full" : ""); + + atomic_add(IB_SET_SEND_CREDITS(credits), &ic->i_credits); + if (test_and_clear_bit(RDS_LL_SEND_FULL, &conn->c_flags)) + queue_delayed_work(rds_wq, &conn->c_send_w, 0); + + WARN_ON(IB_GET_SEND_CREDITS(credits) >= 16384); + + rds_iw_stats_inc(s_iw_rx_credit_updates); +} + +void rds_iw_advertise_credits(struct rds_connection *conn, unsigned int posted) +{ + struct rds_iw_connection *ic = conn->c_transport_data; + + if (posted == 0) + return; + + atomic_add(IB_SET_POST_CREDITS(posted), &ic->i_credits); + + /* Decide whether to send an update to the peer now. + * If we would send a credit update for every single buffer we + * post, we would end up with an ACK storm (ACK arrives, + * consumes buffer, we refill the ring, send ACK to remote + * advertising the newly posted buffer... ad inf) + * + * Performance pretty much depends on how often we send + * credit updates - too frequent updates mean lots of ACKs. + * Too infrequent updates, and the peer will run out of + * credits and has to throttle. + * For the time being, 16 seems to be a good compromise. + */ + if (IB_GET_POST_CREDITS(atomic_read(&ic->i_credits)) >= 16) + set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); +} + +static inline void +rds_iw_xmit_populate_wr(struct rds_iw_connection *ic, + struct rds_iw_send_work *send, unsigned int pos, + unsigned long buffer, unsigned int length, + int send_flags) +{ + struct ib_sge *sge; + + WARN_ON(pos != send - ic->i_sends); + + send->s_wr.send_flags = send_flags; + send->s_wr.opcode = IB_WR_SEND; + send->s_wr.num_sge = 2; + send->s_wr.next = NULL; + send->s_queued = jiffies; + send->s_op = NULL; + + if (length != 0) { + sge = rds_iw_data_sge(ic, send->s_sge); + sge->addr = buffer; + sge->length = length; + sge->lkey = rds_iw_local_dma_lkey(ic); + + sge = rds_iw_header_sge(ic, send->s_sge); + } else { + /* We're sending a packet with no payload. There is only + * one SGE */ + send->s_wr.num_sge = 1; + sge = &send->s_sge[0]; + } + + sge->addr = ic->i_send_hdrs_dma + (pos * sizeof(struct rds_header)); + sge->length = sizeof(struct rds_header); + sge->lkey = rds_iw_local_dma_lkey(ic); +} + +/* + * This can be called multiple times for a given message. The first time + * we see a message we map its scatterlist into the IB device so that + * we can provide that mapped address to the IB scatter gather entries + * in the IB work requests. We translate the scatterlist into a series + * of work requests that fragment the message. These work requests complete + * in order so we pass ownership of the message to the completion handler + * once we send the final fragment. + * + * The RDS core uses the c_send_lock to only enter this function once + * per connection. This makes sure that the tx ring alloc/unalloc pairs + * don't get out of sync and confuse the ring. + */ +int rds_iw_xmit(struct rds_connection *conn, struct rds_message *rm, + unsigned int hdr_off, unsigned int sg, unsigned int off) +{ + struct rds_iw_connection *ic = conn->c_transport_data; + struct ib_device *dev = ic->i_cm_id->device; + struct rds_iw_send_work *send = NULL; + struct rds_iw_send_work *first; + struct rds_iw_send_work *prev; + struct ib_send_wr *failed_wr; + struct scatterlist *scat; + u32 pos; + u32 i; + u32 work_alloc; + u32 credit_alloc; + u32 posted; + u32 adv_credits = 0; + int send_flags = 0; + int sent; + int ret; + int flow_controlled = 0; + + BUG_ON(off % RDS_FRAG_SIZE); + BUG_ON(hdr_off != 0 && hdr_off != sizeof(struct rds_header)); + + /* Fastreg support */ + if (rds_rdma_cookie_key(rm->m_rdma_cookie) + && !ic->i_fastreg_posted) { + ret = -EAGAIN; + goto out; + } + + /* FIXME we may overallocate here */ + if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0) + i = 1; + else + i = ceil(be32_to_cpu(rm->m_inc.i_hdr.h_len), RDS_FRAG_SIZE); + + work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, i, &pos); + if (work_alloc == 0) { + set_bit(RDS_LL_SEND_FULL, &conn->c_flags); + rds_iw_stats_inc(s_iw_tx_ring_full); + ret = -ENOMEM; + goto out; + } + + credit_alloc = work_alloc; + if (ic->i_flowctl) { + credit_alloc = rds_iw_send_grab_credits(ic, work_alloc, &posted, 0); + adv_credits += posted; + if (credit_alloc < work_alloc) { + rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - credit_alloc); + work_alloc = credit_alloc; + flow_controlled++; + } + if (work_alloc == 0) { + rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc); + rds_iw_stats_inc(s_iw_tx_throttle); + ret = -ENOMEM; + goto out; + } + } + + /* map the message the first time we see it */ + if (ic->i_rm == NULL) { + /* + printk(KERN_NOTICE "rds_iw_xmit prep msg dport=%u flags=0x%x len=%d\n", + be16_to_cpu(rm->m_inc.i_hdr.h_dport), + rm->m_inc.i_hdr.h_flags, + be32_to_cpu(rm->m_inc.i_hdr.h_len)); + */ + if (rm->m_nents) { + rm->m_count = ib_dma_map_sg(dev, + rm->m_sg, rm->m_nents, DMA_TO_DEVICE); + rdsdebug("ic %p mapping rm %p: %d\n", ic, rm, rm->m_count); + if (rm->m_count == 0) { + rds_iw_stats_inc(s_iw_tx_sg_mapping_failure); + rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc); + ret = -ENOMEM; /* XXX ? */ + goto out; + } + } else { + rm->m_count = 0; + } + + ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs; + ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes; + rds_message_addref(rm); + ic->i_rm = rm; + + /* Finalize the header */ + if (test_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags)) + rm->m_inc.i_hdr.h_flags |= RDS_FLAG_ACK_REQUIRED; + if (test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) + rm->m_inc.i_hdr.h_flags |= RDS_FLAG_RETRANSMITTED; + + /* If it has a RDMA op, tell the peer we did it. This is + * used by the peer to release use-once RDMA MRs. */ + if (rm->m_rdma_op) { + struct rds_ext_header_rdma ext_hdr; + + ext_hdr.h_rdma_rkey = cpu_to_be32(rm->m_rdma_op->r_key); + rds_message_add_extension(&rm->m_inc.i_hdr, + RDS_EXTHDR_RDMA, &ext_hdr, sizeof(ext_hdr)); + } + if (rm->m_rdma_cookie) { + rds_message_add_rdma_dest_extension(&rm->m_inc.i_hdr, + rds_rdma_cookie_key(rm->m_rdma_cookie), + rds_rdma_cookie_offset(rm->m_rdma_cookie)); + } + + /* Note - rds_iw_piggyb_ack clears the ACK_REQUIRED bit, so + * we should not do this unless we have a chance of at least + * sticking the header into the send ring. Which is why we + * should call rds_iw_ring_alloc first. */ + rm->m_inc.i_hdr.h_ack = cpu_to_be64(rds_iw_piggyb_ack(ic)); + rds_message_make_checksum(&rm->m_inc.i_hdr); + + /* + * Update adv_credits since we reset the ACK_REQUIRED bit. + */ + rds_iw_send_grab_credits(ic, 0, &posted, 1); + adv_credits += posted; + BUG_ON(adv_credits > 255); + } else if (ic->i_rm != rm) + BUG(); + + send = &ic->i_sends[pos]; + first = send; + prev = NULL; + scat = &rm->m_sg[sg]; + sent = 0; + i = 0; + + /* Sometimes you want to put a fence between an RDMA + * READ and the following SEND. + * We could either do this all the time + * or when requested by the user. Right now, we let + * the application choose. + */ + if (rm->m_rdma_op && rm->m_rdma_op->r_fence) + send_flags = IB_SEND_FENCE; + + /* + * We could be copying the header into the unused tail of the page. + * That would need to be changed in the future when those pages might + * be mapped userspace pages or page cache pages. So instead we always + * use a second sge and our long-lived ring of mapped headers. We send + * the header after the data so that the data payload can be aligned on + * the receiver. + */ + + /* handle a 0-len message */ + if (be32_to_cpu(rm->m_inc.i_hdr.h_len) == 0) { + rds_iw_xmit_populate_wr(ic, send, pos, 0, 0, send_flags); + goto add_header; + } + + /* if there's data reference it with a chain of work reqs */ + for (; i < work_alloc && scat != &rm->m_sg[rm->m_count]; i++) { + unsigned int len; + + send = &ic->i_sends[pos]; + + len = min(RDS_FRAG_SIZE, ib_sg_dma_len(dev, scat) - off); + rds_iw_xmit_populate_wr(ic, send, pos, + ib_sg_dma_address(dev, scat) + off, len, + send_flags); + + /* + * We want to delay signaling completions just enough to get + * the batching benefits but not so much that we create dead time + * on the wire. + */ + if (ic->i_unsignaled_wrs-- == 0) { + ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs; + send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; + } + + ic->i_unsignaled_bytes -= len; + if (ic->i_unsignaled_bytes <= 0) { + ic->i_unsignaled_bytes = rds_iw_sysctl_max_unsig_bytes; + send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; + } + + /* + * Always signal the last one if we're stopping due to flow control. + */ + if (flow_controlled && i == (work_alloc-1)) + send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; + + rdsdebug("send %p wr %p num_sge %u next %p\n", send, + &send->s_wr, send->s_wr.num_sge, send->s_wr.next); + + sent += len; + off += len; + if (off == ib_sg_dma_len(dev, scat)) { + scat++; + off = 0; + } + +add_header: + /* Tack on the header after the data. The header SGE should already + * have been set up to point to the right header buffer. */ + memcpy(&ic->i_send_hdrs[pos], &rm->m_inc.i_hdr, sizeof(struct rds_header)); + + if (0) { + struct rds_header *hdr = &ic->i_send_hdrs[pos]; + + printk(KERN_NOTICE "send WR dport=%u flags=0x%x len=%d\n", + be16_to_cpu(hdr->h_dport), + hdr->h_flags, + be32_to_cpu(hdr->h_len)); + } + if (adv_credits) { + struct rds_header *hdr = &ic->i_send_hdrs[pos]; + + /* add credit and redo the header checksum */ + hdr->h_credit = adv_credits; + rds_message_make_checksum(hdr); + adv_credits = 0; + rds_iw_stats_inc(s_iw_tx_credit_updates); + } + + if (prev) + prev->s_wr.next = &send->s_wr; + prev = send; + + pos = (pos + 1) % ic->i_send_ring.w_nr; + } + + /* Account the RDS header in the number of bytes we sent, but just once. + * The caller has no concept of fragmentation. */ + if (hdr_off == 0) + sent += sizeof(struct rds_header); + + /* if we finished the message then send completion owns it */ + if (scat == &rm->m_sg[rm->m_count]) { + prev->s_rm = ic->i_rm; + prev->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; + ic->i_rm = NULL; + } + + if (i < work_alloc) { + rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - i); + work_alloc = i; + } + if (ic->i_flowctl && i < credit_alloc) + rds_iw_send_add_credits(conn, credit_alloc - i); + + /* XXX need to worry about failed_wr and partial sends. */ + failed_wr = &first->s_wr; + ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); + rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic, + first, &first->s_wr, ret, failed_wr); + BUG_ON(failed_wr != &first->s_wr); + if (ret) { + printk(KERN_WARNING "RDS/IW: ib_post_send to %pI4 " + "returned %d\n", &conn->c_faddr, ret); + rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc); + if (prev->s_rm) { + ic->i_rm = prev->s_rm; + prev->s_rm = NULL; + } + goto out; + } + + ret = sent; +out: + BUG_ON(adv_credits); + return ret; +} + +static void rds_iw_build_send_fastreg(struct rds_iw_device *rds_iwdev, struct rds_iw_connection *ic, struct rds_iw_send_work *send, int nent, int len, u64 sg_addr) +{ + BUG_ON(nent > send->s_page_list->max_page_list_len); + /* + * Perform a WR for the fast_reg_mr. Each individual page + * in the sg list is added to the fast reg page list and placed + * inside the fast_reg_mr WR. + */ + send->s_wr.opcode = IB_WR_FAST_REG_MR; + send->s_wr.wr.fast_reg.length = len; + send->s_wr.wr.fast_reg.rkey = send->s_mr->rkey; + send->s_wr.wr.fast_reg.page_list = send->s_page_list; + send->s_wr.wr.fast_reg.page_list_len = nent; + send->s_wr.wr.fast_reg.page_shift = rds_iwdev->page_shift; + send->s_wr.wr.fast_reg.access_flags = IB_ACCESS_REMOTE_WRITE; + send->s_wr.wr.fast_reg.iova_start = sg_addr; + + ib_update_fast_reg_key(send->s_mr, send->s_remap_count++); +} + +int rds_iw_xmit_rdma(struct rds_connection *conn, struct rds_rdma_op *op) +{ + struct rds_iw_connection *ic = conn->c_transport_data; + struct rds_iw_send_work *send = NULL; + struct rds_iw_send_work *first; + struct rds_iw_send_work *prev; + struct ib_send_wr *failed_wr; + struct rds_iw_device *rds_iwdev; + struct scatterlist *scat; + unsigned long len; + u64 remote_addr = op->r_remote_addr; + u32 pos, fr_pos; + u32 work_alloc; + u32 i; + u32 j; + int sent; + int ret; + int num_sge; + + rds_iwdev = ib_get_client_data(ic->i_cm_id->device, &rds_iw_client); + + /* map the message the first time we see it */ + if (!op->r_mapped) { + op->r_count = ib_dma_map_sg(ic->i_cm_id->device, + op->r_sg, op->r_nents, (op->r_write) ? + DMA_TO_DEVICE : DMA_FROM_DEVICE); + rdsdebug("ic %p mapping op %p: %d\n", ic, op, op->r_count); + if (op->r_count == 0) { + rds_iw_stats_inc(s_iw_tx_sg_mapping_failure); + ret = -ENOMEM; /* XXX ? */ + goto out; + } + + op->r_mapped = 1; + } + + if (!op->r_write) { + /* Alloc space on the send queue for the fastreg */ + work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, 1, &fr_pos); + if (work_alloc != 1) { + rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc); + rds_iw_stats_inc(s_iw_tx_ring_full); + ret = -ENOMEM; + goto out; + } + } + + /* + * Instead of knowing how to return a partial rdma read/write we insist that there + * be enough work requests to send the entire message. + */ + i = ceil(op->r_count, rds_iwdev->max_sge); + + work_alloc = rds_iw_ring_alloc(&ic->i_send_ring, i, &pos); + if (work_alloc != i) { + rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc); + rds_iw_stats_inc(s_iw_tx_ring_full); + ret = -ENOMEM; + goto out; + } + + send = &ic->i_sends[pos]; + if (!op->r_write) { + first = prev = &ic->i_sends[fr_pos]; + } else { + first = send; + prev = NULL; + } + scat = &op->r_sg[0]; + sent = 0; + num_sge = op->r_count; + + for (i = 0; i < work_alloc && scat != &op->r_sg[op->r_count]; i++) { + send->s_wr.send_flags = 0; + send->s_queued = jiffies; + + /* + * We want to delay signaling completions just enough to get + * the batching benefits but not so much that we create dead time on the wire. + */ + if (ic->i_unsignaled_wrs-- == 0) { + ic->i_unsignaled_wrs = rds_iw_sysctl_max_unsig_wrs; + send->s_wr.send_flags = IB_SEND_SIGNALED; + } + + /* To avoid the need to have the plumbing to invalidate the fastreg_mr used + * for local access after RDS is finished with it, using + * IB_WR_RDMA_READ_WITH_INV will invalidate it after the read has completed. + */ + if (op->r_write) + send->s_wr.opcode = IB_WR_RDMA_WRITE; + else + send->s_wr.opcode = IB_WR_RDMA_READ_WITH_INV; + + send->s_wr.wr.rdma.remote_addr = remote_addr; + send->s_wr.wr.rdma.rkey = op->r_key; + send->s_op = op; + + if (num_sge > rds_iwdev->max_sge) { + send->s_wr.num_sge = rds_iwdev->max_sge; + num_sge -= rds_iwdev->max_sge; + } else + send->s_wr.num_sge = num_sge; + + send->s_wr.next = NULL; + + if (prev) + prev->s_wr.next = &send->s_wr; + + for (j = 0; j < send->s_wr.num_sge && scat != &op->r_sg[op->r_count]; j++) { + len = ib_sg_dma_len(ic->i_cm_id->device, scat); + + if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV) + send->s_page_list->page_list[j] = ib_sg_dma_address(ic->i_cm_id->device, scat); + else { + send->s_sge[j].addr = ib_sg_dma_address(ic->i_cm_id->device, scat); + send->s_sge[j].length = len; + send->s_sge[j].lkey = rds_iw_local_dma_lkey(ic); + } + + sent += len; + rdsdebug("ic %p sent %d remote_addr %llu\n", ic, sent, remote_addr); + remote_addr += len; + + scat++; + } + + if (send->s_wr.opcode == IB_WR_RDMA_READ_WITH_INV) { + send->s_wr.num_sge = 1; + send->s_sge[0].addr = conn->c_xmit_rm->m_rs->rs_user_addr; + send->s_sge[0].length = conn->c_xmit_rm->m_rs->rs_user_bytes; + send->s_sge[0].lkey = ic->i_sends[fr_pos].s_mr->lkey; + } + + rdsdebug("send %p wr %p num_sge %u next %p\n", send, + &send->s_wr, send->s_wr.num_sge, send->s_wr.next); + + prev = send; + if (++send == &ic->i_sends[ic->i_send_ring.w_nr]) + send = ic->i_sends; + } + + /* if we finished the message then send completion owns it */ + if (scat == &op->r_sg[op->r_count]) + first->s_wr.send_flags = IB_SEND_SIGNALED; + + if (i < work_alloc) { + rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc - i); + work_alloc = i; + } + + /* On iWARP, local memory access by a remote system (ie, RDMA Read) is not + * recommended. Putting the lkey on the wire is a security hole, as it can + * allow for memory access to all of memory on the remote system. Some + * adapters do not allow using the lkey for this at all. To bypass this use a + * fastreg_mr (or possibly a dma_mr) + */ + if (!op->r_write) { + rds_iw_build_send_fastreg(rds_iwdev, ic, &ic->i_sends[fr_pos], + op->r_count, sent, conn->c_xmit_rm->m_rs->rs_user_addr); + work_alloc++; + } + + failed_wr = &first->s_wr; + ret = ib_post_send(ic->i_cm_id->qp, &first->s_wr, &failed_wr); + rdsdebug("ic %p first %p (wr %p) ret %d wr %p\n", ic, + first, &first->s_wr, ret, failed_wr); + BUG_ON(failed_wr != &first->s_wr); + if (ret) { + printk(KERN_WARNING "RDS/IW: rdma ib_post_send to %pI4 " + "returned %d\n", &conn->c_faddr, ret); + rds_iw_ring_unalloc(&ic->i_send_ring, work_alloc); + goto out; + } + +out: + return ret; +} + +void rds_iw_xmit_complete(struct rds_connection *conn) +{ + struct rds_iw_connection *ic = conn->c_transport_data; + + /* We may have a pending ACK or window update we were unable + * to send previously (due to flow control). Try again. */ + rds_iw_attempt_ack(ic); +} diff --git a/net/rds/iw_stats.c b/net/rds/iw_stats.c new file mode 100644 index 000000000000..ccc7e8f0bf0e --- /dev/null +++ b/net/rds/iw_stats.c @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include + +#include "rds.h" +#include "iw.h" + +DEFINE_PER_CPU(struct rds_iw_statistics, rds_iw_stats) ____cacheline_aligned; + +static char *rds_iw_stat_names[] = { + "iw_connect_raced", + "iw_listen_closed_stale", + "iw_tx_cq_call", + "iw_tx_cq_event", + "iw_tx_ring_full", + "iw_tx_throttle", + "iw_tx_sg_mapping_failure", + "iw_tx_stalled", + "iw_tx_credit_updates", + "iw_rx_cq_call", + "iw_rx_cq_event", + "iw_rx_ring_empty", + "iw_rx_refill_from_cq", + "iw_rx_refill_from_thread", + "iw_rx_alloc_limit", + "iw_rx_credit_updates", + "iw_ack_sent", + "iw_ack_send_failure", + "iw_ack_send_delayed", + "iw_ack_send_piggybacked", + "iw_ack_received", + "iw_rdma_mr_alloc", + "iw_rdma_mr_free", + "iw_rdma_mr_used", + "iw_rdma_mr_pool_flush", + "iw_rdma_mr_pool_wait", + "iw_rdma_mr_pool_depleted", +}; + +unsigned int rds_iw_stats_info_copy(struct rds_info_iterator *iter, + unsigned int avail) +{ + struct rds_iw_statistics stats = {0, }; + uint64_t *src; + uint64_t *sum; + size_t i; + int cpu; + + if (avail < ARRAY_SIZE(rds_iw_stat_names)) + goto out; + + for_each_online_cpu(cpu) { + src = (uint64_t *)&(per_cpu(rds_iw_stats, cpu)); + sum = (uint64_t *)&stats; + for (i = 0; i < sizeof(stats) / sizeof(uint64_t); i++) + *(sum++) += *(src++); + } + + rds_stats_info_copy(iter, (uint64_t *)&stats, rds_iw_stat_names, + ARRAY_SIZE(rds_iw_stat_names)); +out: + return ARRAY_SIZE(rds_iw_stat_names); +} diff --git a/net/rds/iw_sysctl.c b/net/rds/iw_sysctl.c new file mode 100644 index 000000000000..9590678cd616 --- /dev/null +++ b/net/rds/iw_sysctl.c @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2006 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include +#include +#include + +#include "iw.h" + +static struct ctl_table_header *rds_iw_sysctl_hdr; + +unsigned long rds_iw_sysctl_max_send_wr = RDS_IW_DEFAULT_SEND_WR; +unsigned long rds_iw_sysctl_max_recv_wr = RDS_IW_DEFAULT_RECV_WR; +unsigned long rds_iw_sysctl_max_recv_allocation = (128 * 1024 * 1024) / RDS_FRAG_SIZE; +static unsigned long rds_iw_sysctl_max_wr_min = 1; +/* hardware will fail CQ creation long before this */ +static unsigned long rds_iw_sysctl_max_wr_max = (u32)~0; + +unsigned long rds_iw_sysctl_max_unsig_wrs = 16; +static unsigned long rds_iw_sysctl_max_unsig_wr_min = 1; +static unsigned long rds_iw_sysctl_max_unsig_wr_max = 64; + +unsigned long rds_iw_sysctl_max_unsig_bytes = (16 << 20); +static unsigned long rds_iw_sysctl_max_unsig_bytes_min = 1; +static unsigned long rds_iw_sysctl_max_unsig_bytes_max = ~0UL; + +unsigned int rds_iw_sysctl_flow_control = 1; + +ctl_table rds_iw_sysctl_table[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "max_send_wr", + .data = &rds_iw_sysctl_max_send_wr, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_doulongvec_minmax, + .extra1 = &rds_iw_sysctl_max_wr_min, + .extra2 = &rds_iw_sysctl_max_wr_max, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "max_recv_wr", + .data = &rds_iw_sysctl_max_recv_wr, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_doulongvec_minmax, + .extra1 = &rds_iw_sysctl_max_wr_min, + .extra2 = &rds_iw_sysctl_max_wr_max, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "max_unsignaled_wr", + .data = &rds_iw_sysctl_max_unsig_wrs, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_doulongvec_minmax, + .extra1 = &rds_iw_sysctl_max_unsig_wr_min, + .extra2 = &rds_iw_sysctl_max_unsig_wr_max, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "max_unsignaled_bytes", + .data = &rds_iw_sysctl_max_unsig_bytes, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_doulongvec_minmax, + .extra1 = &rds_iw_sysctl_max_unsig_bytes_min, + .extra2 = &rds_iw_sysctl_max_unsig_bytes_max, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "max_recv_allocation", + .data = &rds_iw_sysctl_max_recv_allocation, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_doulongvec_minmax, + }, + { + .ctl_name = CTL_UNNUMBERED, + .procname = "flow_control", + .data = &rds_iw_sysctl_flow_control, + .maxlen = sizeof(rds_iw_sysctl_flow_control), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { .ctl_name = 0} +}; + +static struct ctl_path rds_iw_sysctl_path[] = { + { .procname = "net", .ctl_name = CTL_NET, }, + { .procname = "rds", .ctl_name = CTL_UNNUMBERED, }, + { .procname = "iw", .ctl_name = CTL_UNNUMBERED, }, + { } +}; + +void rds_iw_sysctl_exit(void) +{ + if (rds_iw_sysctl_hdr) + unregister_sysctl_table(rds_iw_sysctl_hdr); +} + +int __init rds_iw_sysctl_init(void) +{ + rds_iw_sysctl_hdr = register_sysctl_paths(rds_iw_sysctl_path, rds_iw_sysctl_table); + if (rds_iw_sysctl_hdr == NULL) + return -ENOMEM; + return 0; +} -- cgit v1.2.3 From 55b7ed0b582f9ff4bf3b9b44969cddda8c6763ea Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 24 Feb 2009 15:30:37 +0000 Subject: RDS: Common RDMA transport code Although most of IB and iWARP are separated from each other, there is some common code required to handle their shared CM listen port. This code listens for CM events and then dispatches the event to the appropriate transport, either IB or iWARP. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/rdma_transport.c | 214 +++++++++++++++++++++++++++++++++++++++++++++++ net/rds/rdma_transport.h | 28 +++++++ 2 files changed, 242 insertions(+) create mode 100644 net/rds/rdma_transport.c create mode 100644 net/rds/rdma_transport.h (limited to 'net') diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c new file mode 100644 index 000000000000..7b19024f9706 --- /dev/null +++ b/net/rds/rdma_transport.c @@ -0,0 +1,214 @@ +/* + * Copyright (c) 2009 Oracle. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ +#include + +#include "rdma_transport.h" + +static struct rdma_cm_id *rds_iw_listen_id; + +int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, + struct rdma_cm_event *event) +{ + /* this can be null in the listening path */ + struct rds_connection *conn = cm_id->context; + struct rds_transport *trans; + int ret = 0; + + rdsdebug("conn %p id %p handling event %u\n", conn, cm_id, + event->event); + + if (cm_id->device->node_type == RDMA_NODE_RNIC) + trans = &rds_iw_transport; + else + trans = &rds_ib_transport; + + /* Prevent shutdown from tearing down the connection + * while we're executing. */ + if (conn) { + mutex_lock(&conn->c_cm_lock); + + /* If the connection is being shut down, bail out + * right away. We return 0 so cm_id doesn't get + * destroyed prematurely */ + if (rds_conn_state(conn) == RDS_CONN_DISCONNECTING) { + /* Reject incoming connections while we're tearing + * down an existing one. */ + if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST) + ret = 1; + goto out; + } + } + + switch (event->event) { + case RDMA_CM_EVENT_CONNECT_REQUEST: + ret = trans->cm_handle_connect(cm_id, event); + break; + + case RDMA_CM_EVENT_ADDR_RESOLVED: + /* XXX do we need to clean up if this fails? */ + ret = rdma_resolve_route(cm_id, + RDS_RDMA_RESOLVE_TIMEOUT_MS); + break; + + case RDMA_CM_EVENT_ROUTE_RESOLVED: + /* XXX worry about racing with listen acceptance */ + ret = trans->cm_initiate_connect(cm_id); + break; + + case RDMA_CM_EVENT_ESTABLISHED: + trans->cm_connect_complete(conn, event); + break; + + case RDMA_CM_EVENT_ADDR_ERROR: + case RDMA_CM_EVENT_ROUTE_ERROR: + case RDMA_CM_EVENT_CONNECT_ERROR: + case RDMA_CM_EVENT_UNREACHABLE: + case RDMA_CM_EVENT_REJECTED: + case RDMA_CM_EVENT_DEVICE_REMOVAL: + case RDMA_CM_EVENT_ADDR_CHANGE: + if (conn) + rds_conn_drop(conn); + break; + + case RDMA_CM_EVENT_DISCONNECTED: + printk(KERN_WARNING "RDS/IW: DISCONNECT event - dropping connection " + "%pI4->%pI4\n", &conn->c_laddr, + &conn->c_faddr); + rds_conn_drop(conn); + break; + + default: + /* things like device disconnect? */ + printk(KERN_ERR "unknown event %u\n", event->event); + BUG(); + break; + } + +out: + if (conn) + mutex_unlock(&conn->c_cm_lock); + + rdsdebug("id %p event %u handling ret %d\n", cm_id, event->event, ret); + + return ret; +} + +static int __init rds_rdma_listen_init(void) +{ + struct sockaddr_in sin; + struct rdma_cm_id *cm_id; + int ret; + + cm_id = rdma_create_id(rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP); + if (IS_ERR(cm_id)) { + ret = PTR_ERR(cm_id); + printk(KERN_ERR "RDS/IW: failed to setup listener, " + "rdma_create_id() returned %d\n", ret); + goto out; + } + + sin.sin_family = PF_INET, + sin.sin_addr.s_addr = (__force u32)htonl(INADDR_ANY); + sin.sin_port = (__force u16)htons(RDS_PORT); + + /* + * XXX I bet this binds the cm_id to a device. If we want to support + * fail-over we'll have to take this into consideration. + */ + ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin); + if (ret) { + printk(KERN_ERR "RDS/IW: failed to setup listener, " + "rdma_bind_addr() returned %d\n", ret); + goto out; + } + + ret = rdma_listen(cm_id, 128); + if (ret) { + printk(KERN_ERR "RDS/IW: failed to setup listener, " + "rdma_listen() returned %d\n", ret); + goto out; + } + + rdsdebug("cm %p listening on port %u\n", cm_id, RDS_PORT); + + rds_iw_listen_id = cm_id; + cm_id = NULL; +out: + if (cm_id) + rdma_destroy_id(cm_id); + return ret; +} + +static void rds_rdma_listen_stop(void) +{ + if (rds_iw_listen_id) { + rdsdebug("cm %p\n", rds_iw_listen_id); + rdma_destroy_id(rds_iw_listen_id); + rds_iw_listen_id = NULL; + } +} + +int __init rds_rdma_init(void) +{ + int ret; + + ret = rds_rdma_listen_init(); + if (ret) + goto out; + + ret = rds_iw_init(); + if (ret) + goto err_iw_init; + + ret = rds_ib_init(); + if (ret) + goto err_ib_init; + + goto out; + +err_ib_init: + rds_iw_exit(); +err_iw_init: + rds_rdma_listen_stop(); +out: + return ret; +} + +void rds_rdma_exit(void) +{ + /* stop listening first to ensure no new connections are attempted */ + rds_rdma_listen_stop(); + rds_ib_exit(); + rds_iw_exit(); +} + diff --git a/net/rds/rdma_transport.h b/net/rds/rdma_transport.h new file mode 100644 index 000000000000..2f2c7d976c21 --- /dev/null +++ b/net/rds/rdma_transport.h @@ -0,0 +1,28 @@ +#ifndef _RDMA_TRANSPORT_H +#define _RDMA_TRANSPORT_H + +#include +#include +#include "rds.h" + +#define RDS_RDMA_RESOLVE_TIMEOUT_MS 5000 + +int rds_rdma_conn_connect(struct rds_connection *conn); +int rds_rdma_cm_event_handler(struct rdma_cm_id *cm_id, + struct rdma_cm_event *event); + +/* from rdma_transport.c */ +int rds_rdma_init(void); +void rds_rdma_exit(void); + +/* from ib.c */ +extern struct rds_transport rds_ib_transport; +int rds_ib_init(void); +void rds_ib_exit(void); + +/* from iw.c */ +extern struct rds_transport rds_iw_transport; +int rds_iw_init(void); +void rds_iw_exit(void); + +#endif -- cgit v1.2.3 From cbd151bfc7b619b59c44c3697e901cb7152f418e Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Thu, 26 Feb 2009 23:43:19 -0800 Subject: RDS: Add RDS to AF key strings Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/core/sock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 8ee734ea5229..0620046e4eba 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -150,7 +150,7 @@ static const char *af_family_key_strings[AF_MAX+1] = { "sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" , "sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" , "sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" , - "sk_lock-21" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" , + "sk_lock-AF_RDS" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" , "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" , "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" , "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , @@ -165,7 +165,7 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" , "slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" , "slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" , - "slock-21" , "slock-AF_SNA" , "slock-AF_IRDA" , + "slock-AF_RDS" , "slock-AF_SNA" , "slock-AF_IRDA" , "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" , "slock-27" , "slock-28" , "slock-AF_CAN" , "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , @@ -180,7 +180,7 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" , "clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" , "clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" , - "clock-21" , "clock-AF_SNA" , "clock-AF_IRDA" , + "clock-AF_RDS" , "clock-AF_SNA" , "clock-AF_IRDA" , "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" , "clock-27" , "clock-28" , "clock-AF_CAN" , "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , -- cgit v1.2.3 From fe17f84f5f2a7d6c1a31c04c06a016d4ad5f7dec Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Tue, 24 Feb 2009 15:30:39 +0000 Subject: RDS: Kconfig and Makefile Add RDS Kconfig and Makefile, and modify net/'s to add us to the build. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/Kconfig | 1 + net/Makefile | 1 + net/rds/Kconfig | 13 +++++++++++++ net/rds/Makefile | 14 ++++++++++++++ 4 files changed, 29 insertions(+) create mode 100644 net/rds/Kconfig create mode 100644 net/rds/Makefile (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index a12bae0e3fe9..6b39ede3b1b1 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -171,6 +171,7 @@ endif source "net/dccp/Kconfig" source "net/sctp/Kconfig" +source "net/rds/Kconfig" source "net/tipc/Kconfig" source "net/atm/Kconfig" source "net/802/Kconfig" diff --git a/net/Makefile b/net/Makefile index 0fcce89d7169..9e00a55a901b 100644 --- a/net/Makefile +++ b/net/Makefile @@ -49,6 +49,7 @@ obj-y += 8021q/ endif obj-$(CONFIG_IP_DCCP) += dccp/ obj-$(CONFIG_IP_SCTP) += sctp/ +obj-$(CONFIG_RDS) += rds/ obj-y += wireless/ obj-$(CONFIG_MAC80211) += mac80211/ obj-$(CONFIG_TIPC) += tipc/ diff --git a/net/rds/Kconfig b/net/rds/Kconfig new file mode 100644 index 000000000000..63bd370ab6ee --- /dev/null +++ b/net/rds/Kconfig @@ -0,0 +1,13 @@ + +config RDS + tristate "Reliable Datagram Sockets (RDS) (EXPERIMENTAL)" + depends on INET && INFINIBAND_IPOIB && EXPERIMENTAL + ---help--- + RDS provides reliable, sequenced delivery of datagrams + over Infiniband. + +config RDS_DEBUG + bool "Debugging messages" + depends on RDS + default n + diff --git a/net/rds/Makefile b/net/rds/Makefile new file mode 100644 index 000000000000..51f27585fa08 --- /dev/null +++ b/net/rds/Makefile @@ -0,0 +1,14 @@ +obj-$(CONFIG_RDS) += rds.o +rds-y := af_rds.o bind.o cong.o connection.o info.o message.o \ + recv.o send.o stats.o sysctl.o threads.o transport.o \ + loop.o page.o rdma.o \ + rdma_transport.o \ + ib.o ib_cm.o ib_recv.o ib_ring.o ib_send.o ib_stats.o \ + ib_sysctl.o ib_rdma.o \ + iw.o iw_cm.o iw_recv.o iw_ring.o iw_send.o iw_stats.o \ + iw_sysctl.o iw_rdma.o + +ifeq ($(CONFIG_RDS_DEBUG), y) +EXTRA_CFLAGS += -DDEBUG +endif + -- cgit v1.2.3 From 53d6f81c7814690ba096584c733e5deaa34fdd8a Mon Sep 17 00:00:00 2001 From: Vasanthakumar Thiagarajan Date: Wed, 11 Feb 2009 22:18:49 +0530 Subject: mac80211: Make sure non-HT connection when IEEE80211_STA_TKIP_WEP_USED is set It is possible that some broken AP might send HT IEs in it's assoc response even though the STA has not sent them in assoc req when WEP/TKIP is used as pairwise cipher suite. Also it is important to check this bit before enabling ht mode in beacon receive path. Signed-off-by: Vasanthakumar Thiagarajan Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index fbb766afe599..bf872cbba096 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1528,7 +1528,8 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, elems.wmm_param_len); if (elems.ht_info_elem && elems.wmm_param && - (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) + (ifsta->flags & IEEE80211_STA_WMM_ENABLED) && + !(ifsta->flags & IEEE80211_STA_TKIP_WEP_USED)) changed |= ieee80211_enable_ht(sdata, elems.ht_info_elem, ap_ht_cap_flags); @@ -1954,7 +1955,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, erp_valid, erp_value); - if (elems.ht_cap_elem && elems.ht_info_elem && elems.wmm_param) { + if (elems.ht_cap_elem && elems.ht_info_elem && elems.wmm_param && + !(ifsta->flags & IEEE80211_STA_TKIP_WEP_USED)) { struct sta_info *sta; struct ieee80211_supported_band *sband; u16 ap_ht_cap_flags; -- cgit v1.2.3 From 076ae609d20901b5fd9bc19fc4c245624c423970 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 11 Feb 2009 20:27:30 +0100 Subject: mac80211: disallow moving netns mac80211 currently assumes init_net for all interfaces, so really will not cope well with network namespaces, at least at this time. To change this, we would have keep track of the netns in addition to the ifindex, which is not something I want to think about right now. Signed-off-by: Johannes Berg Cc: Eric W. Biederman Signed-off-by: John W. Linville --- net/mac80211/iface.c | 1 + net/mac80211/main.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index df94b9365264..e8221180b6c1 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -798,6 +798,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, memcpy(ndev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN); SET_NETDEV_DEV(ndev, wiphy_dev(local->hw.wiphy)); + ndev->features |= NETIF_F_NETNS_LOCAL; /* don't use IEEE80211_DEV_TO_SUB_IF because it checks too much */ sdata = netdev_priv(ndev); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 5667f4e8067f..795f8c4a9fa0 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -916,6 +916,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) memcpy(local->mdev->dev_addr, local->hw.wiphy->perm_addr, ETH_ALEN); SET_NETDEV_DEV(local->mdev, wiphy_dev(local->hw.wiphy)); + local->mdev->features |= NETIF_F_NETNS_LOCAL; result = register_netdevice(local->mdev); if (result < 0) -- cgit v1.2.3 From 96f5e66e8a79810e2982cdcfa28e554f3d97da21 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 12 Feb 2009 00:51:53 +0100 Subject: mac80211: fix aggregation for hardware with ampdu queues Hardware with AMPDU queues currently has broken aggregation. This patch fixes it by making all A-MPDUs go over the regular AC queues, but keeping track of the hardware queues in mac80211. As a first rough version, it actually stops the AC queue for extended periods of time, which can be removed by adding buffering internal to mac80211, but is currently not a huge problem because people rarely use multiple TIDs that are in the same AC (and iwlwifi currently doesn't operate as AP). This is a short-term fix, my current medium-term plan, which I hope to execute soon as well, but am not sure can finish before .30, looks like this: 1) rework the internal queuing layer in mac80211 that we use for fragments if the driver stopped queue in the middle of a fragmented frame to be able to queue more frames at once (rather than just a single frame with its fragments) 2) instead of stopping the entire AC queue, queue up the frames in a per-station/per-TID queue during aggregation session initiation, when the session has come up take all those frames and put them onto the queue from 1) 3) push the ampdu queue layer abstraction this patch introduces in mac80211 into the driver, and remove the virtual queue stuff from mac80211 again This plan will probably also affect ath9k in that mac80211 queues the frames instead of passing them down, even when there are no ampdu queues. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-tx.c | 186 ++++++++++++++++++++++++++++++--------------- net/mac80211/ieee80211_i.h | 20 +++-- net/mac80211/main.c | 9 ++- net/mac80211/sta_info.c | 15 +++- net/mac80211/sta_info.h | 4 +- net/mac80211/tx.c | 18 +++-- net/mac80211/util.c | 75 ++++++++++++++---- net/mac80211/wme.c | 161 +-------------------------------------- net/mac80211/wme.h | 6 -- 9 files changed, 241 insertions(+), 253 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 1232d9f01ca9..0217b68c47ca 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -132,9 +132,24 @@ static int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, state = &sta->ampdu_mlme.tid_state_tx[tid]; - if (local->hw.ampdu_queues) - ieee80211_stop_queue(&local->hw, sta->tid_to_tx_q[tid]); + if (local->hw.ampdu_queues) { + if (initiator) { + /* + * Stop the AC queue to avoid issues where we send + * unaggregated frames already before the delba. + */ + ieee80211_stop_queue_by_reason(&local->hw, + local->hw.queues + sta->tid_to_tx_q[tid], + IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + } + /* + * Pretend the driver woke the queue, just in case + * it disabled it before the session was stopped. + */ + ieee80211_wake_queue( + &local->hw, local->hw.queues + sta->tid_to_tx_q[tid]); + } *state = HT_AGG_STATE_REQ_STOP_BA_MSK | (initiator << HT_AGG_STATE_INITIATOR_SHIFT); @@ -144,8 +159,6 @@ static int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, /* HW shall not deny going back to legacy */ if (WARN_ON(ret)) { *state = HT_AGG_STATE_OPERATIONAL; - if (local->hw.ampdu_queues) - ieee80211_wake_queue(&local->hw, sta->tid_to_tx_q[tid]); } return ret; @@ -189,14 +202,19 @@ static void sta_addba_resp_timer_expired(unsigned long data) spin_unlock_bh(&sta->lock); } +static inline int ieee80211_ac_from_tid(int tid) +{ + return ieee802_1d_to_ac[tid & 7]; +} + int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) { struct ieee80211_local *local = hw_to_local(hw); struct sta_info *sta; struct ieee80211_sub_if_data *sdata; - u16 start_seq_num; u8 *state; - int ret = 0; + int i, qn = -1, ret = 0; + u16 start_seq_num; if (WARN_ON(!local->ops->ampdu_action)) return -EINVAL; @@ -209,6 +227,13 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) ra, tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ + if (hw->ampdu_queues && ieee80211_ac_from_tid(tid) == 0) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "rejecting on voice AC\n"); +#endif + return -EINVAL; + } + rcu_read_lock(); sta = sta_info_get(local, ra); @@ -217,7 +242,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) printk(KERN_DEBUG "Could not find the station\n"); #endif ret = -ENOENT; - goto exit; + goto unlock; } /* @@ -230,11 +255,13 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) sta->sdata->vif.type != NL80211_IFTYPE_AP_VLAN && sta->sdata->vif.type != NL80211_IFTYPE_AP) { ret = -EINVAL; - goto exit; + goto unlock; } spin_lock_bh(&sta->lock); + sdata = sta->sdata; + /* we have tried too many times, receiver does not want A-MPDU */ if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_MAX_RETRIES) { ret = -EBUSY; @@ -252,6 +279,42 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) goto err_unlock_sta; } + if (hw->ampdu_queues) { + spin_lock(&local->queue_stop_reason_lock); + /* reserve a new queue for this session */ + for (i = 0; i < local->hw.ampdu_queues; i++) { + if (local->ampdu_ac_queue[i] < 0) { + qn = i; + local->ampdu_ac_queue[qn] = + ieee80211_ac_from_tid(tid); + break; + } + } + spin_unlock(&local->queue_stop_reason_lock); + + if (qn < 0) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "BA request denied - " + "queue unavailable for tid %d\n", tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + ret = -ENOSPC; + goto err_unlock_sta; + } + + /* + * If we successfully allocate the session, we can't have + * anything going on on the queue this TID maps into, so + * stop it for now. This is a "virtual" stop using the same + * mechanism that drivers will use. + * + * XXX: queue up frames for this session in the sta_info + * struct instead to avoid hitting all other STAs. + */ + ieee80211_stop_queue_by_reason( + &local->hw, hw->queues + qn, + IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + } + /* prepare A-MPDU MLME for Tx aggregation */ sta->ampdu_mlme.tid_tx[tid] = kmalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC); @@ -262,8 +325,9 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) tid); #endif ret = -ENOMEM; - goto err_unlock_sta; + goto err_return_queue; } + /* Tx timer */ sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.function = sta_addba_resp_timer_expired; @@ -271,49 +335,25 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) (unsigned long)&sta->timer_to_tid[tid]; init_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); - if (hw->ampdu_queues) { - /* create a new queue for this aggregation */ - ret = ieee80211_ht_agg_queue_add(local, sta, tid); - - /* case no queue is available to aggregation - * don't switch to aggregation */ - if (ret) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "BA request denied - " - "queue unavailable for tid %d\n", tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - goto err_unlock_queue; - } - } - sdata = sta->sdata; - /* Ok, the Addba frame hasn't been sent yet, but if the driver calls the * call back right away, it must see that the flow has begun */ *state |= HT_ADDBA_REQUESTED_MSK; - /* This is slightly racy because the queue isn't stopped */ start_seq_num = sta->tid_seq[tid]; ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_START, &sta->sta, tid, &start_seq_num); if (ret) { - /* No need to requeue the packets in the agg queue, since we - * held the tx lock: no packet could be enqueued to the newly - * allocated queue */ - if (hw->ampdu_queues) - ieee80211_ht_agg_queue_remove(local, sta, tid, 0); #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "BA request denied - HW unavailable for" " tid %d\n", tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ *state = HT_AGG_STATE_IDLE; - goto err_unlock_queue; + goto err_free; } + sta->tid_to_tx_q[tid] = qn; - /* Will put all the packets in the new SW queue */ - if (hw->ampdu_queues) - ieee80211_requeue(local, ieee802_1d_to_ac[tid]); spin_unlock_bh(&sta->lock); /* send an addBA request */ @@ -322,7 +362,6 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) sta->ampdu_mlme.dialog_token_allocator; sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num; - ieee80211_send_addba_request(sta->sdata, ra, tid, sta->ampdu_mlme.tid_tx[tid]->dialog_token, sta->ampdu_mlme.tid_tx[tid]->ssn, @@ -334,15 +373,24 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid); #endif - goto exit; + goto unlock; -err_unlock_queue: + err_free: kfree(sta->ampdu_mlme.tid_tx[tid]); sta->ampdu_mlme.tid_tx[tid] = NULL; - ret = -EBUSY; -err_unlock_sta: + err_return_queue: + if (qn >= 0) { + /* We failed, so start queue again right away. */ + ieee80211_wake_queue_by_reason(hw, hw->queues + qn, + IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + /* give queue back to pool */ + spin_lock(&local->queue_stop_reason_lock); + local->ampdu_ac_queue[qn] = -1; + spin_unlock(&local->queue_stop_reason_lock); + } + err_unlock_sta: spin_unlock_bh(&sta->lock); -exit: + unlock: rcu_read_unlock(); return ret; } @@ -375,7 +423,7 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid) state = &sta->ampdu_mlme.tid_state_tx[tid]; spin_lock_bh(&sta->lock); - if (!(*state & HT_ADDBA_REQUESTED_MSK)) { + if (WARN_ON(!(*state & HT_ADDBA_REQUESTED_MSK))) { #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "addBA was not requested yet, state is %d\n", *state); @@ -385,7 +433,8 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid) return; } - WARN_ON_ONCE(*state & HT_ADDBA_DRV_READY_MSK); + if (WARN_ON(*state & HT_ADDBA_DRV_READY_MSK)) + goto out; *state |= HT_ADDBA_DRV_READY_MSK; @@ -393,9 +442,18 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid) #ifdef CONFIG_MAC80211_HT_DEBUG printk(KERN_DEBUG "Aggregation is on for tid %d \n", tid); #endif - if (hw->ampdu_queues) - ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); + if (hw->ampdu_queues) { + /* + * Wake up this queue, we stopped it earlier, + * this will in turn wake the entire AC. + */ + ieee80211_wake_queue_by_reason(hw, + hw->queues + sta->tid_to_tx_q[tid], + IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + } } + + out: spin_unlock_bh(&sta->lock); rcu_read_unlock(); } @@ -485,7 +543,6 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid) struct ieee80211_local *local = hw_to_local(hw); struct sta_info *sta; u8 *state; - int agg_queue; if (tid >= STA_TID_NUM) { #ifdef CONFIG_MAC80211_HT_DEBUG @@ -527,19 +584,19 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid) ieee80211_send_delba(sta->sdata, ra, tid, WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); - if (hw->ampdu_queues) { - agg_queue = sta->tid_to_tx_q[tid]; - ieee80211_ht_agg_queue_remove(local, sta, tid, 1); + spin_lock_bh(&sta->lock); - /* We just requeued the all the frames that were in the - * removed queue, and since we might miss a softirq we do - * netif_schedule_queue. ieee80211_wake_queue is not used - * here as this queue is not necessarily stopped + if (*state & HT_AGG_STATE_INITIATOR_MSK && + hw->ampdu_queues) { + /* + * Wake up this queue, we stopped it earlier, + * this will in turn wake the entire AC. */ - netif_schedule_queue(netdev_get_tx_queue(local->mdev, - agg_queue)); + ieee80211_wake_queue_by_reason(hw, + hw->queues + sta->tid_to_tx_q[tid], + IEEE80211_QUEUE_STOP_REASON_AGGREGATION); } - spin_lock_bh(&sta->lock); + *state = HT_AGG_STATE_IDLE; sta->ampdu_mlme.addba_req_num[tid] = 0; kfree(sta->ampdu_mlme.tid_tx[tid]); @@ -613,12 +670,21 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, #endif /* CONFIG_MAC80211_HT_DEBUG */ if (le16_to_cpu(mgmt->u.action.u.addba_resp.status) == WLAN_STATUS_SUCCESS) { + u8 curstate = *state; + *state |= HT_ADDBA_RECEIVED_MSK; - sta->ampdu_mlme.addba_req_num[tid] = 0; - if (*state == HT_AGG_STATE_OPERATIONAL && - local->hw.ampdu_queues) - ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); + if (hw->ampdu_queues && *state != curstate && + *state == HT_AGG_STATE_OPERATIONAL) { + /* + * Wake up this queue, we stopped it earlier, + * this will in turn wake the entire AC. + */ + ieee80211_wake_queue_by_reason(hw, + hw->queues + sta->tid_to_tx_q[tid], + IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + } + sta->ampdu_mlme.addba_req_num[tid] = 0; if (local->ops->ampdu_action) { (void)local->ops->ampdu_action(hw, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 2cb743ed9f9c..e2bbd3f11797 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -564,12 +564,10 @@ enum { enum queue_stop_reason { IEEE80211_QUEUE_STOP_REASON_DRIVER, IEEE80211_QUEUE_STOP_REASON_PS, - IEEE80211_QUEUE_STOP_REASON_CSA + IEEE80211_QUEUE_STOP_REASON_CSA, + IEEE80211_QUEUE_STOP_REASON_AGGREGATION, }; -/* maximum number of hardware queues we support. */ -#define QD_MAX_QUEUES (IEEE80211_MAX_AMPDU_QUEUES + IEEE80211_MAX_QUEUES) - struct ieee80211_master_priv { struct ieee80211_local *local; }; @@ -582,9 +580,15 @@ struct ieee80211_local { const struct ieee80211_ops *ops; - unsigned long queue_pool[BITS_TO_LONGS(QD_MAX_QUEUES)]; - unsigned long queue_stop_reasons[IEEE80211_MAX_QUEUES]; + /* AC queue corresponding to each AMPDU queue */ + s8 ampdu_ac_queue[IEEE80211_MAX_AMPDU_QUEUES]; + unsigned int amdpu_ac_stop_refcnt[IEEE80211_MAX_AMPDU_QUEUES]; + + unsigned long queue_stop_reasons[IEEE80211_MAX_QUEUES + + IEEE80211_MAX_AMPDU_QUEUES]; + /* also used to protect ampdu_ac_queue and amdpu_ac_stop_refcnt */ spinlock_t queue_stop_reason_lock; + struct net_device *mdev; /* wmaster# - "master" 802.11 device */ int open_count; int monitors, cooked_mntrs; @@ -1042,6 +1046,10 @@ void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, enum queue_stop_reason reason); void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, enum queue_stop_reason reason); +void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue, + enum queue_stop_reason reason); +void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue, + enum queue_stop_reason reason); #ifdef CONFIG_MAC80211_NOINLINE #define debug_noinline noinline diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 795f8c4a9fa0..e9181981adcd 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -705,7 +705,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, const struct ieee80211_ops *ops) { struct ieee80211_local *local; - int priv_size; + int priv_size, i; struct wiphy *wiphy; /* Ensure 32-byte alignment of our private data and hw private data. @@ -779,6 +779,11 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, setup_timer(&local->dynamic_ps_timer, ieee80211_dynamic_ps_timer, (unsigned long) local); + for (i = 0; i < IEEE80211_MAX_AMPDU_QUEUES; i++) + local->ampdu_ac_queue[i] = -1; + /* using an s8 won't work with more than that */ + BUILD_BUG_ON(IEEE80211_MAX_AMPDU_QUEUES > 127); + sta_info_init(local); tasklet_init(&local->tx_pending_tasklet, ieee80211_tx_pending, @@ -872,7 +877,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) mdev = alloc_netdev_mq(sizeof(struct ieee80211_master_priv), "wmaster%d", ieee80211_master_setup, - ieee80211_num_queues(hw)); + hw->queues); if (!mdev) goto fail_mdev_alloc; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 634f65c0130e..4ba3c540fcf3 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -202,6 +202,18 @@ void sta_info_destroy(struct sta_info *sta) /* Make sure timer won't free the tid_rx struct, see below */ if (tid_rx) tid_rx->shutdown = true; + + /* + * The stop callback cannot find this station any more, but + * it didn't complete its work -- start the queue if necessary + */ + if (sta->ampdu_mlme.tid_state_tx[i] & HT_AGG_STATE_INITIATOR_MSK && + sta->ampdu_mlme.tid_state_tx[i] & HT_AGG_STATE_REQ_STOP_BA_MSK && + local->hw.ampdu_queues) + ieee80211_wake_queue_by_reason(&local->hw, + local->hw.queues + sta->tid_to_tx_q[i], + IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + spin_unlock_bh(&sta->lock); /* @@ -275,8 +287,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, * enable session_timer's data differentiation. refer to * sta_rx_agg_session_timer_expired for useage */ sta->timer_to_tid[i] = i; - /* tid to tx queue: initialize according to HW (0 is valid) */ - sta->tid_to_tx_q[i] = ieee80211_num_queues(&local->hw); + sta->tid_to_tx_q[i] = -1; /* rx */ sta->ampdu_mlme.tid_state_rx[i] = HT_AGG_STATE_IDLE; sta->ampdu_mlme.tid_rx[i] = NULL; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index d9653231992f..a2921f15787b 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -200,7 +200,7 @@ struct sta_ampdu_mlme { * @tid_seq: per-TID sequence numbers for sending to this STA * @ampdu_mlme: A-MPDU state machine state * @timer_to_tid: identity mapping to ID timers - * @tid_to_tx_q: map tid to tx queue + * @tid_to_tx_q: map tid to tx queue (invalid == negative values) * @llid: Local link ID * @plid: Peer link ID * @reason: Cancel reason on PLINK_HOLDING state @@ -275,7 +275,7 @@ struct sta_info { */ struct sta_ampdu_mlme ampdu_mlme; u8 timer_to_tid[STA_TID_NUM]; - u8 tid_to_tx_q[STA_TID_NUM]; + s8 tid_to_tx_q[STA_TID_NUM]; #ifdef CONFIG_MAC80211_MESH /* diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 33926831c648..6aca49897d55 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -876,7 +876,6 @@ ieee80211_tx_h_stats(struct ieee80211_tx_data *tx) return TX_CONTINUE; } - /* actual transmit path */ /* @@ -1016,12 +1015,20 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, tx->sta = sta_info_get(local, hdr->addr1); if (tx->sta && ieee80211_is_data_qos(hdr->frame_control)) { + unsigned long flags; qc = ieee80211_get_qos_ctl(hdr); tid = *qc & IEEE80211_QOS_CTL_TID_MASK; + spin_lock_irqsave(&tx->sta->lock, flags); state = &tx->sta->ampdu_mlme.tid_state_tx[tid]; - if (*state == HT_AGG_STATE_OPERATIONAL) + if (*state == HT_AGG_STATE_OPERATIONAL) { info->flags |= IEEE80211_TX_CTL_AMPDU; + if (local->hw.ampdu_queues) + skb_set_queue_mapping( + skb, tx->local->hw.queues + + tx->sta->tid_to_tx_q[tid]); + } + spin_unlock_irqrestore(&tx->sta->lock, flags); } if (is_multicast_ether_addr(hdr->addr1)) { @@ -1085,7 +1092,8 @@ static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb, int ret, i; if (skb) { - if (netif_subqueue_stopped(local->mdev, skb)) + if (ieee80211_queue_stopped(&local->hw, + skb_get_queue_mapping(skb))) return IEEE80211_TX_PENDING; ret = local->ops->tx(local_to_hw(local), skb); @@ -1101,8 +1109,8 @@ static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb, info = IEEE80211_SKB_CB(tx->extra_frag[i]); info->flags &= ~(IEEE80211_TX_CTL_CLEAR_PS_FILT | IEEE80211_TX_CTL_FIRST_FRAGMENT); - if (netif_subqueue_stopped(local->mdev, - tx->extra_frag[i])) + if (ieee80211_queue_stopped(&local->hw, + skb_get_queue_mapping(tx->extra_frag[i]))) return IEEE80211_TX_FRAG_AGAIN; ret = local->ops->tx(local_to_hw(local), diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 73c7d7345abd..92ea1770461b 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -344,15 +344,36 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue, { struct ieee80211_local *local = hw_to_local(hw); - /* we don't need to track ampdu queues */ - if (queue < ieee80211_num_regular_queues(hw)) { - __clear_bit(reason, &local->queue_stop_reasons[queue]); + if (queue >= hw->queues) { + if (local->ampdu_ac_queue[queue - hw->queues] < 0) + return; + + /* + * for virtual aggregation queues, we need to refcount the + * internal mac80211 disable (multiple times!), keep track of + * driver disable _and_ make sure the regular queue is + * actually enabled. + */ + if (reason == IEEE80211_QUEUE_STOP_REASON_AGGREGATION) + local->amdpu_ac_stop_refcnt[queue - hw->queues]--; + else + __clear_bit(reason, &local->queue_stop_reasons[queue]); - if (local->queue_stop_reasons[queue] != 0) - /* someone still has this queue stopped */ + if (local->queue_stop_reasons[queue] || + local->amdpu_ac_stop_refcnt[queue - hw->queues]) return; + + /* now go on to treat the corresponding regular queue */ + queue = local->ampdu_ac_queue[queue - hw->queues]; + reason = IEEE80211_QUEUE_STOP_REASON_AGGREGATION; } + __clear_bit(reason, &local->queue_stop_reasons[queue]); + + if (local->queue_stop_reasons[queue] != 0) + /* someone still has this queue stopped */ + return; + if (test_bit(queue, local->queues_pending)) { set_bit(queue, local->queues_pending_run); tasklet_schedule(&local->tx_pending_tasklet); @@ -361,8 +382,8 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue, } } -static void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue, - enum queue_stop_reason reason) +void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue, + enum queue_stop_reason reason) { struct ieee80211_local *local = hw_to_local(hw); unsigned long flags; @@ -384,15 +405,33 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue, { struct ieee80211_local *local = hw_to_local(hw); - /* we don't need to track ampdu queues */ - if (queue < ieee80211_num_regular_queues(hw)) - __set_bit(reason, &local->queue_stop_reasons[queue]); + if (queue >= hw->queues) { + if (local->ampdu_ac_queue[queue - hw->queues] < 0) + return; + + /* + * for virtual aggregation queues, we need to refcount the + * internal mac80211 disable (multiple times!), keep track of + * driver disable _and_ make sure the regular queue is + * actually enabled. + */ + if (reason == IEEE80211_QUEUE_STOP_REASON_AGGREGATION) + local->amdpu_ac_stop_refcnt[queue - hw->queues]++; + else + __set_bit(reason, &local->queue_stop_reasons[queue]); + + /* now go on to treat the corresponding regular queue */ + queue = local->ampdu_ac_queue[queue - hw->queues]; + reason = IEEE80211_QUEUE_STOP_REASON_AGGREGATION; + } + + __set_bit(reason, &local->queue_stop_reasons[queue]); netif_stop_subqueue(local->mdev, queue); } -static void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue, - enum queue_stop_reason reason) +void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue, + enum queue_stop_reason reason) { struct ieee80211_local *local = hw_to_local(hw); unsigned long flags; @@ -418,7 +457,7 @@ void ieee80211_stop_queues_by_reason(struct ieee80211_hw *hw, spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - for (i = 0; i < ieee80211_num_queues(hw); i++) + for (i = 0; i < hw->queues; i++) __ieee80211_stop_queue(hw, i, reason); spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); @@ -434,6 +473,16 @@ EXPORT_SYMBOL(ieee80211_stop_queues); int ieee80211_queue_stopped(struct ieee80211_hw *hw, int queue) { struct ieee80211_local *local = hw_to_local(hw); + unsigned long flags; + + if (queue >= hw->queues) { + spin_lock_irqsave(&local->queue_stop_reason_lock, flags); + queue = local->ampdu_ac_queue[queue - hw->queues]; + spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); + if (queue < 0) + return true; + } + return __netif_subqueue_stopped(local->mdev, queue); } EXPORT_SYMBOL(ieee80211_queue_stopped); diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index ac71b38f7cb5..093a4ab7f28b 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -114,9 +114,7 @@ u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb) { struct ieee80211_master_priv *mpriv = netdev_priv(dev); struct ieee80211_local *local = mpriv->local; - struct ieee80211_hw *hw = &local->hw; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct sta_info *sta; u16 queue; u8 tid; @@ -124,29 +122,11 @@ u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb) if (unlikely(queue >= local->hw.queues)) queue = local->hw.queues - 1; - if (skb->requeue) { - if (!hw->ampdu_queues) - return queue; - - rcu_read_lock(); - sta = sta_info_get(local, hdr->addr1); - tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; - if (sta) { - int ampdu_queue = sta->tid_to_tx_q[tid]; - - if ((ampdu_queue < ieee80211_num_queues(hw)) && - test_bit(ampdu_queue, local->queue_pool)) - queue = ampdu_queue; - } - rcu_read_unlock(); - - return queue; - } - - /* Now we know the 1d priority, fill in the QoS header if - * there is one. + /* + * Now we know the 1d priority, fill in the QoS header if + * there is one (and we haven't done this before). */ - if (ieee80211_is_data_qos(hdr->frame_control)) { + if (!skb->requeue && ieee80211_is_data_qos(hdr->frame_control)) { u8 *p = ieee80211_get_qos_ctl(hdr); u8 ack_policy = 0; tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; @@ -156,140 +136,7 @@ u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb) /* qos header is 2 bytes, second reserved */ *p++ = ack_policy | tid; *p = 0; - - if (!hw->ampdu_queues) - return queue; - - rcu_read_lock(); - - sta = sta_info_get(local, hdr->addr1); - if (sta) { - int ampdu_queue = sta->tid_to_tx_q[tid]; - - if ((ampdu_queue < ieee80211_num_queues(hw)) && - test_bit(ampdu_queue, local->queue_pool)) - queue = ampdu_queue; - } - - rcu_read_unlock(); } return queue; } - -int ieee80211_ht_agg_queue_add(struct ieee80211_local *local, - struct sta_info *sta, u16 tid) -{ - int i; - - /* XXX: currently broken due to cb/requeue use */ - return -EPERM; - - /* prepare the filter and save it for the SW queue - * matching the received HW queue */ - - if (!local->hw.ampdu_queues) - return -EPERM; - - /* try to get a Qdisc from the pool */ - for (i = local->hw.queues; i < ieee80211_num_queues(&local->hw); i++) - if (!test_and_set_bit(i, local->queue_pool)) { - ieee80211_stop_queue(local_to_hw(local), i); - sta->tid_to_tx_q[tid] = i; - - /* IF there are already pending packets - * on this tid first we need to drain them - * on the previous queue - * since HT is strict in order */ -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_DEBUG "allocated aggregation queue" - " %d tid %d addr %pM pool=0x%lX\n", - i, tid, sta->sta.addr, - local->queue_pool[0]); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - return 0; - } - - return -EAGAIN; -} - -/** - * the caller needs to hold netdev_get_tx_queue(local->mdev, X)->lock - */ -void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local, - struct sta_info *sta, u16 tid, - u8 requeue) -{ - int agg_queue = sta->tid_to_tx_q[tid]; - struct ieee80211_hw *hw = &local->hw; - - /* return the qdisc to the pool */ - clear_bit(agg_queue, local->queue_pool); - sta->tid_to_tx_q[tid] = ieee80211_num_queues(hw); - - if (requeue) { - ieee80211_requeue(local, agg_queue); - } else { - struct netdev_queue *txq; - spinlock_t *root_lock; - struct Qdisc *q; - - txq = netdev_get_tx_queue(local->mdev, agg_queue); - q = rcu_dereference(txq->qdisc); - root_lock = qdisc_lock(q); - - spin_lock_bh(root_lock); - qdisc_reset(q); - spin_unlock_bh(root_lock); - } -} - -void ieee80211_requeue(struct ieee80211_local *local, int queue) -{ - struct netdev_queue *txq = netdev_get_tx_queue(local->mdev, queue); - struct sk_buff_head list; - spinlock_t *root_lock; - struct Qdisc *qdisc; - u32 len; - - rcu_read_lock_bh(); - - qdisc = rcu_dereference(txq->qdisc); - if (!qdisc || !qdisc->dequeue) - goto out_unlock; - - skb_queue_head_init(&list); - - root_lock = qdisc_root_lock(qdisc); - spin_lock(root_lock); - for (len = qdisc->q.qlen; len > 0; len--) { - struct sk_buff *skb = qdisc->dequeue(qdisc); - - if (skb) - __skb_queue_tail(&list, skb); - } - spin_unlock(root_lock); - - for (len = list.qlen; len > 0; len--) { - struct sk_buff *skb = __skb_dequeue(&list); - u16 new_queue; - - BUG_ON(!skb); - new_queue = ieee80211_select_queue(local->mdev, skb); - skb_set_queue_mapping(skb, new_queue); - - txq = netdev_get_tx_queue(local->mdev, new_queue); - - - qdisc = rcu_dereference(txq->qdisc); - root_lock = qdisc_root_lock(qdisc); - - spin_lock(root_lock); - qdisc_enqueue_root(skb, qdisc); - spin_unlock(root_lock); - } - -out_unlock: - rcu_read_unlock_bh(); -} diff --git a/net/mac80211/wme.h b/net/mac80211/wme.h index bc62f28a4d3d..7520d2e014dc 100644 --- a/net/mac80211/wme.h +++ b/net/mac80211/wme.h @@ -21,11 +21,5 @@ extern const int ieee802_1d_to_ac[8]; u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb); -int ieee80211_ht_agg_queue_add(struct ieee80211_local *local, - struct sta_info *sta, u16 tid); -void ieee80211_ht_agg_queue_remove(struct ieee80211_local *local, - struct sta_info *sta, u16 tid, - u8 requeue); -void ieee80211_requeue(struct ieee80211_local *local, int queue); #endif /* _WME_H */ -- cgit v1.2.3 From 469002983fc90c2ff0959e2b03335c0fe2e4d5a9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 15 Feb 2009 12:44:28 +0100 Subject: mac80211: split IBSS/managed code This patch splits out the ibss code and data from managed (station) mode. The reason to do this is to better separate the state machines, and have the code be contained better so it gets easier to determine what exactly a given change will affect, that in turn makes it easier to understand. This is quite some churn, especially because I split sdata->u.sta into sdata->u.mgd and sdata->u.ibss, but I think it's easier to maintain that way. I've also shuffled around some code -- null function sending is only applicable to managed interfaces so put that into that file, some other functions are needed from various places so put them into util, and also rearranged the prototypes in ieee80211_i.h accordingly. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/Makefile | 1 + net/mac80211/agg-rx.c | 6 +- net/mac80211/agg-tx.c | 5 +- net/mac80211/cfg.c | 45 +- net/mac80211/debugfs_netdev.c | 48 +- net/mac80211/ht.c | 6 +- net/mac80211/ibss.c | 888 ++++++++++++++++++++++ net/mac80211/ieee80211_i.h | 117 ++- net/mac80211/iface.c | 76 +- net/mac80211/key.c | 2 +- net/mac80211/main.c | 9 +- net/mac80211/mlme.c | 1646 +++++++++++------------------------------ net/mac80211/rx.c | 37 +- net/mac80211/scan.c | 43 +- net/mac80211/spectmgmt.c | 26 +- net/mac80211/tx.c | 11 +- net/mac80211/util.c | 176 +++++ net/mac80211/wext.c | 141 ++-- 18 files changed, 1803 insertions(+), 1480 deletions(-) create mode 100644 net/mac80211/ibss.c (limited to 'net') diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 3503a3d21318..0e3ab88bb706 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -9,6 +9,7 @@ mac80211-y := \ wpa.o \ scan.o \ ht.o agg-tx.o agg-rx.o \ + ibss.o \ mlme.o \ iface.o \ rate.o \ diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 3112bfd441b6..a95affc94629 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -129,7 +129,6 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d u8 dialog_token, u16 status, u16 policy, u16 buf_size, u16 timeout) { - struct ieee80211_if_sta *ifsta = &sdata->u.sta; struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; @@ -151,8 +150,9 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d if (sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_AP_VLAN) memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); - else - memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); + else if (sdata->vif.type == NL80211_IFTYPE_STATION) + memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 0217b68c47ca..1df116d4d6e7 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -49,7 +49,6 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, u16 agg_size, u16 timeout) { struct ieee80211_local *local = sdata->local; - struct ieee80211_if_sta *ifsta = &sdata->u.sta; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; u16 capab; @@ -69,8 +68,8 @@ static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, if (sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_AP_VLAN) memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); - else - memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); + else if (sdata->vif.type == NL80211_IFTYPE_STATION) + memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index c8d969be440b..f453bb7c564b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1180,45 +1180,45 @@ static int set_mgmt_extra_ie_sta(struct ieee80211_sub_if_data *sdata, u8 subtype, u8 *ies, size_t ies_len) { struct ieee80211_local *local = sdata->local; - struct ieee80211_if_sta *ifsta = &sdata->u.sta; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; switch (subtype) { case IEEE80211_STYPE_PROBE_REQ >> 4: if (local->ops->hw_scan) break; - kfree(ifsta->ie_probereq); - ifsta->ie_probereq = ies; - ifsta->ie_probereq_len = ies_len; + kfree(ifmgd->ie_probereq); + ifmgd->ie_probereq = ies; + ifmgd->ie_probereq_len = ies_len; return 0; case IEEE80211_STYPE_PROBE_RESP >> 4: - kfree(ifsta->ie_proberesp); - ifsta->ie_proberesp = ies; - ifsta->ie_proberesp_len = ies_len; + kfree(ifmgd->ie_proberesp); + ifmgd->ie_proberesp = ies; + ifmgd->ie_proberesp_len = ies_len; return 0; case IEEE80211_STYPE_AUTH >> 4: - kfree(ifsta->ie_auth); - ifsta->ie_auth = ies; - ifsta->ie_auth_len = ies_len; + kfree(ifmgd->ie_auth); + ifmgd->ie_auth = ies; + ifmgd->ie_auth_len = ies_len; return 0; case IEEE80211_STYPE_ASSOC_REQ >> 4: - kfree(ifsta->ie_assocreq); - ifsta->ie_assocreq = ies; - ifsta->ie_assocreq_len = ies_len; + kfree(ifmgd->ie_assocreq); + ifmgd->ie_assocreq = ies; + ifmgd->ie_assocreq_len = ies_len; return 0; case IEEE80211_STYPE_REASSOC_REQ >> 4: - kfree(ifsta->ie_reassocreq); - ifsta->ie_reassocreq = ies; - ifsta->ie_reassocreq_len = ies_len; + kfree(ifmgd->ie_reassocreq); + ifmgd->ie_reassocreq = ies; + ifmgd->ie_reassocreq_len = ies_len; return 0; case IEEE80211_STYPE_DEAUTH >> 4: - kfree(ifsta->ie_deauth); - ifsta->ie_deauth = ies; - ifsta->ie_deauth_len = ies_len; + kfree(ifmgd->ie_deauth); + ifmgd->ie_deauth = ies; + ifmgd->ie_deauth_len = ies_len; return 0; case IEEE80211_STYPE_DISASSOC >> 4: - kfree(ifsta->ie_disassoc); - ifsta->ie_disassoc = ies; - ifsta->ie_disassoc_len = ies_len; + kfree(ifmgd->ie_disassoc); + ifmgd->ie_disassoc = ies; + ifmgd->ie_disassoc_len = ies_len; return 0; } @@ -1248,7 +1248,6 @@ static int ieee80211_set_mgmt_extra_ie(struct wiphy *wiphy, switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: - case NL80211_IFTYPE_ADHOC: ret = set_mgmt_extra_ie_sta(sdata, params->subtype, ies, ies_len); break; diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index c54219301724..e3420329f4e6 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -94,31 +94,31 @@ IEEE80211_IF_FILE(drop_unencrypted, drop_unencrypted, DEC); IEEE80211_IF_FILE(force_unicast_rateidx, force_unicast_rateidx, DEC); IEEE80211_IF_FILE(max_ratectrl_rateidx, max_ratectrl_rateidx, DEC); -/* STA/IBSS attributes */ -IEEE80211_IF_FILE(state, u.sta.state, DEC); -IEEE80211_IF_FILE(bssid, u.sta.bssid, MAC); -IEEE80211_IF_FILE(prev_bssid, u.sta.prev_bssid, MAC); -IEEE80211_IF_FILE(ssid_len, u.sta.ssid_len, SIZE); -IEEE80211_IF_FILE(aid, u.sta.aid, DEC); -IEEE80211_IF_FILE(ap_capab, u.sta.ap_capab, HEX); -IEEE80211_IF_FILE(capab, u.sta.capab, HEX); -IEEE80211_IF_FILE(extra_ie_len, u.sta.extra_ie_len, SIZE); -IEEE80211_IF_FILE(auth_tries, u.sta.auth_tries, DEC); -IEEE80211_IF_FILE(assoc_tries, u.sta.assoc_tries, DEC); -IEEE80211_IF_FILE(auth_algs, u.sta.auth_algs, HEX); -IEEE80211_IF_FILE(auth_alg, u.sta.auth_alg, DEC); -IEEE80211_IF_FILE(auth_transaction, u.sta.auth_transaction, DEC); +/* STA attributes */ +IEEE80211_IF_FILE(state, u.mgd.state, DEC); +IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC); +IEEE80211_IF_FILE(prev_bssid, u.mgd.prev_bssid, MAC); +IEEE80211_IF_FILE(ssid_len, u.mgd.ssid_len, SIZE); +IEEE80211_IF_FILE(aid, u.mgd.aid, DEC); +IEEE80211_IF_FILE(ap_capab, u.mgd.ap_capab, HEX); +IEEE80211_IF_FILE(capab, u.mgd.capab, HEX); +IEEE80211_IF_FILE(extra_ie_len, u.mgd.extra_ie_len, SIZE); +IEEE80211_IF_FILE(auth_tries, u.mgd.auth_tries, DEC); +IEEE80211_IF_FILE(assoc_tries, u.mgd.assoc_tries, DEC); +IEEE80211_IF_FILE(auth_algs, u.mgd.auth_algs, HEX); +IEEE80211_IF_FILE(auth_alg, u.mgd.auth_alg, DEC); +IEEE80211_IF_FILE(auth_transaction, u.mgd.auth_transaction, DEC); static ssize_t ieee80211_if_fmt_flags( const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) { return scnprintf(buf, buflen, "%s%s%s%s%s%s%s\n", - sdata->u.sta.flags & IEEE80211_STA_SSID_SET ? "SSID\n" : "", - sdata->u.sta.flags & IEEE80211_STA_BSSID_SET ? "BSSID\n" : "", - sdata->u.sta.flags & IEEE80211_STA_PREV_BSSID_SET ? "prev BSSID\n" : "", - sdata->u.sta.flags & IEEE80211_STA_AUTHENTICATED ? "AUTH\n" : "", - sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED ? "ASSOC\n" : "", - sdata->u.sta.flags & IEEE80211_STA_PROBEREQ_POLL ? "PROBEREQ POLL\n" : "", + sdata->u.mgd.flags & IEEE80211_STA_SSID_SET ? "SSID\n" : "", + sdata->u.mgd.flags & IEEE80211_STA_BSSID_SET ? "BSSID\n" : "", + sdata->u.mgd.flags & IEEE80211_STA_PREV_BSSID_SET ? "prev BSSID\n" : "", + sdata->u.mgd.flags & IEEE80211_STA_AUTHENTICATED ? "AUTH\n" : "", + sdata->u.mgd.flags & IEEE80211_STA_ASSOCIATED ? "ASSOC\n" : "", + sdata->u.mgd.flags & IEEE80211_STA_PROBEREQ_POLL ? "PROBEREQ POLL\n" : "", sdata->vif.bss_conf.use_cts_prot ? "CTS prot\n" : ""); } __IEEE80211_IF_FILE(flags); @@ -283,9 +283,11 @@ static void add_files(struct ieee80211_sub_if_data *sdata) #endif break; case NL80211_IFTYPE_STATION: - case NL80211_IFTYPE_ADHOC: add_sta_files(sdata); break; + case NL80211_IFTYPE_ADHOC: + /* XXX */ + break; case NL80211_IFTYPE_AP: add_ap_files(sdata); break; @@ -418,9 +420,11 @@ static void del_files(struct ieee80211_sub_if_data *sdata) #endif break; case NL80211_IFTYPE_STATION: - case NL80211_IFTYPE_ADHOC: del_sta_files(sdata); break; + case NL80211_IFTYPE_ADHOC: + /* XXX */ + break; case NL80211_IFTYPE_AP: del_ap_files(sdata); break; diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 82ea0b63a386..69b6e9a4df3d 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -169,7 +169,6 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, u16 initiator, u16 reason_code) { struct ieee80211_local *local = sdata->local; - struct ieee80211_if_sta *ifsta = &sdata->u.sta; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; u16 params; @@ -190,8 +189,9 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, if (sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_AP_VLAN) memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); - else - memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); + else if (sdata->vif.type == NL80211_IFTYPE_STATION) + memcpy(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c new file mode 100644 index 000000000000..1bbfc7029879 --- /dev/null +++ b/net/mac80211/ibss.c @@ -0,0 +1,888 @@ +/* + * IBSS mode implementation + * Copyright 2003-2008, Jouni Malinen + * Copyright 2004, Instant802 Networks, Inc. + * Copyright 2005, Devicescape Software, Inc. + * Copyright 2006-2007 Jiri Benc + * Copyright 2007, Michael Wu + * Copyright 2009, Johannes Berg + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ieee80211_i.h" +#include "rate.h" + +#define IEEE80211_SCAN_INTERVAL (2 * HZ) +#define IEEE80211_SCAN_INTERVAL_SLOW (15 * HZ) +#define IEEE80211_IBSS_JOIN_TIMEOUT (7 * HZ) + +#define IEEE80211_IBSS_MERGE_INTERVAL (30 * HZ) +#define IEEE80211_IBSS_INACTIVITY_LIMIT (60 * HZ) + +#define IEEE80211_IBSS_MAX_STA_ENTRIES 128 + + +static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, + size_t len) +{ + u16 auth_alg, auth_transaction, status_code; + + if (len < 24 + 6) + return; + + auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg); + auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); + status_code = le16_to_cpu(mgmt->u.auth.status_code); + + /* + * IEEE 802.11 standard does not require authentication in IBSS + * networks and most implementations do not seem to use it. + * However, try to reply to authentication attempts if someone + * has actually implemented this. + */ + if (auth_alg == WLAN_AUTH_OPEN && auth_transaction == 1) + ieee80211_send_auth(sdata, 2, WLAN_AUTH_OPEN, NULL, 0, + sdata->u.ibss.bssid, 0); +} + +static int __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, + const u8 *bssid, const int beacon_int, + const int freq, + const size_t supp_rates_len, + const u8 *supp_rates, + const u16 capability) +{ + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; + struct ieee80211_local *local = sdata->local; + int res = 0, rates, i, j; + struct sk_buff *skb; + struct ieee80211_mgmt *mgmt; + u8 *pos; + struct ieee80211_supported_band *sband; + union iwreq_data wrqu; + + if (local->ops->reset_tsf) { + /* Reset own TSF to allow time synchronization work. */ + local->ops->reset_tsf(local_to_hw(local)); + } + + if ((ifibss->flags & IEEE80211_IBSS_PREV_BSSID_SET) && + memcmp(ifibss->bssid, bssid, ETH_ALEN) == 0) + return res; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400); + if (!skb) { + printk(KERN_DEBUG "%s: failed to allocate buffer for probe " + "response\n", sdata->dev->name); + return -ENOMEM; + } + + if (!(ifibss->flags & IEEE80211_IBSS_PREV_BSSID_SET)) { + /* Remove possible STA entries from other IBSS networks. */ + sta_info_flush_delayed(sdata); + } + + memcpy(ifibss->bssid, bssid, ETH_ALEN); + res = ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID); + if (res) + return res; + + local->hw.conf.beacon_int = beacon_int >= 10 ? beacon_int : 10; + + sdata->drop_unencrypted = capability & + WLAN_CAPABILITY_PRIVACY ? 1 : 0; + + res = ieee80211_set_freq(sdata, freq); + + if (res) + return res; + + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + + /* Build IBSS probe response */ + + skb_reserve(skb, local->hw.extra_tx_headroom); + + mgmt = (struct ieee80211_mgmt *) + skb_put(skb, 24 + sizeof(mgmt->u.beacon)); + memset(mgmt, 0, 24 + sizeof(mgmt->u.beacon)); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_PROBE_RESP); + memset(mgmt->da, 0xff, ETH_ALEN); + memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); + memcpy(mgmt->bssid, ifibss->bssid, ETH_ALEN); + mgmt->u.beacon.beacon_int = + cpu_to_le16(local->hw.conf.beacon_int); + mgmt->u.beacon.capab_info = cpu_to_le16(capability); + + pos = skb_put(skb, 2 + ifibss->ssid_len); + *pos++ = WLAN_EID_SSID; + *pos++ = ifibss->ssid_len; + memcpy(pos, ifibss->ssid, ifibss->ssid_len); + + rates = supp_rates_len; + if (rates > 8) + rates = 8; + pos = skb_put(skb, 2 + rates); + *pos++ = WLAN_EID_SUPP_RATES; + *pos++ = rates; + memcpy(pos, supp_rates, rates); + + if (sband->band == IEEE80211_BAND_2GHZ) { + pos = skb_put(skb, 2 + 1); + *pos++ = WLAN_EID_DS_PARAMS; + *pos++ = 1; + *pos++ = ieee80211_frequency_to_channel(freq); + } + + pos = skb_put(skb, 2 + 2); + *pos++ = WLAN_EID_IBSS_PARAMS; + *pos++ = 2; + /* FIX: set ATIM window based on scan results */ + *pos++ = 0; + *pos++ = 0; + + if (supp_rates_len > 8) { + rates = supp_rates_len - 8; + pos = skb_put(skb, 2 + rates); + *pos++ = WLAN_EID_EXT_SUPP_RATES; + *pos++ = rates; + memcpy(pos, &supp_rates[8], rates); + } + + ifibss->probe_resp = skb; + + ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON | + IEEE80211_IFCC_BEACON_ENABLED); + + + rates = 0; + for (i = 0; i < supp_rates_len; i++) { + int bitrate = (supp_rates[i] & 0x7f) * 5; + for (j = 0; j < sband->n_bitrates; j++) + if (sband->bitrates[j].bitrate == bitrate) + rates |= BIT(j); + } + + ieee80211_sta_def_wmm_params(sdata, supp_rates_len, supp_rates); + + ifibss->flags |= IEEE80211_IBSS_PREV_BSSID_SET; + ifibss->state = IEEE80211_IBSS_MLME_JOINED; + mod_timer(&ifibss->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL); + + memset(&wrqu, 0, sizeof(wrqu)); + memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN); + wireless_send_event(sdata->dev, SIOCGIWAP, &wrqu, NULL); + + return res; +} + +static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, + struct ieee80211_bss *bss) +{ + return __ieee80211_sta_join_ibss(sdata, + bss->cbss.bssid, + bss->cbss.beacon_interval, + bss->cbss.channel->center_freq, + bss->supp_rates_len, bss->supp_rates, + bss->cbss.capability); +} + +static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, + size_t len, + struct ieee80211_rx_status *rx_status, + struct ieee802_11_elems *elems, + bool beacon) +{ + struct ieee80211_local *local = sdata->local; + int freq; + struct ieee80211_bss *bss; + struct sta_info *sta; + struct ieee80211_channel *channel; + u64 beacon_timestamp, rx_timestamp; + u32 supp_rates = 0; + enum ieee80211_band band = rx_status->band; + + if (elems->ds_params && elems->ds_params_len == 1) + freq = ieee80211_channel_to_frequency(elems->ds_params[0]); + else + freq = rx_status->freq; + + channel = ieee80211_get_channel(local->hw.wiphy, freq); + + if (!channel || channel->flags & IEEE80211_CHAN_DISABLED) + return; + + if (sdata->vif.type == NL80211_IFTYPE_ADHOC && elems->supp_rates && + memcmp(mgmt->bssid, sdata->u.ibss.bssid, ETH_ALEN) == 0) { + supp_rates = ieee80211_sta_get_rates(local, elems, band); + + rcu_read_lock(); + + sta = sta_info_get(local, mgmt->sa); + if (sta) { + u32 prev_rates; + + prev_rates = sta->sta.supp_rates[band]; + /* make sure mandatory rates are always added */ + sta->sta.supp_rates[band] = supp_rates | + ieee80211_mandatory_rates(local, band); + +#ifdef CONFIG_MAC80211_IBSS_DEBUG + if (sta->sta.supp_rates[band] != prev_rates) + printk(KERN_DEBUG "%s: updated supp_rates set " + "for %pM based on beacon info (0x%llx | " + "0x%llx -> 0x%llx)\n", + sdata->dev->name, + sta->sta.addr, + (unsigned long long) prev_rates, + (unsigned long long) supp_rates, + (unsigned long long) sta->sta.supp_rates[band]); +#endif + } else + ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, supp_rates); + + rcu_read_unlock(); + } + + bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems, + channel, beacon); + if (!bss) + return; + + /* was just updated in ieee80211_bss_info_update */ + beacon_timestamp = bss->cbss.tsf; + + /* check if we need to merge IBSS */ + + /* merge only on beacons (???) */ + if (!beacon) + goto put_bss; + + /* we use a fixed BSSID */ + if (sdata->u.ibss.flags & IEEE80211_IBSS_BSSID_SET) + goto put_bss; + + /* not an IBSS */ + if (!(bss->cbss.capability & WLAN_CAPABILITY_IBSS)) + goto put_bss; + + /* different channel */ + if (bss->cbss.channel != local->oper_channel) + goto put_bss; + + /* different SSID */ + if (elems->ssid_len != sdata->u.ibss.ssid_len || + memcmp(elems->ssid, sdata->u.ibss.ssid, + sdata->u.ibss.ssid_len)) + goto put_bss; + + if (rx_status->flag & RX_FLAG_TSFT) { + /* + * For correct IBSS merging we need mactime; since mactime is + * defined as the time the first data symbol of the frame hits + * the PHY, and the timestamp of the beacon is defined as "the + * time that the data symbol containing the first bit of the + * timestamp is transmitted to the PHY plus the transmitting + * STA's delays through its local PHY from the MAC-PHY + * interface to its interface with the WM" (802.11 11.1.2) + * - equals the time this bit arrives at the receiver - we have + * to take into account the offset between the two. + * + * E.g. at 1 MBit that means mactime is 192 usec earlier + * (=24 bytes * 8 usecs/byte) than the beacon timestamp. + */ + int rate; + + if (rx_status->flag & RX_FLAG_HT) + rate = 65; /* TODO: HT rates */ + else + rate = local->hw.wiphy->bands[band]-> + bitrates[rx_status->rate_idx].bitrate; + + rx_timestamp = rx_status->mactime + (24 * 8 * 10 / rate); + } else if (local && local->ops && local->ops->get_tsf) + /* second best option: get current TSF */ + rx_timestamp = local->ops->get_tsf(local_to_hw(local)); + else + /* can't merge without knowing the TSF */ + rx_timestamp = -1LLU; + +#ifdef CONFIG_MAC80211_IBSS_DEBUG + printk(KERN_DEBUG "RX beacon SA=%pM BSSID=" + "%pM TSF=0x%llx BCN=0x%llx diff=%lld @%lu\n", + mgmt->sa, mgmt->bssid, + (unsigned long long)rx_timestamp, + (unsigned long long)beacon_timestamp, + (unsigned long long)(rx_timestamp - beacon_timestamp), + jiffies); +#endif + + if (beacon_timestamp > rx_timestamp) { +#ifdef CONFIG_MAC80211_IBSS_DEBUG + printk(KERN_DEBUG "%s: beacon TSF higher than " + "local TSF - IBSS merge with BSSID %pM\n", + sdata->dev->name, mgmt->bssid); +#endif + ieee80211_sta_join_ibss(sdata, bss); + ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, supp_rates); + } + + put_bss: + ieee80211_rx_bss_put(local, bss); +} + +/* + * Add a new IBSS station, will also be called by the RX code when, + * in IBSS mode, receiving a frame from a yet-unknown station, hence + * must be callable in atomic context. + */ +struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, + u8 *bssid,u8 *addr, u32 supp_rates) +{ + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + int band = local->hw.conf.channel->band; + + /* TODO: Could consider removing the least recently used entry and + * allow new one to be added. */ + if (local->num_sta >= IEEE80211_IBSS_MAX_STA_ENTRIES) { + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: No room for a new IBSS STA " + "entry %pM\n", sdata->dev->name, addr); + } + return NULL; + } + + if (compare_ether_addr(bssid, sdata->u.ibss.bssid)) + return NULL; + +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + printk(KERN_DEBUG "%s: Adding new IBSS station %pM (dev=%s)\n", + wiphy_name(local->hw.wiphy), addr, sdata->dev->name); +#endif + + sta = sta_info_alloc(sdata, addr, GFP_ATOMIC); + if (!sta) + return NULL; + + set_sta_flags(sta, WLAN_STA_AUTHORIZED); + + /* make sure mandatory rates are always added */ + sta->sta.supp_rates[band] = supp_rates | + ieee80211_mandatory_rates(local, band); + + rate_control_rate_init(sta); + + if (sta_info_insert(sta)) + return NULL; + + return sta; +} + +static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + int active = 0; + struct sta_info *sta; + + rcu_read_lock(); + + list_for_each_entry_rcu(sta, &local->sta_list, list) { + if (sta->sdata == sdata && + time_after(sta->last_rx + IEEE80211_IBSS_MERGE_INTERVAL, + jiffies)) { + active++; + break; + } + } + + rcu_read_unlock(); + + return active; +} + + +static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; + + mod_timer(&ifibss->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL); + + ieee80211_sta_expire(sdata, IEEE80211_IBSS_INACTIVITY_LIMIT); + if (ieee80211_sta_active_ibss(sdata)) + return; + + if ((ifibss->flags & IEEE80211_IBSS_BSSID_SET) && + (!(ifibss->flags & IEEE80211_IBSS_AUTO_CHANNEL_SEL))) + return; + + printk(KERN_DEBUG "%s: No active IBSS STAs - trying to scan for other " + "IBSS networks with same SSID (merge)\n", sdata->dev->name); + + /* XXX maybe racy? */ + if (sdata->local->scan_req) + return; + + memcpy(sdata->local->int_scan_req.ssids[0].ssid, + ifibss->ssid, IEEE80211_MAX_SSID_LEN); + sdata->local->int_scan_req.ssids[0].ssid_len = ifibss->ssid_len; + ieee80211_request_scan(sdata, &sdata->local->int_scan_req); +} + +static int ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; + struct ieee80211_local *local = sdata->local; + struct ieee80211_supported_band *sband; + u8 *pos; + u8 bssid[ETH_ALEN]; + u8 supp_rates[IEEE80211_MAX_SUPP_RATES]; + u16 capability; + int i; + + if (ifibss->flags & IEEE80211_IBSS_BSSID_SET) { + memcpy(bssid, ifibss->bssid, ETH_ALEN); + } else { + /* Generate random, not broadcast, locally administered BSSID. Mix in + * own MAC address to make sure that devices that do not have proper + * random number generator get different BSSID. */ + get_random_bytes(bssid, ETH_ALEN); + for (i = 0; i < ETH_ALEN; i++) + bssid[i] ^= sdata->dev->dev_addr[i]; + bssid[0] &= ~0x01; + bssid[0] |= 0x02; + } + + printk(KERN_DEBUG "%s: Creating new IBSS network, BSSID %pM\n", + sdata->dev->name, bssid); + + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + + if (local->hw.conf.beacon_int == 0) + local->hw.conf.beacon_int = 100; + + capability = WLAN_CAPABILITY_IBSS; + + if (sdata->default_key) + capability |= WLAN_CAPABILITY_PRIVACY; + else + sdata->drop_unencrypted = 0; + + pos = supp_rates; + for (i = 0; i < sband->n_bitrates; i++) { + int rate = sband->bitrates[i].bitrate; + *pos++ = (u8) (rate / 5); + } + + return __ieee80211_sta_join_ibss(sdata, + bssid, local->hw.conf.beacon_int, + local->hw.conf.channel->center_freq, + sband->n_bitrates, supp_rates, + capability); +} + +static int ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; + struct ieee80211_local *local = sdata->local; + struct ieee80211_bss *bss; + const u8 *bssid = NULL; + int active_ibss; + + if (ifibss->ssid_len == 0) + return -EINVAL; + + active_ibss = ieee80211_sta_active_ibss(sdata); +#ifdef CONFIG_MAC80211_IBSS_DEBUG + printk(KERN_DEBUG "%s: sta_find_ibss (active_ibss=%d)\n", + sdata->dev->name, active_ibss); +#endif /* CONFIG_MAC80211_IBSS_DEBUG */ + + if (active_ibss) + return 0; + + if (ifibss->flags & IEEE80211_IBSS_BSSID_SET) + bssid = ifibss->bssid; + bss = (void *)cfg80211_get_bss(local->hw.wiphy, NULL, bssid, + ifibss->ssid, ifibss->ssid_len, + WLAN_CAPABILITY_IBSS, + WLAN_CAPABILITY_IBSS); + +#ifdef CONFIG_MAC80211_IBSS_DEBUG + if (bss) + printk(KERN_DEBUG " sta_find_ibss: selected %pM current " + "%pM\n", bss->cbss.bssid, ifibss->bssid); +#endif /* CONFIG_MAC80211_IBSS_DEBUG */ + + if (bss && + (!(ifibss->flags & IEEE80211_IBSS_PREV_BSSID_SET) || + memcmp(ifibss->bssid, bss->cbss.bssid, ETH_ALEN))) { + int ret; + + printk(KERN_DEBUG "%s: Selected IBSS BSSID %pM" + " based on configured SSID\n", + sdata->dev->name, bss->cbss.bssid); + + ret = ieee80211_sta_join_ibss(sdata, bss); + ieee80211_rx_bss_put(local, bss); + return ret; + } else if (bss) + ieee80211_rx_bss_put(local, bss); + +#ifdef CONFIG_MAC80211_IBSS_DEBUG + printk(KERN_DEBUG " did not try to join ibss\n"); +#endif /* CONFIG_MAC80211_IBSS_DEBUG */ + + /* Selected IBSS not found in current scan results - try to scan */ + if (ifibss->state == IEEE80211_IBSS_MLME_JOINED && + !ieee80211_sta_active_ibss(sdata)) { + mod_timer(&ifibss->timer, jiffies + + IEEE80211_IBSS_MERGE_INTERVAL); + } else if (time_after(jiffies, local->last_scan_completed + + IEEE80211_SCAN_INTERVAL)) { + printk(KERN_DEBUG "%s: Trigger new scan to find an IBSS to " + "join\n", sdata->dev->name); + + /* XXX maybe racy? */ + if (local->scan_req) + return -EBUSY; + + memcpy(local->int_scan_req.ssids[0].ssid, + ifibss->ssid, IEEE80211_MAX_SSID_LEN); + local->int_scan_req.ssids[0].ssid_len = ifibss->ssid_len; + return ieee80211_request_scan(sdata, &local->int_scan_req); + } else if (ifibss->state != IEEE80211_IBSS_MLME_JOINED) { + int interval = IEEE80211_SCAN_INTERVAL; + + if (time_after(jiffies, ifibss->ibss_join_req + + IEEE80211_IBSS_JOIN_TIMEOUT)) { + if (!(local->oper_channel->flags & + IEEE80211_CHAN_NO_IBSS)) + return ieee80211_sta_create_ibss(sdata); + printk(KERN_DEBUG "%s: IBSS not allowed on" + " %d MHz\n", sdata->dev->name, + local->hw.conf.channel->center_freq); + + /* No IBSS found - decrease scan interval and continue + * scanning. */ + interval = IEEE80211_SCAN_INTERVAL_SLOW; + } + + ifibss->state = IEEE80211_IBSS_MLME_SEARCH; + mod_timer(&ifibss->timer, jiffies + interval); + return 0; + } + + return 0; +} + +static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, + size_t len) +{ + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; + struct ieee80211_local *local = sdata->local; + int tx_last_beacon; + struct sk_buff *skb; + struct ieee80211_mgmt *resp; + u8 *pos, *end; + + if (ifibss->state != IEEE80211_IBSS_MLME_JOINED || + len < 24 + 2 || !ifibss->probe_resp) + return; + + if (local->ops->tx_last_beacon) + tx_last_beacon = local->ops->tx_last_beacon(local_to_hw(local)); + else + tx_last_beacon = 1; + +#ifdef CONFIG_MAC80211_IBSS_DEBUG + printk(KERN_DEBUG "%s: RX ProbeReq SA=%pM DA=%pM BSSID=%pM" + " (tx_last_beacon=%d)\n", + sdata->dev->name, mgmt->sa, mgmt->da, + mgmt->bssid, tx_last_beacon); +#endif /* CONFIG_MAC80211_IBSS_DEBUG */ + + if (!tx_last_beacon) + return; + + if (memcmp(mgmt->bssid, ifibss->bssid, ETH_ALEN) != 0 && + memcmp(mgmt->bssid, "\xff\xff\xff\xff\xff\xff", ETH_ALEN) != 0) + return; + + end = ((u8 *) mgmt) + len; + pos = mgmt->u.probe_req.variable; + if (pos[0] != WLAN_EID_SSID || + pos + 2 + pos[1] > end) { +#ifdef CONFIG_MAC80211_IBSS_DEBUG + printk(KERN_DEBUG "%s: Invalid SSID IE in ProbeReq " + "from %pM\n", + sdata->dev->name, mgmt->sa); +#endif + return; + } + if (pos[1] != 0 && + (pos[1] != ifibss->ssid_len || + memcmp(pos + 2, ifibss->ssid, ifibss->ssid_len) != 0)) { + /* Ignore ProbeReq for foreign SSID */ + return; + } + + /* Reply with ProbeResp */ + skb = skb_copy(ifibss->probe_resp, GFP_KERNEL); + if (!skb) + return; + + resp = (struct ieee80211_mgmt *) skb->data; + memcpy(resp->da, mgmt->sa, ETH_ALEN); +#ifdef CONFIG_MAC80211_IBSS_DEBUG + printk(KERN_DEBUG "%s: Sending ProbeResp to %pM\n", + sdata->dev->name, resp->da); +#endif /* CONFIG_MAC80211_IBSS_DEBUG */ + ieee80211_tx_skb(sdata, skb, 0); +} + +static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, + size_t len, + struct ieee80211_rx_status *rx_status) +{ + size_t baselen; + struct ieee802_11_elems elems; + + if (memcmp(mgmt->da, sdata->dev->dev_addr, ETH_ALEN)) + return; /* ignore ProbeResp to foreign address */ + + baselen = (u8 *) mgmt->u.probe_resp.variable - (u8 *) mgmt; + if (baselen > len) + return; + + ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, + &elems); + + ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, false); +} + +static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, + size_t len, + struct ieee80211_rx_status *rx_status) +{ + size_t baselen; + struct ieee802_11_elems elems; + + /* Process beacon from the current BSS */ + baselen = (u8 *) mgmt->u.beacon.variable - (u8 *) mgmt; + if (baselen > len) + return; + + ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems); + + ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, true); +} + +static void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) +{ + struct ieee80211_rx_status *rx_status; + struct ieee80211_mgmt *mgmt; + u16 fc; + + rx_status = (struct ieee80211_rx_status *) skb->cb; + mgmt = (struct ieee80211_mgmt *) skb->data; + fc = le16_to_cpu(mgmt->frame_control); + + switch (fc & IEEE80211_FCTL_STYPE) { + case IEEE80211_STYPE_PROBE_REQ: + ieee80211_rx_mgmt_probe_req(sdata, mgmt, skb->len); + break; + case IEEE80211_STYPE_PROBE_RESP: + ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len, + rx_status); + break; + case IEEE80211_STYPE_BEACON: + ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, + rx_status); + break; + case IEEE80211_STYPE_AUTH: + ieee80211_rx_mgmt_auth_ibss(sdata, mgmt, skb->len); + break; + } + + kfree_skb(skb); +} + +static void ieee80211_ibss_work(struct work_struct *work) +{ + struct ieee80211_sub_if_data *sdata = + container_of(work, struct ieee80211_sub_if_data, u.ibss.work); + struct ieee80211_local *local = sdata->local; + struct ieee80211_if_ibss *ifibss; + struct sk_buff *skb; + + if (!netif_running(sdata->dev)) + return; + + if (local->sw_scanning || local->hw_scanning) + return; + + if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_ADHOC)) + return; + ifibss = &sdata->u.ibss; + + while ((skb = skb_dequeue(&ifibss->skb_queue))) + ieee80211_ibss_rx_queued_mgmt(sdata, skb); + + if (!test_and_clear_bit(IEEE80211_IBSS_REQ_RUN, &ifibss->request)) + return; + + switch (ifibss->state) { + case IEEE80211_IBSS_MLME_SEARCH: + ieee80211_sta_find_ibss(sdata); + break; + case IEEE80211_IBSS_MLME_JOINED: + ieee80211_sta_merge_ibss(sdata); + break; + default: + WARN_ON(1); + break; + } +} + +static void ieee80211_ibss_timer(unsigned long data) +{ + struct ieee80211_sub_if_data *sdata = + (struct ieee80211_sub_if_data *) data; + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; + struct ieee80211_local *local = sdata->local; + + set_bit(IEEE80211_IBSS_REQ_RUN, &ifibss->request); + queue_work(local->hw.workqueue, &ifibss->work); +} + +void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; + + INIT_WORK(&ifibss->work, ieee80211_ibss_work); + setup_timer(&ifibss->timer, ieee80211_ibss_timer, + (unsigned long) sdata); + skb_queue_head_init(&ifibss->skb_queue); + + ifibss->flags |= IEEE80211_IBSS_AUTO_BSSID_SEL | + IEEE80211_IBSS_AUTO_CHANNEL_SEL; +} + +int ieee80211_ibss_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len) +{ + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; + + if (len > IEEE80211_MAX_SSID_LEN) + return -EINVAL; + + if (ifibss->ssid_len != len || memcmp(ifibss->ssid, ssid, len) != 0) { + memset(ifibss->ssid, 0, sizeof(ifibss->ssid)); + memcpy(ifibss->ssid, ssid, len); + ifibss->ssid_len = len; + } + + ifibss->flags &= ~IEEE80211_IBSS_PREV_BSSID_SET; + + if (len) + ifibss->flags |= IEEE80211_IBSS_SSID_SET; + else + ifibss->flags &= ~IEEE80211_IBSS_SSID_SET; + + ifibss->ibss_join_req = jiffies; + ifibss->state = IEEE80211_IBSS_MLME_SEARCH; + return ieee80211_sta_find_ibss(sdata); +} + +int ieee80211_ibss_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len) +{ + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; + + memcpy(ssid, ifibss->ssid, ifibss->ssid_len); + *len = ifibss->ssid_len; + + return 0; +} + +int ieee80211_ibss_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid) +{ + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; + + if (is_valid_ether_addr(bssid)) { + memcpy(ifibss->bssid, bssid, ETH_ALEN); + ifibss->flags |= IEEE80211_IBSS_BSSID_SET; + } else { + memset(ifibss->bssid, 0, ETH_ALEN); + ifibss->flags &= ~IEEE80211_IBSS_BSSID_SET; + } + + if (netif_running(sdata->dev)) { + if (ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID)) { + printk(KERN_DEBUG "%s: Failed to config new BSSID to " + "the low-level driver\n", sdata->dev->name); + } + } + + return ieee80211_ibss_set_ssid(sdata, ifibss->ssid, ifibss->ssid_len); +} + +/* scan finished notification */ +void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local) +{ + struct ieee80211_sub_if_data *sdata = local->scan_sdata; + struct ieee80211_if_ibss *ifibss; + + if (sdata && sdata->vif.type == NL80211_IFTYPE_ADHOC) { + ifibss = &sdata->u.ibss; + if ((!(ifibss->flags & IEEE80211_IBSS_PREV_BSSID_SET)) || + !ieee80211_sta_active_ibss(sdata)) + ieee80211_sta_find_ibss(sdata); + } +} + +ieee80211_rx_result +ieee80211_ibss_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, + struct ieee80211_rx_status *rx_status) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_mgmt *mgmt; + u16 fc; + + if (skb->len < 24) + return RX_DROP_MONITOR; + + mgmt = (struct ieee80211_mgmt *) skb->data; + fc = le16_to_cpu(mgmt->frame_control); + + switch (fc & IEEE80211_FCTL_STYPE) { + case IEEE80211_STYPE_PROBE_RESP: + case IEEE80211_STYPE_BEACON: + memcpy(skb->cb, rx_status, sizeof(*rx_status)); + case IEEE80211_STYPE_PROBE_REQ: + case IEEE80211_STYPE_AUTH: + skb_queue_tail(&sdata->u.ibss.skb_queue, skb); + queue_work(local->hw.workqueue, &sdata->u.ibss.work); + return RX_QUEUED; + } + + return RX_DROP_MONITOR; +} diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e2bbd3f11797..27d56414019d 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -239,7 +239,7 @@ struct mesh_preq_queue { u8 flags; }; -/* flags used in struct ieee80211_if_sta.flags */ +/* flags used in struct ieee80211_if_managed.flags */ #define IEEE80211_STA_SSID_SET BIT(0) #define IEEE80211_STA_BSSID_SET BIT(1) #define IEEE80211_STA_PREV_BSSID_SET BIT(2) @@ -262,31 +262,30 @@ struct mesh_preq_queue { #define IEEE80211_STA_REQ_AUTH 2 #define IEEE80211_STA_REQ_RUN 3 -/* STA/IBSS MLME states */ -enum ieee80211_sta_mlme_state { - IEEE80211_STA_MLME_DISABLED, - IEEE80211_STA_MLME_DIRECT_PROBE, - IEEE80211_STA_MLME_AUTHENTICATE, - IEEE80211_STA_MLME_ASSOCIATE, - IEEE80211_STA_MLME_ASSOCIATED, - IEEE80211_STA_MLME_IBSS_SEARCH, - IEEE80211_STA_MLME_IBSS_JOINED, -}; - /* bitfield of allowed auth algs */ #define IEEE80211_AUTH_ALG_OPEN BIT(0) #define IEEE80211_AUTH_ALG_SHARED_KEY BIT(1) #define IEEE80211_AUTH_ALG_LEAP BIT(2) -struct ieee80211_if_sta { +struct ieee80211_if_managed { struct timer_list timer; struct timer_list chswitch_timer; struct work_struct work; struct work_struct chswitch_work; + u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; + u8 ssid[IEEE80211_MAX_SSID_LEN]; - enum ieee80211_sta_mlme_state state; size_t ssid_len; + + enum { + IEEE80211_STA_MLME_DISABLED, + IEEE80211_STA_MLME_DIRECT_PROBE, + IEEE80211_STA_MLME_AUTHENTICATE, + IEEE80211_STA_MLME_ASSOCIATE, + IEEE80211_STA_MLME_ASSOCIATED, + } state; + u16 aid; u16 ap_capab, capab; u8 *extra_ie; /* to be added to the end of AssocReq */ @@ -319,10 +318,6 @@ struct ieee80211_if_sta { IEEE80211_MFP_REQUIRED } mfp; /* management frame protection */ - unsigned long ibss_join_req; - struct sk_buff *probe_resp; /* ProbeResp template for IBSS */ - u32 supp_rates_bits[IEEE80211_NUM_BANDS]; - int wmm_last_param_set; /* Extra IE data for management frames */ @@ -342,6 +337,42 @@ struct ieee80211_if_sta { size_t ie_disassoc_len; }; +enum ieee80211_ibss_flags { + IEEE80211_IBSS_AUTO_CHANNEL_SEL = BIT(0), + IEEE80211_IBSS_AUTO_BSSID_SEL = BIT(1), + IEEE80211_IBSS_BSSID_SET = BIT(2), + IEEE80211_IBSS_PREV_BSSID_SET = BIT(3), + IEEE80211_IBSS_SSID_SET = BIT(4), +}; + +enum ieee80211_ibss_request { + IEEE80211_IBSS_REQ_RUN = 0, +}; + +struct ieee80211_if_ibss { + struct timer_list timer; + struct work_struct work; + + struct sk_buff_head skb_queue; + + u8 ssid[IEEE80211_MAX_SSID_LEN]; + u8 ssid_len; + + u32 flags; + + u8 bssid[ETH_ALEN]; + + unsigned long request; + + unsigned long ibss_join_req; + struct sk_buff *probe_resp; /* ProbeResp template for IBSS */ + + enum { + IEEE80211_IBSS_MLME_SEARCH, + IEEE80211_IBSS_MLME_JOINED, + } state; +}; + struct ieee80211_if_mesh { struct work_struct work; struct timer_list housekeeping_timer; @@ -445,7 +476,8 @@ struct ieee80211_sub_if_data { struct ieee80211_if_ap ap; struct ieee80211_if_wds wds; struct ieee80211_if_vlan vlan; - struct ieee80211_if_sta sta; + struct ieee80211_if_managed mgd; + struct ieee80211_if_ibss ibss; #ifdef CONFIG_MAC80211_MESH struct ieee80211_if_mesh mesh; #endif @@ -892,34 +924,39 @@ void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx); void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, u32 changed); void ieee80211_configure_filter(struct ieee80211_local *local); +u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata); /* wireless extensions */ extern const struct iw_handler_def ieee80211_iw_handler_def; -/* STA/IBSS code */ +/* STA code */ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata); -void ieee80211_scan_work(struct work_struct *work); -void ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, - struct ieee80211_rx_status *rx_status); +ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, + struct ieee80211_rx_status *rx_status); int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len); int ieee80211_sta_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len); int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid); -void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta); -struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, - u8 *bssid, u8 *addr, u32 supp_rates); +void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata); int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason); int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason); -u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata); -u32 ieee80211_sta_get_rates(struct ieee80211_local *local, - struct ieee802_11_elems *elems, - enum ieee80211_band band); -void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, - u8 *ssid, size_t ssid_len); void ieee80211_send_pspoll(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); +/* IBSS code */ +int ieee80211_ibss_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len); +int ieee80211_ibss_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len); +int ieee80211_ibss_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid); +void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local); +void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata); +ieee80211_rx_result +ieee80211_ibss_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, + struct ieee80211_rx_status *rx_status); +struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, + u8 *bssid, u8 *addr, u32 supp_rates); + /* scan/BSS handling */ +void ieee80211_scan_work(struct work_struct *work); int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, struct cfg80211_scan_request *req); int ieee80211_scan_results(struct ieee80211_local *local, @@ -1051,6 +1088,20 @@ void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue, void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue, enum queue_stop_reason reason); +void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, + u16 transaction, u16 auth_alg, + u8 *extra, size_t extra_len, + const u8 *bssid, int encrypt); +void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, + u8 *ssid, size_t ssid_len); + +void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, + const size_t supp_rates_len, + const u8 *supp_rates); +u32 ieee80211_sta_get_rates(struct ieee80211_local *local, + struct ieee802_11_elems *elems, + enum ieee80211_band band); + #ifdef CONFIG_MAC80211_NOINLINE #define debug_noinline noinline #else diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index e8221180b6c1..2acc416e77e1 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -236,7 +236,10 @@ static int ieee80211_open(struct net_device *dev) break; case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_ADHOC: - sdata->u.sta.flags &= ~IEEE80211_STA_PREV_BSSID_SET; + if (sdata->vif.type == NL80211_IFTYPE_STATION) + sdata->u.mgd.flags &= ~IEEE80211_STA_PREV_BSSID_SET; + else + sdata->u.ibss.flags &= ~IEEE80211_IBSS_PREV_BSSID_SET; /* fall through */ default: conf.vif = &sdata->vif; @@ -321,11 +324,10 @@ static int ieee80211_open(struct net_device *dev) * yet be effective. Trigger execution of ieee80211_sta_work * to fix this. */ - if (sdata->vif.type == NL80211_IFTYPE_STATION || - sdata->vif.type == NL80211_IFTYPE_ADHOC) { - struct ieee80211_if_sta *ifsta = &sdata->u.sta; - queue_work(local->hw.workqueue, &ifsta->work); - } + if (sdata->vif.type == NL80211_IFTYPE_STATION) + queue_work(local->hw.workqueue, &sdata->u.mgd.work); + else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) + queue_work(local->hw.workqueue, &sdata->u.ibss.work); netif_tx_start_all_queues(dev); @@ -452,15 +454,13 @@ static int ieee80211_stop(struct net_device *dev) netif_addr_unlock_bh(local->mdev); break; case NL80211_IFTYPE_STATION: - case NL80211_IFTYPE_ADHOC: /* Announce that we are leaving the network. */ - if (sdata->u.sta.state != IEEE80211_STA_MLME_DISABLED) + if (sdata->u.mgd.state != IEEE80211_STA_MLME_DISABLED) ieee80211_sta_deauthenticate(sdata, WLAN_REASON_DEAUTH_LEAVING); - - memset(sdata->u.sta.bssid, 0, ETH_ALEN); - del_timer_sync(&sdata->u.sta.chswitch_timer); - del_timer_sync(&sdata->u.sta.timer); + memset(sdata->u.mgd.bssid, 0, ETH_ALEN); + del_timer_sync(&sdata->u.mgd.chswitch_timer); + del_timer_sync(&sdata->u.mgd.timer); /* * If the timer fired while we waited for it, it will have * requeued the work. Now the work will be running again @@ -468,8 +468,8 @@ static int ieee80211_stop(struct net_device *dev) * whether the interface is running, which, at this point, * it no longer is. */ - cancel_work_sync(&sdata->u.sta.work); - cancel_work_sync(&sdata->u.sta.chswitch_work); + cancel_work_sync(&sdata->u.mgd.work); + cancel_work_sync(&sdata->u.mgd.chswitch_work); /* * When we get here, the interface is marked down. * Call synchronize_rcu() to wait for the RX path @@ -477,13 +477,22 @@ static int ieee80211_stop(struct net_device *dev) * frames at this very time on another CPU. */ synchronize_rcu(); - skb_queue_purge(&sdata->u.sta.skb_queue); + skb_queue_purge(&sdata->u.mgd.skb_queue); - sdata->u.sta.flags &= ~(IEEE80211_STA_PRIVACY_INVOKED | + sdata->u.mgd.flags &= ~(IEEE80211_STA_PRIVACY_INVOKED | IEEE80211_STA_TKIP_WEP_USED); - kfree(sdata->u.sta.extra_ie); - sdata->u.sta.extra_ie = NULL; - sdata->u.sta.extra_ie_len = 0; + kfree(sdata->u.mgd.extra_ie); + sdata->u.mgd.extra_ie = NULL; + sdata->u.mgd.extra_ie_len = 0; + /* fall through */ + case NL80211_IFTYPE_ADHOC: + if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { + memset(sdata->u.ibss.bssid, 0, ETH_ALEN); + del_timer_sync(&sdata->u.ibss.timer); + cancel_work_sync(&sdata->u.ibss.work); + synchronize_rcu(); + skb_queue_purge(&sdata->u.ibss.skb_queue); + } /* fall through */ case NL80211_IFTYPE_MESH_POINT: if (ieee80211_vif_is_mesh(&sdata->vif)) { @@ -629,19 +638,20 @@ static void ieee80211_teardown_sdata(struct net_device *dev) if (ieee80211_vif_is_mesh(&sdata->vif)) mesh_rmc_free(sdata); break; - case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_ADHOC: - kfree(sdata->u.sta.extra_ie); - kfree(sdata->u.sta.assocreq_ies); - kfree(sdata->u.sta.assocresp_ies); - kfree_skb(sdata->u.sta.probe_resp); - kfree(sdata->u.sta.ie_probereq); - kfree(sdata->u.sta.ie_proberesp); - kfree(sdata->u.sta.ie_auth); - kfree(sdata->u.sta.ie_assocreq); - kfree(sdata->u.sta.ie_reassocreq); - kfree(sdata->u.sta.ie_deauth); - kfree(sdata->u.sta.ie_disassoc); + kfree_skb(sdata->u.ibss.probe_resp); + break; + case NL80211_IFTYPE_STATION: + kfree(sdata->u.mgd.extra_ie); + kfree(sdata->u.mgd.assocreq_ies); + kfree(sdata->u.mgd.assocresp_ies); + kfree(sdata->u.mgd.ie_probereq); + kfree(sdata->u.mgd.ie_proberesp); + kfree(sdata->u.mgd.ie_auth); + kfree(sdata->u.mgd.ie_assocreq); + kfree(sdata->u.mgd.ie_reassocreq); + kfree(sdata->u.mgd.ie_deauth); + kfree(sdata->u.mgd.ie_disassoc); break; case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_AP_VLAN: @@ -708,9 +718,11 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, INIT_LIST_HEAD(&sdata->u.ap.vlans); break; case NL80211_IFTYPE_STATION: - case NL80211_IFTYPE_ADHOC: ieee80211_sta_setup_sdata(sdata); break; + case NL80211_IFTYPE_ADHOC: + ieee80211_ibss_setup_sdata(sdata); + break; case NL80211_IFTYPE_MESH_POINT: if (ieee80211_vif_is_mesh(&sdata->vif)) ieee80211_mesh_init_sdata(sdata); diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 19b480de4bbc..687acf23054d 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -400,7 +400,7 @@ void ieee80211_key_link(struct ieee80211_key *key, */ /* same here, the AP could be using QoS */ - ap = sta_info_get(key->local, key->sdata->u.sta.bssid); + ap = sta_info_get(key->local, key->sdata->u.mgd.bssid); if (ap) { if (test_sta_flags(ap, WLAN_STA_WME)) key->conf.flags |= diff --git a/net/mac80211/main.c b/net/mac80211/main.c index e9181981adcd..fce9d08986e9 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -169,9 +169,10 @@ int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed) memset(&conf, 0, sizeof(conf)); - if (sdata->vif.type == NL80211_IFTYPE_STATION || - sdata->vif.type == NL80211_IFTYPE_ADHOC) - conf.bssid = sdata->u.sta.bssid; + if (sdata->vif.type == NL80211_IFTYPE_STATION) + conf.bssid = sdata->u.mgd.bssid; + else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) + conf.bssid = sdata->u.ibss.bssid; else if (sdata->vif.type == NL80211_IFTYPE_AP) conf.bssid = sdata->dev->dev_addr; else if (ieee80211_vif_is_mesh(&sdata->vif)) { @@ -210,7 +211,7 @@ int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed) !!rcu_dereference(sdata->u.ap.beacon); break; case NL80211_IFTYPE_ADHOC: - conf.enable_beacon = !!sdata->u.sta.probe_resp; + conf.enable_beacon = !!sdata->u.ibss.probe_resp; break; case NL80211_IFTYPE_MESH_POINT: conf.enable_beacon = true; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index bf872cbba096..ec5a0900cba0 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -15,11 +15,8 @@ #include #include #include -#include -#include #include #include -#include #include #include @@ -35,15 +32,6 @@ #define IEEE80211_MONITORING_INTERVAL (2 * HZ) #define IEEE80211_PROBE_INTERVAL (60 * HZ) #define IEEE80211_RETRY_AUTH_INTERVAL (1 * HZ) -#define IEEE80211_SCAN_INTERVAL (2 * HZ) -#define IEEE80211_SCAN_INTERVAL_SLOW (15 * HZ) -#define IEEE80211_IBSS_JOIN_TIMEOUT (7 * HZ) - -#define IEEE80211_IBSS_MERGE_INTERVAL (30 * HZ) -#define IEEE80211_IBSS_INACTIVITY_LIMIT (60 * HZ) - -#define IEEE80211_IBSS_MAX_STA_ENTRIES 128 - /* utils */ static int ecw2cw(int ecw) @@ -92,43 +80,6 @@ static int ieee80211_compatible_rates(struct ieee80211_bss *bss, return count; } -/* also used by mesh code */ -u32 ieee80211_sta_get_rates(struct ieee80211_local *local, - struct ieee802_11_elems *elems, - enum ieee80211_band band) -{ - struct ieee80211_supported_band *sband; - struct ieee80211_rate *bitrates; - size_t num_rates; - u32 supp_rates; - int i, j; - sband = local->hw.wiphy->bands[band]; - - if (!sband) { - WARN_ON(1); - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - } - - bitrates = sband->bitrates; - num_rates = sband->n_bitrates; - supp_rates = 0; - for (i = 0; i < elems->supp_rates_len + - elems->ext_supp_rates_len; i++) { - u8 rate = 0; - int own_rate; - if (i < elems->supp_rates_len) - rate = elems->supp_rates[i]; - else if (elems->ext_supp_rates) - rate = elems->ext_supp_rates - [i - elems->supp_rates_len]; - own_rate = 5 * (rate & 0x7f); - for (j = 0; j < num_rates; j++) - if (bitrates[j].bitrate == own_rate) - supp_rates |= BIT(j); - } - return supp_rates; -} - /* frame sending functions */ static void add_extra_ies(struct sk_buff *skb, u8 *ies, size_t ies_len) @@ -137,113 +88,9 @@ static void add_extra_ies(struct sk_buff *skb, u8 *ies, size_t ies_len) memcpy(skb_put(skb, ies_len), ies, ies_len); } -/* also used by scanning code */ -void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, - u8 *ssid, size_t ssid_len) -{ - struct ieee80211_local *local = sdata->local; - struct ieee80211_supported_band *sband; - struct sk_buff *skb; - struct ieee80211_mgmt *mgmt; - u8 *pos, *supp_rates, *esupp_rates = NULL; - int i; - - skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200 + - sdata->u.sta.ie_probereq_len); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for probe " - "request\n", sdata->dev->name); - return; - } - skb_reserve(skb, local->hw.extra_tx_headroom); - - mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); - memset(mgmt, 0, 24); - mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | - IEEE80211_STYPE_PROBE_REQ); - memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); - if (dst) { - memcpy(mgmt->da, dst, ETH_ALEN); - memcpy(mgmt->bssid, dst, ETH_ALEN); - } else { - memset(mgmt->da, 0xff, ETH_ALEN); - memset(mgmt->bssid, 0xff, ETH_ALEN); - } - pos = skb_put(skb, 2 + ssid_len); - *pos++ = WLAN_EID_SSID; - *pos++ = ssid_len; - memcpy(pos, ssid, ssid_len); - - supp_rates = skb_put(skb, 2); - supp_rates[0] = WLAN_EID_SUPP_RATES; - supp_rates[1] = 0; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - - for (i = 0; i < sband->n_bitrates; i++) { - struct ieee80211_rate *rate = &sband->bitrates[i]; - if (esupp_rates) { - pos = skb_put(skb, 1); - esupp_rates[1]++; - } else if (supp_rates[1] == 8) { - esupp_rates = skb_put(skb, 3); - esupp_rates[0] = WLAN_EID_EXT_SUPP_RATES; - esupp_rates[1] = 1; - pos = &esupp_rates[2]; - } else { - pos = skb_put(skb, 1); - supp_rates[1]++; - } - *pos = rate->bitrate / 5; - } - - add_extra_ies(skb, sdata->u.sta.ie_probereq, - sdata->u.sta.ie_probereq_len); - - ieee80211_tx_skb(sdata, skb, 0); -} - -static void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta, - int transaction, u8 *extra, size_t extra_len, - int encrypt) -{ - struct ieee80211_local *local = sdata->local; - struct sk_buff *skb; - struct ieee80211_mgmt *mgmt; - - skb = dev_alloc_skb(local->hw.extra_tx_headroom + - sizeof(*mgmt) + 6 + extra_len + - sdata->u.sta.ie_auth_len); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for auth " - "frame\n", sdata->dev->name); - return; - } - skb_reserve(skb, local->hw.extra_tx_headroom); - - mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24 + 6); - memset(mgmt, 0, 24 + 6); - mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | - IEEE80211_STYPE_AUTH); - if (encrypt) - mgmt->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); - memcpy(mgmt->da, ifsta->bssid, ETH_ALEN); - memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); - memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); - mgmt->u.auth.auth_alg = cpu_to_le16(ifsta->auth_alg); - mgmt->u.auth.auth_transaction = cpu_to_le16(transaction); - ifsta->auth_transaction = transaction + 1; - mgmt->u.auth.status_code = cpu_to_le16(0); - if (extra) - memcpy(skb_put(skb, extra_len), extra, extra_len); - add_extra_ies(skb, sdata->u.sta.ie_auth, sdata->u.sta.ie_auth_len); - - ieee80211_tx_skb(sdata, skb, encrypt); -} - -static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta) +static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) { + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; @@ -256,17 +103,17 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, u32 rates = 0; size_t e_ies_len; - if (ifsta->flags & IEEE80211_STA_PREV_BSSID_SET) { - e_ies = sdata->u.sta.ie_reassocreq; - e_ies_len = sdata->u.sta.ie_reassocreq_len; + if (ifmgd->flags & IEEE80211_IBSS_PREV_BSSID_SET) { + e_ies = sdata->u.mgd.ie_reassocreq; + e_ies_len = sdata->u.mgd.ie_reassocreq_len; } else { - e_ies = sdata->u.sta.ie_assocreq; - e_ies_len = sdata->u.sta.ie_assocreq_len; + e_ies = sdata->u.mgd.ie_assocreq; + e_ies_len = sdata->u.mgd.ie_assocreq_len; } skb = dev_alloc_skb(local->hw.extra_tx_headroom + - sizeof(*mgmt) + 200 + ifsta->extra_ie_len + - ifsta->ssid_len + e_ies_len); + sizeof(*mgmt) + 200 + ifmgd->extra_ie_len + + ifmgd->ssid_len + e_ies_len); if (!skb) { printk(KERN_DEBUG "%s: failed to allocate buffer for assoc " "frame\n", sdata->dev->name); @@ -276,7 +123,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - capab = ifsta->capab; + capab = ifmgd->capab; if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ) { if (!(local->hw.flags & IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE)) @@ -285,9 +132,9 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, capab |= WLAN_CAPABILITY_SHORT_PREAMBLE; } - bss = ieee80211_rx_bss_get(local, ifsta->bssid, + bss = ieee80211_rx_bss_get(local, ifmgd->bssid, local->hw.conf.channel->center_freq, - ifsta->ssid, ifsta->ssid_len); + ifmgd->ssid, ifmgd->ssid_len); if (bss) { if (bss->cbss.capability & WLAN_CAPABILITY_PRIVACY) capab |= WLAN_CAPABILITY_PRIVACY; @@ -312,18 +159,18 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); memset(mgmt, 0, 24); - memcpy(mgmt->da, ifsta->bssid, ETH_ALEN); + memcpy(mgmt->da, ifmgd->bssid, ETH_ALEN); memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); - memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); + memcpy(mgmt->bssid, ifmgd->bssid, ETH_ALEN); - if (ifsta->flags & IEEE80211_STA_PREV_BSSID_SET) { + if (ifmgd->flags & IEEE80211_STA_PREV_BSSID_SET) { skb_put(skb, 10); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_REASSOC_REQ); mgmt->u.reassoc_req.capab_info = cpu_to_le16(capab); mgmt->u.reassoc_req.listen_interval = cpu_to_le16(local->hw.conf.listen_interval); - memcpy(mgmt->u.reassoc_req.current_ap, ifsta->prev_bssid, + memcpy(mgmt->u.reassoc_req.current_ap, ifmgd->prev_bssid, ETH_ALEN); } else { skb_put(skb, 4); @@ -335,10 +182,10 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, } /* SSID */ - ies = pos = skb_put(skb, 2 + ifsta->ssid_len); + ies = pos = skb_put(skb, 2 + ifmgd->ssid_len); *pos++ = WLAN_EID_SSID; - *pos++ = ifsta->ssid_len; - memcpy(pos, ifsta->ssid, ifsta->ssid_len); + *pos++ = ifmgd->ssid_len; + memcpy(pos, ifmgd->ssid, ifmgd->ssid_len); /* add all rates which were marked to be used above */ supp_rates_len = rates_len; @@ -393,12 +240,12 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, } } - if (ifsta->extra_ie) { - pos = skb_put(skb, ifsta->extra_ie_len); - memcpy(pos, ifsta->extra_ie, ifsta->extra_ie_len); + if (ifmgd->extra_ie) { + pos = skb_put(skb, ifmgd->extra_ie_len); + memcpy(pos, ifmgd->extra_ie, ifmgd->extra_ie_len); } - if (wmm && (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) { + if (wmm && (ifmgd->flags & IEEE80211_STA_WMM_ENABLED)) { pos = skb_put(skb, 9); *pos++ = WLAN_EID_VENDOR_SPECIFIC; *pos++ = 7; /* len */ @@ -418,11 +265,11 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, * mode (11a/b/g) if any one of these ciphers is * configured as pairwise. */ - if (wmm && (ifsta->flags & IEEE80211_STA_WMM_ENABLED) && + if (wmm && (ifmgd->flags & IEEE80211_STA_WMM_ENABLED) && sband->ht_cap.ht_supported && (ht_ie = ieee80211_bss_get_ie(bss, WLAN_EID_HT_INFORMATION)) && ht_ie[1] >= sizeof(struct ieee80211_ht_info) && - (!(ifsta->flags & IEEE80211_STA_TKIP_WEP_USED))) { + (!(ifmgd->flags & IEEE80211_STA_TKIP_WEP_USED))) { struct ieee80211_ht_info *ht_info = (struct ieee80211_ht_info *)(ht_ie + 2); u16 cap = sband->ht_cap.cap; @@ -459,11 +306,11 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, add_extra_ies(skb, e_ies, e_ies_len); - kfree(ifsta->assocreq_ies); - ifsta->assocreq_ies_len = (skb->data + skb->len) - ies; - ifsta->assocreq_ies = kmalloc(ifsta->assocreq_ies_len, GFP_KERNEL); - if (ifsta->assocreq_ies) - memcpy(ifsta->assocreq_ies, ies, ifsta->assocreq_ies_len); + kfree(ifmgd->assocreq_ies); + ifmgd->assocreq_ies_len = (skb->data + skb->len) - ies; + ifmgd->assocreq_ies = kmalloc(ifmgd->assocreq_ies_len, GFP_KERNEL); + if (ifmgd->assocreq_ies) + memcpy(ifmgd->assocreq_ies, ies, ifmgd->assocreq_ies_len); ieee80211_tx_skb(sdata, skb, 0); } @@ -473,18 +320,18 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, u16 stype, u16 reason) { struct ieee80211_local *local = sdata->local; - struct ieee80211_if_sta *ifsta = &sdata->u.sta; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; u8 *ies; size_t ies_len; if (stype == IEEE80211_STYPE_DEAUTH) { - ies = sdata->u.sta.ie_deauth; - ies_len = sdata->u.sta.ie_deauth_len; + ies = sdata->u.mgd.ie_deauth; + ies_len = sdata->u.mgd.ie_deauth_len; } else { - ies = sdata->u.sta.ie_disassoc; - ies_len = sdata->u.sta.ie_disassoc_len; + ies = sdata->u.mgd.ie_disassoc; + ies_len = sdata->u.mgd.ie_disassoc_len; } skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + @@ -498,9 +345,9 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); memset(mgmt, 0, 24); - memcpy(mgmt->da, ifsta->bssid, ETH_ALEN); + memcpy(mgmt->da, ifmgd->bssid, ETH_ALEN); memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); - memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); + memcpy(mgmt->bssid, ifmgd->bssid, ETH_ALEN); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | stype); skb_put(skb, 2); /* u.deauth.reason_code == u.disassoc.reason_code */ @@ -508,13 +355,13 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, add_extra_ies(skb, ies, ies_len); - ieee80211_tx_skb(sdata, skb, ifsta->flags & IEEE80211_STA_MFP_ENABLED); + ieee80211_tx_skb(sdata, skb, ifmgd->flags & IEEE80211_STA_MFP_ENABLED); } void ieee80211_send_pspoll(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { - struct ieee80211_if_sta *ifsta = &sdata->u.sta; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_pspoll *pspoll; struct sk_buff *skb; u16 fc; @@ -531,43 +378,20 @@ void ieee80211_send_pspoll(struct ieee80211_local *local, memset(pspoll, 0, sizeof(*pspoll)); fc = IEEE80211_FTYPE_CTL | IEEE80211_STYPE_PSPOLL | IEEE80211_FCTL_PM; pspoll->frame_control = cpu_to_le16(fc); - pspoll->aid = cpu_to_le16(ifsta->aid); + pspoll->aid = cpu_to_le16(ifmgd->aid); /* aid in PS-Poll has its two MSBs each set to 1 */ pspoll->aid |= cpu_to_le16(1 << 15 | 1 << 14); - memcpy(pspoll->bssid, ifsta->bssid, ETH_ALEN); + memcpy(pspoll->bssid, ifmgd->bssid, ETH_ALEN); memcpy(pspoll->ta, sdata->dev->dev_addr, ETH_ALEN); ieee80211_tx_skb(sdata, skb, 0); - - return; } /* MLME */ -static void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, - const size_t supp_rates_len, - const u8 *supp_rates) -{ - struct ieee80211_local *local = sdata->local; - int i, have_higher_than_11mbit = 0; - - /* cf. IEEE 802.11 9.2.12 */ - for (i = 0; i < supp_rates_len; i++) - if ((supp_rates[i] & 0x7f) * 5 > 110) - have_higher_than_11mbit = 1; - - if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ && - have_higher_than_11mbit) - sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE; - else - sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE; - - ieee80211_set_wmm_default(sdata); -} - static void ieee80211_sta_wmm_params(struct ieee80211_local *local, - struct ieee80211_if_sta *ifsta, + struct ieee80211_if_managed *ifmgd, u8 *wmm_param, size_t wmm_param_len) { struct ieee80211_tx_queue_params params; @@ -575,7 +399,7 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, int count; u8 *pos; - if (!(ifsta->flags & IEEE80211_STA_WMM_ENABLED)) + if (!(ifmgd->flags & IEEE80211_STA_WMM_ENABLED)) return; if (!wmm_param) @@ -584,9 +408,9 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, if (wmm_param_len < 8 || wmm_param[5] /* version */ != 1) return; count = wmm_param[6] & 0x0f; - if (count == ifsta->wmm_last_param_set) + if (count == ifmgd->wmm_last_param_set) return; - ifsta->wmm_last_param_set = count; + ifmgd->wmm_last_param_set = count; pos = wmm_param + 8; left = wmm_param_len - 8; @@ -671,7 +495,7 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, { struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf; #ifdef CONFIG_MAC80211_VERBOSE_DEBUG - struct ieee80211_if_sta *ifsta = &sdata->u.sta; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; #endif u32 changed = 0; bool use_protection; @@ -694,7 +518,7 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, printk(KERN_DEBUG "%s: CTS protection %s (BSSID=%pM)\n", sdata->dev->name, use_protection ? "enabled" : "disabled", - ifsta->bssid); + ifmgd->bssid); } #endif bss_conf->use_cts_prot = use_protection; @@ -708,7 +532,7 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, " (BSSID=%pM)\n", sdata->dev->name, use_short_preamble ? "short" : "long", - ifsta->bssid); + ifmgd->bssid); } #endif bss_conf->use_short_preamble = use_short_preamble; @@ -722,7 +546,7 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, " (BSSID=%pM)\n", sdata->dev->name, use_short_slot ? "short" : "long", - ifsta->bssid); + ifmgd->bssid); } #endif bss_conf->use_short_slot = use_short_slot; @@ -732,57 +556,57 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, return changed; } -static void ieee80211_sta_send_apinfo(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta) +static void ieee80211_sta_send_apinfo(struct ieee80211_sub_if_data *sdata) { union iwreq_data wrqu; + memset(&wrqu, 0, sizeof(wrqu)); - if (ifsta->flags & IEEE80211_STA_ASSOCIATED) - memcpy(wrqu.ap_addr.sa_data, sdata->u.sta.bssid, ETH_ALEN); + if (sdata->u.mgd.flags & IEEE80211_STA_ASSOCIATED) + memcpy(wrqu.ap_addr.sa_data, sdata->u.mgd.bssid, ETH_ALEN); wrqu.ap_addr.sa_family = ARPHRD_ETHER; wireless_send_event(sdata->dev, SIOCGIWAP, &wrqu, NULL); } -static void ieee80211_sta_send_associnfo(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta) +static void ieee80211_sta_send_associnfo(struct ieee80211_sub_if_data *sdata) { + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; char *buf; size_t len; int i; union iwreq_data wrqu; - if (!ifsta->assocreq_ies && !ifsta->assocresp_ies) + if (!ifmgd->assocreq_ies && !ifmgd->assocresp_ies) return; - buf = kmalloc(50 + 2 * (ifsta->assocreq_ies_len + - ifsta->assocresp_ies_len), GFP_KERNEL); + buf = kmalloc(50 + 2 * (ifmgd->assocreq_ies_len + + ifmgd->assocresp_ies_len), GFP_KERNEL); if (!buf) return; len = sprintf(buf, "ASSOCINFO("); - if (ifsta->assocreq_ies) { + if (ifmgd->assocreq_ies) { len += sprintf(buf + len, "ReqIEs="); - for (i = 0; i < ifsta->assocreq_ies_len; i++) { + for (i = 0; i < ifmgd->assocreq_ies_len; i++) { len += sprintf(buf + len, "%02x", - ifsta->assocreq_ies[i]); + ifmgd->assocreq_ies[i]); } } - if (ifsta->assocresp_ies) { - if (ifsta->assocreq_ies) + if (ifmgd->assocresp_ies) { + if (ifmgd->assocreq_ies) len += sprintf(buf + len, " "); len += sprintf(buf + len, "RespIEs="); - for (i = 0; i < ifsta->assocresp_ies_len; i++) { + for (i = 0; i < ifmgd->assocresp_ies_len; i++) { len += sprintf(buf + len, "%02x", - ifsta->assocresp_ies[i]); + ifmgd->assocresp_ies[i]); } } len += sprintf(buf + len, ")"); if (len > IW_CUSTOM_MAX) { len = sprintf(buf, "ASSOCRESPIE="); - for (i = 0; i < ifsta->assocresp_ies_len; i++) { + for (i = 0; i < ifmgd->assocresp_ies_len; i++) { len += sprintf(buf + len, "%02x", - ifsta->assocresp_ies[i]); + ifmgd->assocresp_ies[i]); } } @@ -797,20 +621,20 @@ static void ieee80211_sta_send_associnfo(struct ieee80211_sub_if_data *sdata, static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta, u32 bss_info_changed) { + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; struct ieee80211_conf *conf = &local_to_hw(local)->conf; struct ieee80211_bss *bss; bss_info_changed |= BSS_CHANGED_ASSOC; - ifsta->flags |= IEEE80211_STA_ASSOCIATED; + ifmgd->flags |= IEEE80211_STA_ASSOCIATED; - bss = ieee80211_rx_bss_get(local, ifsta->bssid, + bss = ieee80211_rx_bss_get(local, ifmgd->bssid, conf->channel->center_freq, - ifsta->ssid, ifsta->ssid_len); + ifmgd->ssid, ifmgd->ssid_len); if (bss) { /* set timing information */ sdata->vif.bss_conf.beacon_int = bss->cbss.beacon_interval; @@ -823,11 +647,11 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, ieee80211_rx_bss_put(local, bss); } - ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET; - memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN); - ieee80211_sta_send_associnfo(sdata, ifsta); + ifmgd->flags |= IEEE80211_STA_PREV_BSSID_SET; + memcpy(ifmgd->prev_bssid, sdata->u.mgd.bssid, ETH_ALEN); + ieee80211_sta_send_associnfo(sdata); - ifsta->last_probe = jiffies; + ifmgd->last_probe = jiffies; ieee80211_led_assoc(local, 1); sdata->vif.bss_conf.assoc = 1; @@ -856,70 +680,74 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, netif_tx_start_all_queues(sdata->dev); netif_carrier_on(sdata->dev); - ieee80211_sta_send_apinfo(sdata, ifsta); + ieee80211_sta_send_apinfo(sdata); } -static void ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta) +static void ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata) { - ifsta->direct_probe_tries++; - if (ifsta->direct_probe_tries > IEEE80211_AUTH_MAX_TRIES) { + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + + ifmgd->direct_probe_tries++; + if (ifmgd->direct_probe_tries > IEEE80211_AUTH_MAX_TRIES) { printk(KERN_DEBUG "%s: direct probe to AP %pM timed out\n", - sdata->dev->name, ifsta->bssid); - ifsta->state = IEEE80211_STA_MLME_DISABLED; - ieee80211_sta_send_apinfo(sdata, ifsta); + sdata->dev->name, ifmgd->bssid); + ifmgd->state = IEEE80211_STA_MLME_DISABLED; + ieee80211_sta_send_apinfo(sdata); /* * Most likely AP is not in the range so remove the * bss information associated to the AP */ - ieee80211_rx_bss_remove(sdata, ifsta->bssid, + ieee80211_rx_bss_remove(sdata, ifmgd->bssid, sdata->local->hw.conf.channel->center_freq, - ifsta->ssid, ifsta->ssid_len); + ifmgd->ssid, ifmgd->ssid_len); return; } printk(KERN_DEBUG "%s: direct probe to AP %pM try %d\n", - sdata->dev->name, ifsta->bssid, - ifsta->direct_probe_tries); + sdata->dev->name, ifmgd->bssid, + ifmgd->direct_probe_tries); - ifsta->state = IEEE80211_STA_MLME_DIRECT_PROBE; + ifmgd->state = IEEE80211_STA_MLME_DIRECT_PROBE; - set_bit(IEEE80211_STA_REQ_DIRECT_PROBE, &ifsta->request); + set_bit(IEEE80211_STA_REQ_DIRECT_PROBE, &ifmgd->request); /* Direct probe is sent to broadcast address as some APs * will not answer to direct packet in unassociated state. */ ieee80211_send_probe_req(sdata, NULL, - ifsta->ssid, ifsta->ssid_len); + ifmgd->ssid, ifmgd->ssid_len); - mod_timer(&ifsta->timer, jiffies + IEEE80211_AUTH_TIMEOUT); + mod_timer(&ifmgd->timer, jiffies + IEEE80211_AUTH_TIMEOUT); } -static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta) +static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata) { - ifsta->auth_tries++; - if (ifsta->auth_tries > IEEE80211_AUTH_MAX_TRIES) { + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + + ifmgd->auth_tries++; + if (ifmgd->auth_tries > IEEE80211_AUTH_MAX_TRIES) { printk(KERN_DEBUG "%s: authentication with AP %pM" " timed out\n", - sdata->dev->name, ifsta->bssid); - ifsta->state = IEEE80211_STA_MLME_DISABLED; - ieee80211_sta_send_apinfo(sdata, ifsta); - ieee80211_rx_bss_remove(sdata, ifsta->bssid, + sdata->dev->name, ifmgd->bssid); + ifmgd->state = IEEE80211_STA_MLME_DISABLED; + ieee80211_sta_send_apinfo(sdata); + ieee80211_rx_bss_remove(sdata, ifmgd->bssid, sdata->local->hw.conf.channel->center_freq, - ifsta->ssid, ifsta->ssid_len); + ifmgd->ssid, ifmgd->ssid_len); return; } - ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE; + ifmgd->state = IEEE80211_STA_MLME_AUTHENTICATE; printk(KERN_DEBUG "%s: authenticate with AP %pM\n", - sdata->dev->name, ifsta->bssid); + sdata->dev->name, ifmgd->bssid); - ieee80211_send_auth(sdata, ifsta, 1, NULL, 0, 0); + ieee80211_send_auth(sdata, 1, ifmgd->auth_alg, NULL, 0, + ifmgd->bssid, 0); + ifmgd->auth_transaction = 2; - mod_timer(&ifsta->timer, jiffies + IEEE80211_AUTH_TIMEOUT); + mod_timer(&ifmgd->timer, jiffies + IEEE80211_AUTH_TIMEOUT); } /* @@ -927,27 +755,28 @@ static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata, * if self disconnected or a reason code from the AP. */ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta, bool deauth, - bool self_disconnected, u16 reason) + bool deauth, bool self_disconnected, + u16 reason) { + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; struct sta_info *sta; u32 changed = 0, config_changed = 0; rcu_read_lock(); - sta = sta_info_get(local, ifsta->bssid); + sta = sta_info_get(local, ifmgd->bssid); if (!sta) { rcu_read_unlock(); return; } if (deauth) { - ifsta->direct_probe_tries = 0; - ifsta->auth_tries = 0; + ifmgd->direct_probe_tries = 0; + ifmgd->auth_tries = 0; } - ifsta->assoc_scan_tries = 0; - ifsta->assoc_tries = 0; + ifmgd->assoc_scan_tries = 0; + ifmgd->assoc_tries = 0; netif_tx_stop_all_queues(sdata->dev); netif_carrier_off(sdata->dev); @@ -963,20 +792,20 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, IEEE80211_STYPE_DISASSOC, reason); } - ifsta->flags &= ~IEEE80211_STA_ASSOCIATED; + ifmgd->flags &= ~IEEE80211_STA_ASSOCIATED; changed |= ieee80211_reset_erp_info(sdata); ieee80211_led_assoc(local, 0); changed |= BSS_CHANGED_ASSOC; sdata->vif.bss_conf.assoc = false; - ieee80211_sta_send_apinfo(sdata, ifsta); + ieee80211_sta_send_apinfo(sdata); if (self_disconnected || reason == WLAN_REASON_DISASSOC_STA_HAS_LEFT) { - ifsta->state = IEEE80211_STA_MLME_DISABLED; - ieee80211_rx_bss_remove(sdata, ifsta->bssid, + ifmgd->state = IEEE80211_STA_MLME_DISABLED; + ieee80211_rx_bss_remove(sdata, ifmgd->bssid, sdata->local->hw.conf.channel->center_freq, - ifsta->ssid, ifsta->ssid_len); + ifmgd->ssid, ifmgd->ssid_len); } rcu_read_unlock(); @@ -999,7 +828,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, rcu_read_lock(); - sta = sta_info_get(local, ifsta->bssid); + sta = sta_info_get(local, ifmgd->bssid); if (!sta) { rcu_read_unlock(); return; @@ -1020,27 +849,27 @@ static int ieee80211_sta_wep_configured(struct ieee80211_sub_if_data *sdata) return 1; } -static int ieee80211_privacy_mismatch(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta) +static int ieee80211_privacy_mismatch(struct ieee80211_sub_if_data *sdata) { + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; struct ieee80211_bss *bss; int bss_privacy; int wep_privacy; int privacy_invoked; - if (!ifsta || (ifsta->flags & IEEE80211_STA_MIXED_CELL)) + if (!ifmgd || (ifmgd->flags & IEEE80211_STA_MIXED_CELL)) return 0; - bss = ieee80211_rx_bss_get(local, ifsta->bssid, + bss = ieee80211_rx_bss_get(local, ifmgd->bssid, local->hw.conf.channel->center_freq, - ifsta->ssid, ifsta->ssid_len); + ifmgd->ssid, ifmgd->ssid_len); if (!bss) return 0; bss_privacy = !!(bss->cbss.capability & WLAN_CAPABILITY_PRIVACY); wep_privacy = !!ieee80211_sta_wep_configured(sdata); - privacy_invoked = !!(ifsta->flags & IEEE80211_STA_PRIVACY_INVOKED); + privacy_invoked = !!(ifmgd->flags & IEEE80211_STA_PRIVACY_INVOKED); ieee80211_rx_bss_put(local, bss); @@ -1050,41 +879,42 @@ static int ieee80211_privacy_mismatch(struct ieee80211_sub_if_data *sdata, return 1; } -static void ieee80211_associate(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta) +static void ieee80211_associate(struct ieee80211_sub_if_data *sdata) { - ifsta->assoc_tries++; - if (ifsta->assoc_tries > IEEE80211_ASSOC_MAX_TRIES) { + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + + ifmgd->assoc_tries++; + if (ifmgd->assoc_tries > IEEE80211_ASSOC_MAX_TRIES) { printk(KERN_DEBUG "%s: association with AP %pM" " timed out\n", - sdata->dev->name, ifsta->bssid); - ifsta->state = IEEE80211_STA_MLME_DISABLED; - ieee80211_sta_send_apinfo(sdata, ifsta); - ieee80211_rx_bss_remove(sdata, ifsta->bssid, + sdata->dev->name, ifmgd->bssid); + ifmgd->state = IEEE80211_STA_MLME_DISABLED; + ieee80211_sta_send_apinfo(sdata); + ieee80211_rx_bss_remove(sdata, ifmgd->bssid, sdata->local->hw.conf.channel->center_freq, - ifsta->ssid, ifsta->ssid_len); + ifmgd->ssid, ifmgd->ssid_len); return; } - ifsta->state = IEEE80211_STA_MLME_ASSOCIATE; + ifmgd->state = IEEE80211_STA_MLME_ASSOCIATE; printk(KERN_DEBUG "%s: associate with AP %pM\n", - sdata->dev->name, ifsta->bssid); - if (ieee80211_privacy_mismatch(sdata, ifsta)) { + sdata->dev->name, ifmgd->bssid); + if (ieee80211_privacy_mismatch(sdata)) { printk(KERN_DEBUG "%s: mismatch in privacy configuration and " "mixed-cell disabled - abort association\n", sdata->dev->name); - ifsta->state = IEEE80211_STA_MLME_DISABLED; + ifmgd->state = IEEE80211_STA_MLME_DISABLED; return; } - ieee80211_send_assoc(sdata, ifsta); + ieee80211_send_assoc(sdata); - mod_timer(&ifsta->timer, jiffies + IEEE80211_ASSOC_TIMEOUT); + mod_timer(&ifmgd->timer, jiffies + IEEE80211_ASSOC_TIMEOUT); } -static void ieee80211_associated(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta) +static void ieee80211_associated(struct ieee80211_sub_if_data *sdata) { + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; struct sta_info *sta; int disassoc; @@ -1094,38 +924,38 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata, * for better APs. */ /* TODO: remove expired BSSes */ - ifsta->state = IEEE80211_STA_MLME_ASSOCIATED; + ifmgd->state = IEEE80211_STA_MLME_ASSOCIATED; rcu_read_lock(); - sta = sta_info_get(local, ifsta->bssid); + sta = sta_info_get(local, ifmgd->bssid); if (!sta) { printk(KERN_DEBUG "%s: No STA entry for own AP %pM\n", - sdata->dev->name, ifsta->bssid); + sdata->dev->name, ifmgd->bssid); disassoc = 1; } else { disassoc = 0; if (time_after(jiffies, sta->last_rx + IEEE80211_MONITORING_INTERVAL)) { - if (ifsta->flags & IEEE80211_STA_PROBEREQ_POLL) { + if (ifmgd->flags & IEEE80211_STA_PROBEREQ_POLL) { printk(KERN_DEBUG "%s: No ProbeResp from " "current AP %pM - assume out of " "range\n", - sdata->dev->name, ifsta->bssid); + sdata->dev->name, ifmgd->bssid); disassoc = 1; } else - ieee80211_send_probe_req(sdata, ifsta->bssid, - ifsta->ssid, - ifsta->ssid_len); - ifsta->flags ^= IEEE80211_STA_PROBEREQ_POLL; + ieee80211_send_probe_req(sdata, ifmgd->bssid, + ifmgd->ssid, + ifmgd->ssid_len); + ifmgd->flags ^= IEEE80211_STA_PROBEREQ_POLL; } else { - ifsta->flags &= ~IEEE80211_STA_PROBEREQ_POLL; - if (time_after(jiffies, ifsta->last_probe + + ifmgd->flags &= ~IEEE80211_STA_PROBEREQ_POLL; + if (time_after(jiffies, ifmgd->last_probe + IEEE80211_PROBE_INTERVAL)) { - ifsta->last_probe = jiffies; - ieee80211_send_probe_req(sdata, ifsta->bssid, - ifsta->ssid, - ifsta->ssid_len); + ifmgd->last_probe = jiffies; + ieee80211_send_probe_req(sdata, ifmgd->bssid, + ifmgd->ssid, + ifmgd->ssid_len); } } } @@ -1133,25 +963,25 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); if (disassoc) - ieee80211_set_disassoc(sdata, ifsta, true, true, + ieee80211_set_disassoc(sdata, true, true, WLAN_REASON_PREV_AUTH_NOT_VALID); else - mod_timer(&ifsta->timer, jiffies + + mod_timer(&ifmgd->timer, jiffies + IEEE80211_MONITORING_INTERVAL); } -static void ieee80211_auth_completed(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta) +static void ieee80211_auth_completed(struct ieee80211_sub_if_data *sdata) { + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + printk(KERN_DEBUG "%s: authenticated\n", sdata->dev->name); - ifsta->flags |= IEEE80211_STA_AUTHENTICATED; - ieee80211_associate(sdata, ifsta); + ifmgd->flags |= IEEE80211_STA_AUTHENTICATED; + ieee80211_associate(sdata); } static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta, struct ieee80211_mgmt *mgmt, size_t len) { @@ -1162,59 +992,37 @@ static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); if (!elems.challenge) return; - ieee80211_send_auth(sdata, ifsta, 3, elems.challenge - 2, - elems.challenge_len + 2, 1); -} - -static void ieee80211_rx_mgmt_auth_ibss(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta, - struct ieee80211_mgmt *mgmt, - size_t len) -{ - u16 auth_alg, auth_transaction, status_code; - - if (len < 24 + 6) - return; - - auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg); - auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); - status_code = le16_to_cpu(mgmt->u.auth.status_code); - - /* - * IEEE 802.11 standard does not require authentication in IBSS - * networks and most implementations do not seem to use it. - * However, try to reply to authentication attempts if someone - * has actually implemented this. - */ - if (auth_alg == WLAN_AUTH_OPEN && auth_transaction == 1) - ieee80211_send_auth(sdata, ifsta, 2, NULL, 0, 0); + ieee80211_send_auth(sdata, 3, sdata->u.mgd.auth_alg, + elems.challenge - 2, elems.challenge_len + 2, + sdata->u.mgd.bssid, 1); + sdata->u.mgd.auth_transaction = 4; } static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta, struct ieee80211_mgmt *mgmt, size_t len) { + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u16 auth_alg, auth_transaction, status_code; - if (ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE) + if (ifmgd->state != IEEE80211_STA_MLME_AUTHENTICATE) return; if (len < 24 + 6) return; - if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) + if (memcmp(ifmgd->bssid, mgmt->sa, ETH_ALEN) != 0) return; - if (memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) + if (memcmp(ifmgd->bssid, mgmt->bssid, ETH_ALEN) != 0) return; auth_alg = le16_to_cpu(mgmt->u.auth.auth_alg); auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); status_code = le16_to_cpu(mgmt->u.auth.status_code); - if (auth_alg != ifsta->auth_alg || - auth_transaction != ifsta->auth_transaction) + if (auth_alg != ifmgd->auth_alg || + auth_transaction != ifmgd->auth_transaction) return; if (status_code != WLAN_STATUS_SUCCESS) { @@ -1223,15 +1031,15 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, const int num_algs = ARRAY_SIZE(algs); int i, pos; algs[0] = algs[1] = algs[2] = 0xff; - if (ifsta->auth_algs & IEEE80211_AUTH_ALG_OPEN) + if (ifmgd->auth_algs & IEEE80211_AUTH_ALG_OPEN) algs[0] = WLAN_AUTH_OPEN; - if (ifsta->auth_algs & IEEE80211_AUTH_ALG_SHARED_KEY) + if (ifmgd->auth_algs & IEEE80211_AUTH_ALG_SHARED_KEY) algs[1] = WLAN_AUTH_SHARED_KEY; - if (ifsta->auth_algs & IEEE80211_AUTH_ALG_LEAP) + if (ifmgd->auth_algs & IEEE80211_AUTH_ALG_LEAP) algs[2] = WLAN_AUTH_LEAP; - if (ifsta->auth_alg == WLAN_AUTH_OPEN) + if (ifmgd->auth_alg == WLAN_AUTH_OPEN) pos = 0; - else if (ifsta->auth_alg == WLAN_AUTH_SHARED_KEY) + else if (ifmgd->auth_alg == WLAN_AUTH_SHARED_KEY) pos = 1; else pos = 2; @@ -1239,101 +1047,101 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, pos++; if (pos >= num_algs) pos = 0; - if (algs[pos] == ifsta->auth_alg || + if (algs[pos] == ifmgd->auth_alg || algs[pos] == 0xff) continue; if (algs[pos] == WLAN_AUTH_SHARED_KEY && !ieee80211_sta_wep_configured(sdata)) continue; - ifsta->auth_alg = algs[pos]; + ifmgd->auth_alg = algs[pos]; break; } } return; } - switch (ifsta->auth_alg) { + switch (ifmgd->auth_alg) { case WLAN_AUTH_OPEN: case WLAN_AUTH_LEAP: - ieee80211_auth_completed(sdata, ifsta); + ieee80211_auth_completed(sdata); break; case WLAN_AUTH_SHARED_KEY: - if (ifsta->auth_transaction == 4) - ieee80211_auth_completed(sdata, ifsta); + if (ifmgd->auth_transaction == 4) + ieee80211_auth_completed(sdata); else - ieee80211_auth_challenge(sdata, ifsta, mgmt, len); + ieee80211_auth_challenge(sdata, mgmt, len); break; } } static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta, struct ieee80211_mgmt *mgmt, size_t len) { + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u16 reason_code; if (len < 24 + 2) return; - if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN)) + if (memcmp(ifmgd->bssid, mgmt->sa, ETH_ALEN)) return; reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); - if (ifsta->flags & IEEE80211_STA_AUTHENTICATED) + if (ifmgd->flags & IEEE80211_STA_AUTHENTICATED) printk(KERN_DEBUG "%s: deauthenticated (Reason: %u)\n", sdata->dev->name, reason_code); - if (ifsta->state == IEEE80211_STA_MLME_AUTHENTICATE || - ifsta->state == IEEE80211_STA_MLME_ASSOCIATE || - ifsta->state == IEEE80211_STA_MLME_ASSOCIATED) { - ifsta->state = IEEE80211_STA_MLME_DIRECT_PROBE; - mod_timer(&ifsta->timer, jiffies + + if (ifmgd->state == IEEE80211_STA_MLME_AUTHENTICATE || + ifmgd->state == IEEE80211_STA_MLME_ASSOCIATE || + ifmgd->state == IEEE80211_STA_MLME_ASSOCIATED) { + ifmgd->state = IEEE80211_STA_MLME_DIRECT_PROBE; + mod_timer(&ifmgd->timer, jiffies + IEEE80211_RETRY_AUTH_INTERVAL); } - ieee80211_set_disassoc(sdata, ifsta, true, false, 0); - ifsta->flags &= ~IEEE80211_STA_AUTHENTICATED; + ieee80211_set_disassoc(sdata, true, false, 0); + ifmgd->flags &= ~IEEE80211_STA_AUTHENTICATED; } static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta, struct ieee80211_mgmt *mgmt, size_t len) { + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; u16 reason_code; if (len < 24 + 2) return; - if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN)) + if (memcmp(ifmgd->bssid, mgmt->sa, ETH_ALEN)) return; reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); - if (ifsta->flags & IEEE80211_STA_ASSOCIATED) + if (ifmgd->flags & IEEE80211_STA_ASSOCIATED) printk(KERN_DEBUG "%s: disassociated (Reason: %u)\n", sdata->dev->name, reason_code); - if (ifsta->state == IEEE80211_STA_MLME_ASSOCIATED) { - ifsta->state = IEEE80211_STA_MLME_ASSOCIATE; - mod_timer(&ifsta->timer, jiffies + + if (ifmgd->state == IEEE80211_STA_MLME_ASSOCIATED) { + ifmgd->state = IEEE80211_STA_MLME_ASSOCIATE; + mod_timer(&ifmgd->timer, jiffies + IEEE80211_RETRY_AUTH_INTERVAL); } - ieee80211_set_disassoc(sdata, ifsta, false, false, reason_code); + ieee80211_set_disassoc(sdata, false, false, reason_code); } static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta, struct ieee80211_mgmt *mgmt, size_t len, int reassoc) { + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; struct sta_info *sta; @@ -1350,13 +1158,13 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, /* AssocResp and ReassocResp have identical structure, so process both * of them in this function. */ - if (ifsta->state != IEEE80211_STA_MLME_ASSOCIATE) + if (ifmgd->state != IEEE80211_STA_MLME_ASSOCIATE) return; if (len < 24 + 6) return; - if (memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) + if (memcmp(ifmgd->bssid, mgmt->sa, ETH_ALEN) != 0) return; capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info); @@ -1381,7 +1189,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, "comeback duration %u TU (%u ms)\n", sdata->dev->name, tu, ms); if (ms > IEEE80211_ASSOC_TIMEOUT) - mod_timer(&ifsta->timer, + mod_timer(&ifmgd->timer, jiffies + msecs_to_jiffies(ms)); return; } @@ -1392,7 +1200,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, /* if this was a reassociation, ensure we try a "full" * association next time. This works around some broken APs * which do not correctly reject reassociation requests. */ - ifsta->flags &= ~IEEE80211_STA_PREV_BSSID_SET; + ifmgd->flags &= ~IEEE80211_STA_PREV_BSSID_SET; return; } @@ -1408,23 +1216,23 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, } printk(KERN_DEBUG "%s: associated\n", sdata->dev->name); - ifsta->aid = aid; - ifsta->ap_capab = capab_info; + ifmgd->aid = aid; + ifmgd->ap_capab = capab_info; - kfree(ifsta->assocresp_ies); - ifsta->assocresp_ies_len = len - (pos - (u8 *) mgmt); - ifsta->assocresp_ies = kmalloc(ifsta->assocresp_ies_len, GFP_KERNEL); - if (ifsta->assocresp_ies) - memcpy(ifsta->assocresp_ies, pos, ifsta->assocresp_ies_len); + kfree(ifmgd->assocresp_ies); + ifmgd->assocresp_ies_len = len - (pos - (u8 *) mgmt); + ifmgd->assocresp_ies = kmalloc(ifmgd->assocresp_ies_len, GFP_KERNEL); + if (ifmgd->assocresp_ies) + memcpy(ifmgd->assocresp_ies, pos, ifmgd->assocresp_ies_len); rcu_read_lock(); /* Add STA entry for the AP */ - sta = sta_info_get(local, ifsta->bssid); + sta = sta_info_get(local, ifmgd->bssid); if (!sta) { newsta = true; - sta = sta_info_alloc(sdata, ifsta->bssid, GFP_ATOMIC); + sta = sta_info_alloc(sdata, ifmgd->bssid, GFP_ATOMIC); if (!sta) { printk(KERN_DEBUG "%s: failed to alloc STA entry for" " the AP\n", sdata->dev->name); @@ -1505,7 +1313,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, rate_control_rate_init(sta); - if (ifsta->flags & IEEE80211_STA_MFP_ENABLED) + if (ifmgd->flags & IEEE80211_STA_MFP_ENABLED) set_sta_flags(sta, WLAN_STA_MFP); if (elems.wmm_param) @@ -1524,12 +1332,12 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, rcu_read_unlock(); if (elems.wmm_param) - ieee80211_sta_wmm_params(local, ifsta, elems.wmm_param, + ieee80211_sta_wmm_params(local, ifmgd, elems.wmm_param, elems.wmm_param_len); if (elems.ht_info_elem && elems.wmm_param && - (ifsta->flags & IEEE80211_STA_WMM_ENABLED) && - !(ifsta->flags & IEEE80211_STA_TKIP_WEP_USED)) + (ifmgd->flags & IEEE80211_STA_WMM_ENABLED) && + !(ifmgd->flags & IEEE80211_STA_TKIP_WEP_USED)) changed |= ieee80211_enable_ht(sdata, elems.ht_info_elem, ap_ht_cap_flags); @@ -1537,163 +1345,12 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, * ieee80211_set_associated() will tell the driver */ bss_conf->aid = aid; bss_conf->assoc_capability = capab_info; - ieee80211_set_associated(sdata, ifsta, changed); + ieee80211_set_associated(sdata, changed); - ieee80211_associated(sdata, ifsta); + ieee80211_associated(sdata); } -static int __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta, - const u8 *bssid, const int beacon_int, - const int freq, - const size_t supp_rates_len, - const u8 *supp_rates, - const u16 capability) -{ - struct ieee80211_local *local = sdata->local; - int res = 0, rates, i, j; - struct sk_buff *skb; - struct ieee80211_mgmt *mgmt; - u8 *pos; - struct ieee80211_supported_band *sband; - union iwreq_data wrqu; - - if (local->ops->reset_tsf) { - /* Reset own TSF to allow time synchronization work. */ - local->ops->reset_tsf(local_to_hw(local)); - } - - if ((ifsta->flags & IEEE80211_STA_PREV_BSSID_SET) && - memcmp(ifsta->bssid, bssid, ETH_ALEN) == 0) - return res; - - skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400 + - sdata->u.sta.ie_proberesp_len); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for probe " - "response\n", sdata->dev->name); - return -ENOMEM; - } - - if (!(ifsta->flags & IEEE80211_STA_PREV_BSSID_SET)) { - /* Remove possible STA entries from other IBSS networks. */ - sta_info_flush_delayed(sdata); - } - - memcpy(ifsta->bssid, bssid, ETH_ALEN); - res = ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID); - if (res) - return res; - - local->hw.conf.beacon_int = beacon_int >= 10 ? beacon_int : 10; - - sdata->drop_unencrypted = capability & - WLAN_CAPABILITY_PRIVACY ? 1 : 0; - - res = ieee80211_set_freq(sdata, freq); - - if (res) - return res; - - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - - /* Build IBSS probe response */ - - skb_reserve(skb, local->hw.extra_tx_headroom); - - mgmt = (struct ieee80211_mgmt *) - skb_put(skb, 24 + sizeof(mgmt->u.beacon)); - memset(mgmt, 0, 24 + sizeof(mgmt->u.beacon)); - mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | - IEEE80211_STYPE_PROBE_RESP); - memset(mgmt->da, 0xff, ETH_ALEN); - memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); - memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); - mgmt->u.beacon.beacon_int = - cpu_to_le16(local->hw.conf.beacon_int); - mgmt->u.beacon.capab_info = cpu_to_le16(capability); - - pos = skb_put(skb, 2 + ifsta->ssid_len); - *pos++ = WLAN_EID_SSID; - *pos++ = ifsta->ssid_len; - memcpy(pos, ifsta->ssid, ifsta->ssid_len); - - rates = supp_rates_len; - if (rates > 8) - rates = 8; - pos = skb_put(skb, 2 + rates); - *pos++ = WLAN_EID_SUPP_RATES; - *pos++ = rates; - memcpy(pos, supp_rates, rates); - - if (sband->band == IEEE80211_BAND_2GHZ) { - pos = skb_put(skb, 2 + 1); - *pos++ = WLAN_EID_DS_PARAMS; - *pos++ = 1; - *pos++ = ieee80211_frequency_to_channel(freq); - } - - pos = skb_put(skb, 2 + 2); - *pos++ = WLAN_EID_IBSS_PARAMS; - *pos++ = 2; - /* FIX: set ATIM window based on scan results */ - *pos++ = 0; - *pos++ = 0; - - if (supp_rates_len > 8) { - rates = supp_rates_len - 8; - pos = skb_put(skb, 2 + rates); - *pos++ = WLAN_EID_EXT_SUPP_RATES; - *pos++ = rates; - memcpy(pos, &supp_rates[8], rates); - } - - add_extra_ies(skb, sdata->u.sta.ie_proberesp, - sdata->u.sta.ie_proberesp_len); - - ifsta->probe_resp = skb; - - ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON | - IEEE80211_IFCC_BEACON_ENABLED); - - - rates = 0; - for (i = 0; i < supp_rates_len; i++) { - int bitrate = (supp_rates[i] & 0x7f) * 5; - for (j = 0; j < sband->n_bitrates; j++) - if (sband->bitrates[j].bitrate == bitrate) - rates |= BIT(j); - } - ifsta->supp_rates_bits[local->hw.conf.channel->band] = rates; - - ieee80211_sta_def_wmm_params(sdata, supp_rates_len, supp_rates); - - ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET; - ifsta->state = IEEE80211_STA_MLME_IBSS_JOINED; - mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL); - - ieee80211_led_assoc(local, true); - - memset(&wrqu, 0, sizeof(wrqu)); - memcpy(wrqu.ap_addr.sa_data, bssid, ETH_ALEN); - wireless_send_event(sdata->dev, SIOCGIWAP, &wrqu, NULL); - - return res; -} - -static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta, - struct ieee80211_bss *bss) -{ - return __ieee80211_sta_join_ibss(sdata, ifsta, - bss->cbss.bssid, - bss->cbss.beacon_interval, - bss->cbss.channel->center_freq, - bss->supp_rates_len, bss->supp_rates, - bss->cbss.capability); -} - static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, @@ -1704,11 +1361,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; int freq; struct ieee80211_bss *bss; - struct sta_info *sta; struct ieee80211_channel *channel; - u64 beacon_timestamp, rx_timestamp; - u32 supp_rates = 0; - enum ieee80211_band band = rx_status->band; if (elems->ds_params && elems->ds_params_len == 1) freq = ieee80211_channel_to_frequency(elems->ds_params[0]); @@ -1720,133 +1373,18 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, if (!channel || channel->flags & IEEE80211_CHAN_DISABLED) return; - if (sdata->vif.type == NL80211_IFTYPE_ADHOC && elems->supp_rates && - memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0) { - supp_rates = ieee80211_sta_get_rates(local, elems, band); - - rcu_read_lock(); - - sta = sta_info_get(local, mgmt->sa); - if (sta) { - u32 prev_rates; - - prev_rates = sta->sta.supp_rates[band]; - /* make sure mandatory rates are always added */ - sta->sta.supp_rates[band] = supp_rates | - ieee80211_mandatory_rates(local, band); - -#ifdef CONFIG_MAC80211_IBSS_DEBUG - if (sta->sta.supp_rates[band] != prev_rates) - printk(KERN_DEBUG "%s: updated supp_rates set " - "for %pM based on beacon info (0x%llx | " - "0x%llx -> 0x%llx)\n", - sdata->dev->name, - sta->sta.addr, - (unsigned long long) prev_rates, - (unsigned long long) supp_rates, - (unsigned long long) sta->sta.supp_rates[band]); -#endif - } else { - ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, supp_rates); - } - - rcu_read_unlock(); - } - bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems, channel, beacon); if (!bss) return; if (elems->ch_switch_elem && (elems->ch_switch_elem_len == 3) && - (memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0)) { + (memcmp(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN) == 0)) { struct ieee80211_channel_sw_ie *sw_elem = (struct ieee80211_channel_sw_ie *)elems->ch_switch_elem; ieee80211_process_chanswitch(sdata, sw_elem, bss); } - /* was just updated in ieee80211_bss_info_update */ - beacon_timestamp = bss->cbss.tsf; - - if (sdata->vif.type != NL80211_IFTYPE_ADHOC) - goto put_bss; - - /* check if we need to merge IBSS */ - - /* merge only on beacons (???) */ - if (!beacon) - goto put_bss; - - /* we use a fixed BSSID */ - if (sdata->u.sta.flags & IEEE80211_STA_BSSID_SET) - goto put_bss; - - /* not an IBSS */ - if (!(bss->cbss.capability & WLAN_CAPABILITY_IBSS)) - goto put_bss; - - /* different channel */ - if (bss->cbss.channel != local->oper_channel) - goto put_bss; - - /* different SSID */ - if (elems->ssid_len != sdata->u.sta.ssid_len || - memcmp(elems->ssid, sdata->u.sta.ssid, - sdata->u.sta.ssid_len)) - goto put_bss; - - if (rx_status->flag & RX_FLAG_TSFT) { - /* - * For correct IBSS merging we need mactime; since mactime is - * defined as the time the first data symbol of the frame hits - * the PHY, and the timestamp of the beacon is defined as "the - * time that the data symbol containing the first bit of the - * timestamp is transmitted to the PHY plus the transmitting - * STA's delays through its local PHY from the MAC-PHY - * interface to its interface with the WM" (802.11 11.1.2) - * - equals the time this bit arrives at the receiver - we have - * to take into account the offset between the two. - * - * E.g. at 1 MBit that means mactime is 192 usec earlier - * (=24 bytes * 8 usecs/byte) than the beacon timestamp. - */ - int rate; - - if (rx_status->flag & RX_FLAG_HT) - rate = 65; /* TODO: HT rates */ - else - rate = local->hw.wiphy->bands[band]-> - bitrates[rx_status->rate_idx].bitrate; - - rx_timestamp = rx_status->mactime + (24 * 8 * 10 / rate); - } else if (local && local->ops && local->ops->get_tsf) - /* second best option: get current TSF */ - rx_timestamp = local->ops->get_tsf(local_to_hw(local)); - else - /* can't merge without knowing the TSF */ - rx_timestamp = -1LLU; - -#ifdef CONFIG_MAC80211_IBSS_DEBUG - printk(KERN_DEBUG "RX beacon SA=%pM BSSID=" - "%pM TSF=0x%llx BCN=0x%llx diff=%lld @%lu\n", - mgmt->sa, mgmt->bssid, - (unsigned long long)rx_timestamp, - (unsigned long long)beacon_timestamp, - (unsigned long long)(rx_timestamp - beacon_timestamp), - jiffies); -#endif - - if (beacon_timestamp > rx_timestamp) { -#ifdef CONFIG_MAC80211_IBSS_DEBUG - printk(KERN_DEBUG "%s: beacon TSF higher than " - "local TSF - IBSS merge with BSSID %pM\n", - sdata->dev->name, mgmt->bssid); -#endif - ieee80211_sta_join_ibss(sdata, &sdata->u.sta, bss); - ieee80211_ibss_add_sta(sdata, mgmt->bssid, mgmt->sa, supp_rates); - } - - put_bss: ieee80211_rx_bss_put(local, bss); } @@ -1858,7 +1396,6 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, { size_t baselen; struct ieee802_11_elems elems; - struct ieee80211_if_sta *ifsta = &sdata->u.sta; if (memcmp(mgmt->da, sdata->dev->dev_addr, ETH_ALEN)) return; /* ignore ProbeResp to foreign address */ @@ -1874,20 +1411,19 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, /* direct probe may be part of the association flow */ if (test_and_clear_bit(IEEE80211_STA_REQ_DIRECT_PROBE, - &ifsta->request)) { + &sdata->u.mgd.request)) { printk(KERN_DEBUG "%s direct probe responded\n", sdata->dev->name); - ieee80211_authenticate(sdata, ifsta); + ieee80211_authenticate(sdata); } } - static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status) { - struct ieee80211_if_sta *ifsta; + struct ieee80211_if_managed *ifmgd; size_t baselen; struct ieee802_11_elems elems; struct ieee80211_local *local = sdata->local; @@ -1906,21 +1442,22 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (sdata->vif.type != NL80211_IFTYPE_STATION) return; - ifsta = &sdata->u.sta; - if (!(ifsta->flags & IEEE80211_STA_ASSOCIATED) || - memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) + ifmgd = &sdata->u.mgd; + + if (!(ifmgd->flags & IEEE80211_STA_ASSOCIATED) || + memcmp(ifmgd->bssid, mgmt->bssid, ETH_ALEN) != 0) return; if (rx_status->freq != local->hw.conf.channel->center_freq) return; - ieee80211_sta_wmm_params(local, ifsta, elems.wmm_param, + ieee80211_sta_wmm_params(local, ifmgd, elems.wmm_param, elems.wmm_param_len); if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK && local->hw.conf.flags & IEEE80211_CONF_PS) { - directed_tim = ieee80211_check_tim(&elems, ifsta->aid); + directed_tim = ieee80211_check_tim(&elems, ifmgd->aid); if (directed_tim) { if (local->hw.conf.dynamic_ps_timeout > 0) { @@ -1956,14 +1493,14 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, if (elems.ht_cap_elem && elems.ht_info_elem && elems.wmm_param && - !(ifsta->flags & IEEE80211_STA_TKIP_WEP_USED)) { + !(ifmgd->flags & IEEE80211_STA_TKIP_WEP_USED)) { struct sta_info *sta; struct ieee80211_supported_band *sband; u16 ap_ht_cap_flags; rcu_read_lock(); - sta = sta_info_get(local, ifsta->bssid); + sta = sta_info_get(local, ifmgd->bssid); if (!sta) { rcu_read_unlock(); return; @@ -1999,85 +1536,16 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_bss_info_change_notify(sdata, changed); } - -static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta, - struct ieee80211_mgmt *mgmt, - size_t len) +ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, + struct ieee80211_rx_status *rx_status) { struct ieee80211_local *local = sdata->local; - int tx_last_beacon; - struct sk_buff *skb; - struct ieee80211_mgmt *resp; - u8 *pos, *end; - - if (ifsta->state != IEEE80211_STA_MLME_IBSS_JOINED || - len < 24 + 2 || !ifsta->probe_resp) - return; - - if (local->ops->tx_last_beacon) - tx_last_beacon = local->ops->tx_last_beacon(local_to_hw(local)); - else - tx_last_beacon = 1; - -#ifdef CONFIG_MAC80211_IBSS_DEBUG - printk(KERN_DEBUG "%s: RX ProbeReq SA=%pM DA=%pM BSSID=%pM" - " (tx_last_beacon=%d)\n", - sdata->dev->name, mgmt->sa, mgmt->da, - mgmt->bssid, tx_last_beacon); -#endif /* CONFIG_MAC80211_IBSS_DEBUG */ - - if (!tx_last_beacon) - return; - - if (memcmp(mgmt->bssid, ifsta->bssid, ETH_ALEN) != 0 && - memcmp(mgmt->bssid, "\xff\xff\xff\xff\xff\xff", ETH_ALEN) != 0) - return; - - end = ((u8 *) mgmt) + len; - pos = mgmt->u.probe_req.variable; - if (pos[0] != WLAN_EID_SSID || - pos + 2 + pos[1] > end) { -#ifdef CONFIG_MAC80211_IBSS_DEBUG - printk(KERN_DEBUG "%s: Invalid SSID IE in ProbeReq " - "from %pM\n", - sdata->dev->name, mgmt->sa); -#endif - return; - } - if (pos[1] != 0 && - (pos[1] != ifsta->ssid_len || - memcmp(pos + 2, ifsta->ssid, ifsta->ssid_len) != 0)) { - /* Ignore ProbeReq for foreign SSID */ - return; - } - - /* Reply with ProbeResp */ - skb = skb_copy(ifsta->probe_resp, GFP_KERNEL); - if (!skb) - return; - - resp = (struct ieee80211_mgmt *) skb->data; - memcpy(resp->da, mgmt->sa, ETH_ALEN); -#ifdef CONFIG_MAC80211_IBSS_DEBUG - printk(KERN_DEBUG "%s: Sending ProbeResp to %pM\n", - sdata->dev->name, resp->da); -#endif /* CONFIG_MAC80211_IBSS_DEBUG */ - ieee80211_tx_skb(sdata, skb, 0); -} - -void ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, - struct ieee80211_rx_status *rx_status) -{ - struct ieee80211_local *local = sdata->local; - struct ieee80211_if_sta *ifsta; struct ieee80211_mgmt *mgmt; u16 fc; if (skb->len < 24) - goto fail; - - ifsta = &sdata->u.sta; + return RX_DROP_MONITOR; mgmt = (struct ieee80211_mgmt *) skb->data; fc = le16_to_cpu(mgmt->frame_control); @@ -2092,147 +1560,68 @@ void ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff * case IEEE80211_STYPE_REASSOC_RESP: case IEEE80211_STYPE_DEAUTH: case IEEE80211_STYPE_DISASSOC: - skb_queue_tail(&ifsta->skb_queue, skb); - queue_work(local->hw.workqueue, &ifsta->work); - return; + skb_queue_tail(&sdata->u.mgd.skb_queue, skb); + queue_work(local->hw.workqueue, &sdata->u.mgd.work); + return RX_QUEUED; } - fail: - kfree_skb(skb); + return RX_DROP_MONITOR; } static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_rx_status *rx_status; - struct ieee80211_if_sta *ifsta; struct ieee80211_mgmt *mgmt; u16 fc; - ifsta = &sdata->u.sta; - rx_status = (struct ieee80211_rx_status *) skb->cb; mgmt = (struct ieee80211_mgmt *) skb->data; fc = le16_to_cpu(mgmt->frame_control); - if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { - switch (fc & IEEE80211_FCTL_STYPE) { - case IEEE80211_STYPE_PROBE_REQ: - ieee80211_rx_mgmt_probe_req(sdata, ifsta, mgmt, - skb->len); - break; - case IEEE80211_STYPE_PROBE_RESP: - ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len, - rx_status); - break; - case IEEE80211_STYPE_BEACON: - ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, - rx_status); - break; - case IEEE80211_STYPE_AUTH: - ieee80211_rx_mgmt_auth_ibss(sdata, ifsta, mgmt, - skb->len); - break; - } - } else { /* NL80211_IFTYPE_STATION */ - switch (fc & IEEE80211_FCTL_STYPE) { - case IEEE80211_STYPE_PROBE_RESP: - ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len, - rx_status); - break; - case IEEE80211_STYPE_BEACON: - ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, - rx_status); - break; - case IEEE80211_STYPE_AUTH: - ieee80211_rx_mgmt_auth(sdata, ifsta, mgmt, skb->len); - break; - case IEEE80211_STYPE_ASSOC_RESP: - ieee80211_rx_mgmt_assoc_resp(sdata, ifsta, mgmt, - skb->len, 0); - break; - case IEEE80211_STYPE_REASSOC_RESP: - ieee80211_rx_mgmt_assoc_resp(sdata, ifsta, mgmt, - skb->len, 1); - break; - case IEEE80211_STYPE_DEAUTH: - ieee80211_rx_mgmt_deauth(sdata, ifsta, mgmt, skb->len); - break; - case IEEE80211_STYPE_DISASSOC: - ieee80211_rx_mgmt_disassoc(sdata, ifsta, mgmt, - skb->len); - break; - } + switch (fc & IEEE80211_FCTL_STYPE) { + case IEEE80211_STYPE_PROBE_RESP: + ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len, + rx_status); + break; + case IEEE80211_STYPE_BEACON: + ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, + rx_status); + break; + case IEEE80211_STYPE_AUTH: + ieee80211_rx_mgmt_auth(sdata, mgmt, skb->len); + break; + case IEEE80211_STYPE_ASSOC_RESP: + ieee80211_rx_mgmt_assoc_resp(sdata, mgmt, skb->len, 0); + break; + case IEEE80211_STYPE_REASSOC_RESP: + ieee80211_rx_mgmt_assoc_resp(sdata, mgmt, skb->len, 1); + break; + case IEEE80211_STYPE_DEAUTH: + ieee80211_rx_mgmt_deauth(sdata, mgmt, skb->len); + break; + case IEEE80211_STYPE_DISASSOC: + ieee80211_rx_mgmt_disassoc(sdata, mgmt, skb->len); + break; } kfree_skb(skb); } - -static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata) -{ - struct ieee80211_local *local = sdata->local; - int active = 0; - struct sta_info *sta; - - rcu_read_lock(); - - list_for_each_entry_rcu(sta, &local->sta_list, list) { - if (sta->sdata == sdata && - time_after(sta->last_rx + IEEE80211_IBSS_MERGE_INTERVAL, - jiffies)) { - active++; - break; - } - } - - rcu_read_unlock(); - - return active; -} - - -static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta) -{ - mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL); - - ieee80211_sta_expire(sdata, IEEE80211_IBSS_INACTIVITY_LIMIT); - if (ieee80211_sta_active_ibss(sdata)) - return; - - if ((sdata->u.sta.flags & IEEE80211_STA_BSSID_SET) && - (!(sdata->u.sta.flags & IEEE80211_STA_AUTO_CHANNEL_SEL))) - return; - - printk(KERN_DEBUG "%s: No active IBSS STAs - trying to scan for other " - "IBSS networks with same SSID (merge)\n", sdata->dev->name); - - /* XXX maybe racy? */ - if (sdata->local->scan_req) - return; - - memcpy(sdata->local->int_scan_req.ssids[0].ssid, - ifsta->ssid, IEEE80211_MAX_SSID_LEN); - sdata->local->int_scan_req.ssids[0].ssid_len = ifsta->ssid_len; - ieee80211_request_scan(sdata, &sdata->local->int_scan_req); -} - - static void ieee80211_sta_timer(unsigned long data) { struct ieee80211_sub_if_data *sdata = (struct ieee80211_sub_if_data *) data; - struct ieee80211_if_sta *ifsta = &sdata->u.sta; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; - set_bit(IEEE80211_STA_REQ_RUN, &ifsta->request); - queue_work(local->hw.workqueue, &ifsta->work); + set_bit(IEEE80211_STA_REQ_RUN, &ifmgd->request); + queue_work(local->hw.workqueue, &ifmgd->work); } -static void ieee80211_sta_reset_auth(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta) +static void ieee80211_sta_reset_auth(struct ieee80211_sub_if_data *sdata) { + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; if (local->ops->reset_tsf) { @@ -2240,191 +1629,39 @@ static void ieee80211_sta_reset_auth(struct ieee80211_sub_if_data *sdata, local->ops->reset_tsf(local_to_hw(local)); } - ifsta->wmm_last_param_set = -1; /* allow any WMM update */ + ifmgd->wmm_last_param_set = -1; /* allow any WMM update */ - if (ifsta->auth_algs & IEEE80211_AUTH_ALG_OPEN) - ifsta->auth_alg = WLAN_AUTH_OPEN; - else if (ifsta->auth_algs & IEEE80211_AUTH_ALG_SHARED_KEY) - ifsta->auth_alg = WLAN_AUTH_SHARED_KEY; - else if (ifsta->auth_algs & IEEE80211_AUTH_ALG_LEAP) - ifsta->auth_alg = WLAN_AUTH_LEAP; + if (ifmgd->auth_algs & IEEE80211_AUTH_ALG_OPEN) + ifmgd->auth_alg = WLAN_AUTH_OPEN; + else if (ifmgd->auth_algs & IEEE80211_AUTH_ALG_SHARED_KEY) + ifmgd->auth_alg = WLAN_AUTH_SHARED_KEY; + else if (ifmgd->auth_algs & IEEE80211_AUTH_ALG_LEAP) + ifmgd->auth_alg = WLAN_AUTH_LEAP; else - ifsta->auth_alg = WLAN_AUTH_OPEN; - ifsta->auth_transaction = -1; - ifsta->flags &= ~IEEE80211_STA_ASSOCIATED; - ifsta->assoc_scan_tries = 0; - ifsta->direct_probe_tries = 0; - ifsta->auth_tries = 0; - ifsta->assoc_tries = 0; + ifmgd->auth_alg = WLAN_AUTH_OPEN; + ifmgd->auth_transaction = -1; + ifmgd->flags &= ~IEEE80211_STA_ASSOCIATED; + ifmgd->assoc_scan_tries = 0; + ifmgd->direct_probe_tries = 0; + ifmgd->auth_tries = 0; + ifmgd->assoc_tries = 0; netif_tx_stop_all_queues(sdata->dev); netif_carrier_off(sdata->dev); } -static int ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta) -{ - struct ieee80211_local *local = sdata->local; - struct ieee80211_supported_band *sband; - u8 *pos; - u8 bssid[ETH_ALEN]; - u8 supp_rates[IEEE80211_MAX_SUPP_RATES]; - u16 capability; - int i; - - if (sdata->u.sta.flags & IEEE80211_STA_BSSID_SET) { - memcpy(bssid, ifsta->bssid, ETH_ALEN); - } else { - /* Generate random, not broadcast, locally administered BSSID. Mix in - * own MAC address to make sure that devices that do not have proper - * random number generator get different BSSID. */ - get_random_bytes(bssid, ETH_ALEN); - for (i = 0; i < ETH_ALEN; i++) - bssid[i] ^= sdata->dev->dev_addr[i]; - bssid[0] &= ~0x01; - bssid[0] |= 0x02; - } - - printk(KERN_DEBUG "%s: Creating new IBSS network, BSSID %pM\n", - sdata->dev->name, bssid); - - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - - if (local->hw.conf.beacon_int == 0) - local->hw.conf.beacon_int = 100; - - capability = WLAN_CAPABILITY_IBSS; - - if (sdata->default_key) - capability |= WLAN_CAPABILITY_PRIVACY; - else - sdata->drop_unencrypted = 0; - - pos = supp_rates; - for (i = 0; i < sband->n_bitrates; i++) { - int rate = sband->bitrates[i].bitrate; - *pos++ = (u8) (rate / 5); - } - - return __ieee80211_sta_join_ibss(sdata, ifsta, - bssid, local->hw.conf.beacon_int, - local->hw.conf.channel->center_freq, - sband->n_bitrates, supp_rates, - capability); -} - - -static int ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta) +static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata) { + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; struct ieee80211_bss *bss; - int active_ibss; - - if (ifsta->ssid_len == 0) - return -EINVAL; - - active_ibss = ieee80211_sta_active_ibss(sdata); -#ifdef CONFIG_MAC80211_IBSS_DEBUG - printk(KERN_DEBUG "%s: sta_find_ibss (active_ibss=%d)\n", - sdata->dev->name, active_ibss); -#endif /* CONFIG_MAC80211_IBSS_DEBUG */ - - if (active_ibss) - return 0; - - if (ifsta->flags & IEEE80211_STA_BSSID_SET) - bss = ieee80211_rx_bss_get(local, ifsta->bssid, 0, - ifsta->ssid, ifsta->ssid_len); - else - bss = (void *)cfg80211_get_ibss(local->hw.wiphy, - NULL, - ifsta->ssid, ifsta->ssid_len); - -#ifdef CONFIG_MAC80211_IBSS_DEBUG - if (bss) - printk(KERN_DEBUG " sta_find_ibss: selected %pM current " - "%pM\n", bss->cbss.bssid, ifsta->bssid); -#endif /* CONFIG_MAC80211_IBSS_DEBUG */ - - if (bss && - (!(ifsta->flags & IEEE80211_STA_PREV_BSSID_SET) || - memcmp(ifsta->bssid, bss->cbss.bssid, ETH_ALEN))) { - int ret; - - printk(KERN_DEBUG "%s: Selected IBSS BSSID %pM" - " based on configured SSID\n", - sdata->dev->name, bss->cbss.bssid); - - ret = ieee80211_sta_join_ibss(sdata, ifsta, bss); - ieee80211_rx_bss_put(local, bss); - return ret; - } else if (bss) - ieee80211_rx_bss_put(local, bss); - -#ifdef CONFIG_MAC80211_IBSS_DEBUG - printk(KERN_DEBUG " did not try to join ibss\n"); -#endif /* CONFIG_MAC80211_IBSS_DEBUG */ - - /* Selected IBSS not found in current scan results - try to scan */ - if (ifsta->state == IEEE80211_STA_MLME_IBSS_JOINED && - !ieee80211_sta_active_ibss(sdata)) { - mod_timer(&ifsta->timer, jiffies + - IEEE80211_IBSS_MERGE_INTERVAL); - } else if (time_after(jiffies, local->last_scan_completed + - IEEE80211_SCAN_INTERVAL)) { - printk(KERN_DEBUG "%s: Trigger new scan to find an IBSS to " - "join\n", sdata->dev->name); - - /* XXX maybe racy? */ - if (local->scan_req) - return -EBUSY; - - memcpy(local->int_scan_req.ssids[0].ssid, - ifsta->ssid, IEEE80211_MAX_SSID_LEN); - local->int_scan_req.ssids[0].ssid_len = ifsta->ssid_len; - return ieee80211_request_scan(sdata, &local->int_scan_req); - } else if (ifsta->state != IEEE80211_STA_MLME_IBSS_JOINED) { - int interval = IEEE80211_SCAN_INTERVAL; - - if (time_after(jiffies, ifsta->ibss_join_req + - IEEE80211_IBSS_JOIN_TIMEOUT)) { - if ((ifsta->flags & IEEE80211_STA_CREATE_IBSS) && - (!(local->oper_channel->flags & - IEEE80211_CHAN_NO_IBSS))) - return ieee80211_sta_create_ibss(sdata, ifsta); - if (ifsta->flags & IEEE80211_STA_CREATE_IBSS) { - printk(KERN_DEBUG "%s: IBSS not allowed on" - " %d MHz\n", sdata->dev->name, - local->hw.conf.channel->center_freq); - } - - /* No IBSS found - decrease scan interval and continue - * scanning. */ - interval = IEEE80211_SCAN_INTERVAL_SLOW; - } - - ifsta->state = IEEE80211_STA_MLME_IBSS_SEARCH; - mod_timer(&ifsta->timer, jiffies + interval); - return 0; - } - - return 0; -} - - -static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta) -{ - struct ieee80211_local *local = sdata->local; - struct ieee80211_bss *bss; - u8 *bssid = ifsta->bssid, *ssid = ifsta->ssid; - u8 ssid_len = ifsta->ssid_len; + u8 *bssid = ifmgd->bssid, *ssid = ifmgd->ssid; + u8 ssid_len = ifmgd->ssid_len; u16 capa_mask = WLAN_CAPABILITY_ESS; u16 capa_val = WLAN_CAPABILITY_ESS; struct ieee80211_channel *chan = local->oper_channel; - if (ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL | + if (ifmgd->flags & (IEEE80211_STA_AUTO_SSID_SEL | IEEE80211_STA_AUTO_BSSID_SEL | IEEE80211_STA_AUTO_CHANNEL_SEL)) { capa_mask |= WLAN_CAPABILITY_PRIVACY; @@ -2432,13 +1669,13 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata, capa_val |= WLAN_CAPABILITY_PRIVACY; } - if (ifsta->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) + if (ifmgd->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) chan = NULL; - if (ifsta->flags & IEEE80211_STA_AUTO_BSSID_SEL) + if (ifmgd->flags & IEEE80211_STA_AUTO_BSSID_SEL) bssid = NULL; - if (ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL) { + if (ifmgd->flags & IEEE80211_STA_AUTO_SSID_SEL) { ssid = NULL; ssid_len = 0; } @@ -2449,16 +1686,16 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata, if (bss) { ieee80211_set_freq(sdata, bss->cbss.channel->center_freq); - if (!(ifsta->flags & IEEE80211_STA_SSID_SET)) + if (!(ifmgd->flags & IEEE80211_STA_SSID_SET)) ieee80211_sta_set_ssid(sdata, bss->ssid, bss->ssid_len); ieee80211_sta_set_bssid(sdata, bss->cbss.bssid); ieee80211_sta_def_wmm_params(sdata, bss->supp_rates_len, bss->supp_rates); - if (sdata->u.sta.mfp == IEEE80211_MFP_REQUIRED) - sdata->u.sta.flags |= IEEE80211_STA_MFP_ENABLED; + if (sdata->u.mgd.mfp == IEEE80211_MFP_REQUIRED) + sdata->u.mgd.flags |= IEEE80211_STA_MFP_ENABLED; else - sdata->u.sta.flags &= ~IEEE80211_STA_MFP_ENABLED; + sdata->u.mgd.flags &= ~IEEE80211_STA_MFP_ENABLED; /* Send out direct probe if no probe resp was received or * the one we have is outdated @@ -2466,31 +1703,31 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata, if (!bss->last_probe_resp || time_after(jiffies, bss->last_probe_resp + IEEE80211_SCAN_RESULT_EXPIRE)) - ifsta->state = IEEE80211_STA_MLME_DIRECT_PROBE; + ifmgd->state = IEEE80211_STA_MLME_DIRECT_PROBE; else - ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE; + ifmgd->state = IEEE80211_STA_MLME_AUTHENTICATE; ieee80211_rx_bss_put(local, bss); - ieee80211_sta_reset_auth(sdata, ifsta); + ieee80211_sta_reset_auth(sdata); return 0; } else { - if (ifsta->assoc_scan_tries < IEEE80211_ASSOC_SCANS_MAX_TRIES) { - ifsta->assoc_scan_tries++; + if (ifmgd->assoc_scan_tries < IEEE80211_ASSOC_SCANS_MAX_TRIES) { + ifmgd->assoc_scan_tries++; /* XXX maybe racy? */ if (local->scan_req) return -1; memcpy(local->int_scan_req.ssids[0].ssid, - ifsta->ssid, IEEE80211_MAX_SSID_LEN); - if (ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL) + ifmgd->ssid, IEEE80211_MAX_SSID_LEN); + if (ifmgd->flags & IEEE80211_STA_AUTO_SSID_SEL) local->int_scan_req.ssids[0].ssid_len = 0; else - local->int_scan_req.ssids[0].ssid_len = ifsta->ssid_len; + local->int_scan_req.ssids[0].ssid_len = ifmgd->ssid_len; ieee80211_start_scan(sdata, &local->int_scan_req); - ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE; - set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request); + ifmgd->state = IEEE80211_STA_MLME_AUTHENTICATE; + set_bit(IEEE80211_STA_REQ_AUTH, &ifmgd->request); } else { - ifsta->assoc_scan_tries = 0; - ifsta->state = IEEE80211_STA_MLME_DISABLED; + ifmgd->assoc_scan_tries = 0; + ifmgd->state = IEEE80211_STA_MLME_DISABLED; } } return -1; @@ -2500,9 +1737,9 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata, static void ieee80211_sta_work(struct work_struct *work) { struct ieee80211_sub_if_data *sdata = - container_of(work, struct ieee80211_sub_if_data, u.sta.work); + container_of(work, struct ieee80211_sub_if_data, u.mgd.work); struct ieee80211_local *local = sdata->local; - struct ieee80211_if_sta *ifsta; + struct ieee80211_if_managed *ifmgd; struct sk_buff *skb; if (!netif_running(sdata->dev)) @@ -2511,60 +1748,53 @@ static void ieee80211_sta_work(struct work_struct *work) if (local->sw_scanning || local->hw_scanning) return; - if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION && - sdata->vif.type != NL80211_IFTYPE_ADHOC)) + if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION)) return; - ifsta = &sdata->u.sta; + ifmgd = &sdata->u.mgd; - while ((skb = skb_dequeue(&ifsta->skb_queue))) + while ((skb = skb_dequeue(&ifmgd->skb_queue))) ieee80211_sta_rx_queued_mgmt(sdata, skb); - if (ifsta->state != IEEE80211_STA_MLME_DIRECT_PROBE && - ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE && - ifsta->state != IEEE80211_STA_MLME_ASSOCIATE && - test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request)) { + if (ifmgd->state != IEEE80211_STA_MLME_DIRECT_PROBE && + ifmgd->state != IEEE80211_STA_MLME_AUTHENTICATE && + ifmgd->state != IEEE80211_STA_MLME_ASSOCIATE && + test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifmgd->request)) { ieee80211_start_scan(sdata, local->scan_req); return; } - if (test_and_clear_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request)) { - if (ieee80211_sta_config_auth(sdata, ifsta)) + if (test_and_clear_bit(IEEE80211_STA_REQ_AUTH, &ifmgd->request)) { + if (ieee80211_sta_config_auth(sdata)) return; - clear_bit(IEEE80211_STA_REQ_RUN, &ifsta->request); - } else if (!test_and_clear_bit(IEEE80211_STA_REQ_RUN, &ifsta->request)) + clear_bit(IEEE80211_STA_REQ_RUN, &ifmgd->request); + } else if (!test_and_clear_bit(IEEE80211_STA_REQ_RUN, &ifmgd->request)) return; - switch (ifsta->state) { + switch (ifmgd->state) { case IEEE80211_STA_MLME_DISABLED: break; case IEEE80211_STA_MLME_DIRECT_PROBE: - ieee80211_direct_probe(sdata, ifsta); + ieee80211_direct_probe(sdata); break; case IEEE80211_STA_MLME_AUTHENTICATE: - ieee80211_authenticate(sdata, ifsta); + ieee80211_authenticate(sdata); break; case IEEE80211_STA_MLME_ASSOCIATE: - ieee80211_associate(sdata, ifsta); + ieee80211_associate(sdata); break; case IEEE80211_STA_MLME_ASSOCIATED: - ieee80211_associated(sdata, ifsta); - break; - case IEEE80211_STA_MLME_IBSS_SEARCH: - ieee80211_sta_find_ibss(sdata, ifsta); - break; - case IEEE80211_STA_MLME_IBSS_JOINED: - ieee80211_sta_merge_ibss(sdata, ifsta); + ieee80211_associated(sdata); break; default: WARN_ON(1); break; } - if (ieee80211_privacy_mismatch(sdata, ifsta)) { + if (ieee80211_privacy_mismatch(sdata)) { printk(KERN_DEBUG "%s: privacy configuration mismatch and " "mixed-cell disabled - disassociate\n", sdata->dev->name); - ieee80211_set_disassoc(sdata, ifsta, false, true, + ieee80211_set_disassoc(sdata, false, true, WLAN_REASON_UNSPECIFIED); } } @@ -2573,155 +1803,99 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata) { if (sdata->vif.type == NL80211_IFTYPE_STATION) queue_work(sdata->local->hw.workqueue, - &sdata->u.sta.work); + &sdata->u.mgd.work); } /* interface setup */ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_if_sta *ifsta; + struct ieee80211_if_managed *ifmgd; - ifsta = &sdata->u.sta; - INIT_WORK(&ifsta->work, ieee80211_sta_work); - INIT_WORK(&ifsta->chswitch_work, ieee80211_chswitch_work); - setup_timer(&ifsta->timer, ieee80211_sta_timer, + ifmgd = &sdata->u.mgd; + INIT_WORK(&ifmgd->work, ieee80211_sta_work); + INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work); + setup_timer(&ifmgd->timer, ieee80211_sta_timer, (unsigned long) sdata); - setup_timer(&ifsta->chswitch_timer, ieee80211_chswitch_timer, + setup_timer(&ifmgd->chswitch_timer, ieee80211_chswitch_timer, (unsigned long) sdata); - skb_queue_head_init(&ifsta->skb_queue); + skb_queue_head_init(&ifmgd->skb_queue); - ifsta->capab = WLAN_CAPABILITY_ESS; - ifsta->auth_algs = IEEE80211_AUTH_ALG_OPEN | + ifmgd->capab = WLAN_CAPABILITY_ESS; + ifmgd->auth_algs = IEEE80211_AUTH_ALG_OPEN | IEEE80211_AUTH_ALG_SHARED_KEY; - ifsta->flags |= IEEE80211_STA_CREATE_IBSS | + ifmgd->flags |= IEEE80211_STA_CREATE_IBSS | IEEE80211_STA_AUTO_BSSID_SEL | IEEE80211_STA_AUTO_CHANNEL_SEL; if (ieee80211_num_regular_queues(&sdata->local->hw) >= 4) - ifsta->flags |= IEEE80211_STA_WMM_ENABLED; -} - -/* - * Add a new IBSS station, will also be called by the RX code when, - * in IBSS mode, receiving a frame from a yet-unknown station, hence - * must be callable in atomic context. - */ -struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, - u8 *bssid,u8 *addr, u32 supp_rates) -{ - struct ieee80211_local *local = sdata->local; - struct sta_info *sta; - int band = local->hw.conf.channel->band; - - /* TODO: Could consider removing the least recently used entry and - * allow new one to be added. */ - if (local->num_sta >= IEEE80211_IBSS_MAX_STA_ENTRIES) { - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: No room for a new IBSS STA " - "entry %pM\n", sdata->dev->name, addr); - } - return NULL; - } - - if (compare_ether_addr(bssid, sdata->u.sta.bssid)) - return NULL; - -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: Adding new IBSS station %pM (dev=%s)\n", - wiphy_name(local->hw.wiphy), addr, sdata->dev->name); -#endif - - sta = sta_info_alloc(sdata, addr, GFP_ATOMIC); - if (!sta) - return NULL; - - set_sta_flags(sta, WLAN_STA_AUTHORIZED); - - /* make sure mandatory rates are always added */ - sta->sta.supp_rates[band] = supp_rates | - ieee80211_mandatory_rates(local, band); - - rate_control_rate_init(sta); - - if (sta_info_insert(sta)) - return NULL; - - return sta; + ifmgd->flags |= IEEE80211_STA_WMM_ENABLED; } /* configuration hooks */ -void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata, - struct ieee80211_if_sta *ifsta) +void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata) { + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; - if (sdata->vif.type != NL80211_IFTYPE_STATION) + if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION)) return; - if ((ifsta->flags & (IEEE80211_STA_BSSID_SET | + if ((ifmgd->flags & (IEEE80211_STA_BSSID_SET | IEEE80211_STA_AUTO_BSSID_SEL)) && - (ifsta->flags & (IEEE80211_STA_SSID_SET | + (ifmgd->flags & (IEEE80211_STA_SSID_SET | IEEE80211_STA_AUTO_SSID_SEL))) { - if (ifsta->state == IEEE80211_STA_MLME_ASSOCIATED) - ieee80211_set_disassoc(sdata, ifsta, true, true, + if (ifmgd->state == IEEE80211_STA_MLME_ASSOCIATED) + ieee80211_set_disassoc(sdata, true, true, WLAN_REASON_DEAUTH_LEAVING); - set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request); - queue_work(local->hw.workqueue, &ifsta->work); + set_bit(IEEE80211_STA_REQ_AUTH, &ifmgd->request); + queue_work(local->hw.workqueue, &ifmgd->work); } } int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len) { - struct ieee80211_if_sta *ifsta; + struct ieee80211_if_managed *ifmgd; if (len > IEEE80211_MAX_SSID_LEN) return -EINVAL; - ifsta = &sdata->u.sta; + ifmgd = &sdata->u.mgd; - if (ifsta->ssid_len != len || memcmp(ifsta->ssid, ssid, len) != 0) { - memset(ifsta->ssid, 0, sizeof(ifsta->ssid)); - memcpy(ifsta->ssid, ssid, len); - ifsta->ssid_len = len; + if (ifmgd->ssid_len != len || memcmp(ifmgd->ssid, ssid, len) != 0) { + memset(ifmgd->ssid, 0, sizeof(ifmgd->ssid)); + memcpy(ifmgd->ssid, ssid, len); + ifmgd->ssid_len = len; } - ifsta->flags &= ~IEEE80211_STA_PREV_BSSID_SET; + ifmgd->flags &= ~IEEE80211_STA_PREV_BSSID_SET; if (len) - ifsta->flags |= IEEE80211_STA_SSID_SET; + ifmgd->flags |= IEEE80211_STA_SSID_SET; else - ifsta->flags &= ~IEEE80211_STA_SSID_SET; - - if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { - ifsta->ibss_join_req = jiffies; - ifsta->state = IEEE80211_STA_MLME_IBSS_SEARCH; - return ieee80211_sta_find_ibss(sdata, ifsta); - } + ifmgd->flags &= ~IEEE80211_STA_SSID_SET; return 0; } int ieee80211_sta_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len) { - struct ieee80211_if_sta *ifsta = &sdata->u.sta; - memcpy(ssid, ifsta->ssid, ifsta->ssid_len); - *len = ifsta->ssid_len; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + memcpy(ssid, ifmgd->ssid, ifmgd->ssid_len); + *len = ifmgd->ssid_len; return 0; } int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid) { - struct ieee80211_if_sta *ifsta; - - ifsta = &sdata->u.sta; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; if (is_valid_ether_addr(bssid)) { - memcpy(ifsta->bssid, bssid, ETH_ALEN); - ifsta->flags |= IEEE80211_STA_BSSID_SET; + memcpy(ifmgd->bssid, bssid, ETH_ALEN); + ifmgd->flags |= IEEE80211_STA_BSSID_SET; } else { - memset(ifsta->bssid, 0, ETH_ALEN); - ifsta->flags &= ~IEEE80211_STA_BSSID_SET; + memset(ifmgd->bssid, 0, ETH_ALEN); + ifmgd->flags &= ~IEEE80211_STA_BSSID_SET; } if (netif_running(sdata->dev)) { @@ -2731,47 +1905,44 @@ int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid) } } - return ieee80211_sta_set_ssid(sdata, ifsta->ssid, ifsta->ssid_len); + return ieee80211_sta_set_ssid(sdata, ifmgd->ssid, ifmgd->ssid_len); } int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, char *ie, size_t len) { - struct ieee80211_if_sta *ifsta = &sdata->u.sta; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - kfree(ifsta->extra_ie); + kfree(ifmgd->extra_ie); if (len == 0) { - ifsta->extra_ie = NULL; - ifsta->extra_ie_len = 0; + ifmgd->extra_ie = NULL; + ifmgd->extra_ie_len = 0; return 0; } - ifsta->extra_ie = kmalloc(len, GFP_KERNEL); - if (!ifsta->extra_ie) { - ifsta->extra_ie_len = 0; + ifmgd->extra_ie = kmalloc(len, GFP_KERNEL); + if (!ifmgd->extra_ie) { + ifmgd->extra_ie_len = 0; return -ENOMEM; } - memcpy(ifsta->extra_ie, ie, len); - ifsta->extra_ie_len = len; + memcpy(ifmgd->extra_ie, ie, len); + ifmgd->extra_ie_len = len; return 0; } int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason) { - struct ieee80211_if_sta *ifsta = &sdata->u.sta; - printk(KERN_DEBUG "%s: deauthenticating by local choice (reason=%d)\n", sdata->dev->name, reason); - if (sdata->vif.type != NL80211_IFTYPE_STATION && - sdata->vif.type != NL80211_IFTYPE_ADHOC) + if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EINVAL; - ieee80211_set_disassoc(sdata, ifsta, true, true, reason); + ieee80211_set_disassoc(sdata, true, true, reason); return 0; } int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason) { - struct ieee80211_if_sta *ifsta = &sdata->u.sta; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; printk(KERN_DEBUG "%s: disassociating by local choice (reason=%d)\n", sdata->dev->name, reason); @@ -2779,10 +1950,10 @@ int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason) if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EINVAL; - if (!(ifsta->flags & IEEE80211_STA_ASSOCIATED)) - return -1; + if (!(ifmgd->flags & IEEE80211_STA_ASSOCIATED)) + return -ENOLINK; - ieee80211_set_disassoc(sdata, ifsta, false, true, reason); + ieee80211_set_disassoc(sdata, false, true, reason); return 0; } @@ -2790,14 +1961,6 @@ int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason) void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata = local->scan_sdata; - struct ieee80211_if_sta *ifsta; - - if (sdata && sdata->vif.type == NL80211_IFTYPE_ADHOC) { - ifsta = &sdata->u.sta; - if ((!(ifsta->flags & IEEE80211_STA_PREV_BSSID_SET)) || - !ieee80211_sta_active_ibss(sdata)) - ieee80211_sta_find_ibss(sdata, ifsta); - } /* Restart STA timers */ rcu_read_lock(); @@ -2844,3 +2007,36 @@ void ieee80211_dynamic_ps_timer(unsigned long data) queue_work(local->hw.workqueue, &local->dynamic_ps_enable_work); } + +void ieee80211_send_nullfunc(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + int powersave) +{ + struct sk_buff *skb; + struct ieee80211_hdr *nullfunc; + __le16 fc; + + if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION)) + return; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24); + if (!skb) { + printk(KERN_DEBUG "%s: failed to allocate buffer for nullfunc " + "frame\n", sdata->dev->name); + return; + } + skb_reserve(skb, local->hw.extra_tx_headroom); + + nullfunc = (struct ieee80211_hdr *) skb_put(skb, 24); + memset(nullfunc, 0, 24); + fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC | + IEEE80211_FCTL_TODS); + if (powersave) + fc |= cpu_to_le16(IEEE80211_FCTL_PM); + nullfunc->frame_control = fc; + memcpy(nullfunc->addr1, sdata->u.mgd.bssid, ETH_ALEN); + memcpy(nullfunc->addr2, sdata->dev->dev_addr, ETH_ALEN); + memcpy(nullfunc->addr3, sdata->u.mgd.bssid, ETH_ALEN); + + ieee80211_tx_skb(sdata, skb, 0); +} diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 1327d424bf31..66f7ecf51b92 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -838,7 +838,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) if (rx->sdata->vif.type == NL80211_IFTYPE_ADHOC) { u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len, NL80211_IFTYPE_ADHOC); - if (compare_ether_addr(bssid, rx->sdata->u.sta.bssid) == 0) + if (compare_ether_addr(bssid, rx->sdata->u.ibss.bssid) == 0) sta->last_rx = jiffies; } else if (!is_multicast_ether_addr(hdr->addr1) || @@ -1702,13 +1702,13 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, return; } - if (compare_ether_addr(mgmt->sa, sdata->u.sta.bssid) != 0 || - compare_ether_addr(mgmt->bssid, sdata->u.sta.bssid) != 0) { + if (compare_ether_addr(mgmt->sa, sdata->u.mgd.bssid) != 0 || + compare_ether_addr(mgmt->bssid, sdata->u.mgd.bssid) != 0) { /* Not from the current AP. */ return; } - if (sdata->u.sta.state == IEEE80211_STA_MLME_ASSOCIATE) { + if (sdata->u.mgd.state == IEEE80211_STA_MLME_ASSOCIATE) { /* Association in progress; ignore SA Query */ return; } @@ -1727,7 +1727,7 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, memset(resp, 0, 24); memcpy(resp->da, mgmt->sa, ETH_ALEN); memcpy(resp->sa, sdata->dev->dev_addr, ETH_ALEN); - memcpy(resp->bssid, sdata->u.sta.bssid, ETH_ALEN); + memcpy(resp->bssid, sdata->u.mgd.bssid, ETH_ALEN); resp->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); skb_put(skb, 1 + sizeof(resp->u.action.u.sa_query)); @@ -1745,7 +1745,6 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) { struct ieee80211_local *local = rx->local; struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); - struct ieee80211_if_sta *ifsta = &sdata->u.sta; struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; struct ieee80211_bss *bss; int len = rx->skb->len; @@ -1803,6 +1802,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) case WLAN_CATEGORY_SPECTRUM_MGMT: if (local->hw.conf.channel->band != IEEE80211_BAND_5GHZ) return RX_DROP_MONITOR; + + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return RX_DROP_MONITOR; + switch (mgmt->u.action.u.measurement.action_code) { case WLAN_ACTION_SPCT_MSR_REQ: if (len < (IEEE80211_MIN_ACTION_SIZE + @@ -1815,12 +1818,13 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) sizeof(mgmt->u.action.u.chan_switch))) return RX_DROP_MONITOR; - if (memcmp(mgmt->bssid, ifsta->bssid, ETH_ALEN) != 0) + if (memcmp(mgmt->bssid, sdata->u.mgd.bssid, ETH_ALEN)) return RX_DROP_MONITOR; - bss = ieee80211_rx_bss_get(local, ifsta->bssid, + bss = ieee80211_rx_bss_get(local, sdata->u.mgd.bssid, local->hw.conf.channel->center_freq, - ifsta->ssid, ifsta->ssid_len); + sdata->u.mgd.ssid, + sdata->u.mgd.ssid_len); if (!bss) return RX_DROP_MONITOR; @@ -1876,11 +1880,14 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) sdata->vif.type != NL80211_IFTYPE_ADHOC) return RX_DROP_MONITOR; - if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) - return RX_DROP_MONITOR; - ieee80211_sta_rx_mgmt(sdata, rx->skb, rx->status); - return RX_QUEUED; + if (sdata->vif.type == NL80211_IFTYPE_STATION) { + if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) + return RX_DROP_MONITOR; + return ieee80211_sta_rx_mgmt(sdata, rx->skb, rx->status); + } + + return ieee80211_ibss_rx_mgmt(sdata, rx->skb, rx->status); } static void ieee80211_rx_michael_mic_report(struct net_device *dev, @@ -2083,7 +2090,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_STATION: if (!bssid) return 0; - if (!ieee80211_bssid_match(bssid, sdata->u.sta.bssid)) { + if (!ieee80211_bssid_match(bssid, sdata->u.mgd.bssid)) { if (!(rx->flags & IEEE80211_RX_IN_SCAN)) return 0; rx->flags &= ~IEEE80211_RX_RA_MATCH; @@ -2101,7 +2108,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, if (ieee80211_is_beacon(hdr->frame_control)) { return 1; } - else if (!ieee80211_bssid_match(bssid, sdata->u.sta.bssid)) { + else if (!ieee80211_bssid_match(bssid, sdata->u.ibss.bssid)) { if (!(rx->flags & IEEE80211_RX_IN_SCAN)) return 0; rx->flags &= ~IEEE80211_RX_RA_MATCH; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index f883ab9f1e6e..08a1fc27ca10 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -207,36 +207,6 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, return RX_QUEUED; } -void ieee80211_send_nullfunc(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - int powersave) -{ - struct sk_buff *skb; - struct ieee80211_hdr *nullfunc; - __le16 fc; - - skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for nullfunc " - "frame\n", sdata->dev->name); - return; - } - skb_reserve(skb, local->hw.extra_tx_headroom); - - nullfunc = (struct ieee80211_hdr *) skb_put(skb, 24); - memset(nullfunc, 0, 24); - fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC | - IEEE80211_FCTL_TODS); - if (powersave) - fc |= cpu_to_le16(IEEE80211_FCTL_PM); - nullfunc->frame_control = fc; - memcpy(nullfunc->addr1, sdata->u.sta.bssid, ETH_ALEN); - memcpy(nullfunc->addr2, sdata->dev->dev_addr, ETH_ALEN); - memcpy(nullfunc->addr3, sdata->u.sta.bssid, ETH_ALEN); - - ieee80211_tx_skb(sdata, skb, 0); -} - void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) { struct ieee80211_local *local = hw_to_local(hw); @@ -287,7 +257,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) /* Tell AP we're back */ if (sdata->vif.type == NL80211_IFTYPE_STATION) { - if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) { + if (sdata->u.mgd.flags & IEEE80211_STA_ASSOCIATED) { ieee80211_send_nullfunc(local, sdata, 0); netif_tx_wake_all_queues(sdata->dev); } @@ -305,6 +275,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) done: ieee80211_mlme_notify_scan_completed(local); + ieee80211_ibss_notify_scan_completed(local); ieee80211_mesh_notify_scan_completed(local); } EXPORT_SYMBOL(ieee80211_scan_completed); @@ -442,7 +413,7 @@ int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata, IEEE80211_IFCC_BEACON_ENABLED); if (sdata->vif.type == NL80211_IFTYPE_STATION) { - if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) { + if (sdata->u.mgd.flags & IEEE80211_STA_ASSOCIATED) { netif_tx_stop_all_queues(sdata->dev); ieee80211_send_nullfunc(local, sdata, 1); } @@ -477,7 +448,7 @@ int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, struct cfg80211_scan_request *req) { struct ieee80211_local *local = sdata->local; - struct ieee80211_if_sta *ifsta; + struct ieee80211_if_managed *ifmgd; if (!req) return -EINVAL; @@ -502,9 +473,9 @@ int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, return -EBUSY; } - ifsta = &sdata->u.sta; - set_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request); - queue_work(local->hw.workqueue, &ifsta->work); + ifmgd = &sdata->u.mgd; + set_bit(IEEE80211_STA_REQ_SCAN, &ifmgd->request); + queue_work(local->hw.workqueue, &ifmgd->work); return 0; } diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index 47bb2aed2813..5f7a2624ed74 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -88,16 +88,16 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, void ieee80211_chswitch_work(struct work_struct *work) { struct ieee80211_sub_if_data *sdata = - container_of(work, struct ieee80211_sub_if_data, u.sta.chswitch_work); + container_of(work, struct ieee80211_sub_if_data, u.mgd.chswitch_work); struct ieee80211_bss *bss; - struct ieee80211_if_sta *ifsta = &sdata->u.sta; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; if (!netif_running(sdata->dev)) return; - bss = ieee80211_rx_bss_get(sdata->local, ifsta->bssid, + bss = ieee80211_rx_bss_get(sdata->local, ifmgd->bssid, sdata->local->hw.conf.channel->center_freq, - ifsta->ssid, ifsta->ssid_len); + ifmgd->ssid, ifmgd->ssid_len); if (!bss) goto exit; @@ -108,7 +108,7 @@ void ieee80211_chswitch_work(struct work_struct *work) ieee80211_rx_bss_put(sdata->local, bss); exit: - ifsta->flags &= ~IEEE80211_STA_CSA_RECEIVED; + ifmgd->flags &= ~IEEE80211_STA_CSA_RECEIVED; ieee80211_wake_queues_by_reason(&sdata->local->hw, IEEE80211_QUEUE_STOP_REASON_CSA); } @@ -117,9 +117,9 @@ void ieee80211_chswitch_timer(unsigned long data) { struct ieee80211_sub_if_data *sdata = (struct ieee80211_sub_if_data *) data; - struct ieee80211_if_sta *ifsta = &sdata->u.sta; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - queue_work(sdata->local->hw.workqueue, &ifsta->chswitch_work); + queue_work(sdata->local->hw.workqueue, &ifmgd->chswitch_work); } void ieee80211_process_chanswitch(struct ieee80211_sub_if_data *sdata, @@ -127,14 +127,14 @@ void ieee80211_process_chanswitch(struct ieee80211_sub_if_data *sdata, struct ieee80211_bss *bss) { struct ieee80211_channel *new_ch; - struct ieee80211_if_sta *ifsta = &sdata->u.sta; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; int new_freq = ieee80211_channel_to_frequency(sw_elem->new_ch_num); /* FIXME: Handle ADHOC later */ if (sdata->vif.type != NL80211_IFTYPE_STATION) return; - if (ifsta->state != IEEE80211_STA_MLME_ASSOCIATED) + if (ifmgd->state != IEEE80211_STA_MLME_ASSOCIATED) return; if (sdata->local->sw_scanning || sdata->local->hw_scanning) @@ -143,7 +143,7 @@ void ieee80211_process_chanswitch(struct ieee80211_sub_if_data *sdata, /* Disregard subsequent beacons if we are already running a timer processing a CSA */ - if (ifsta->flags & IEEE80211_STA_CSA_RECEIVED) + if (ifmgd->flags & IEEE80211_STA_CSA_RECEIVED) return; new_ch = ieee80211_get_channel(sdata->local->hw.wiphy, new_freq); @@ -153,12 +153,12 @@ void ieee80211_process_chanswitch(struct ieee80211_sub_if_data *sdata, sdata->local->csa_channel = new_ch; if (sw_elem->count <= 1) { - queue_work(sdata->local->hw.workqueue, &ifsta->chswitch_work); + queue_work(sdata->local->hw.workqueue, &ifmgd->chswitch_work); } else { ieee80211_stop_queues_by_reason(&sdata->local->hw, IEEE80211_QUEUE_STOP_REASON_CSA); - ifsta->flags |= IEEE80211_STA_CSA_RECEIVED; - mod_timer(&ifsta->chswitch_timer, + ifmgd->flags |= IEEE80211_STA_CSA_RECEIVED; + mod_timer(&ifmgd->chswitch_timer, jiffies + msecs_to_jiffies(sw_elem->count * bss->cbss.beacon_interval)); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 6aca49897d55..c3f0e950125b 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1633,7 +1633,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, case NL80211_IFTYPE_STATION: fc |= cpu_to_le16(IEEE80211_FCTL_TODS); /* BSSID SA DA */ - memcpy(hdr.addr1, sdata->u.sta.bssid, ETH_ALEN); + memcpy(hdr.addr1, sdata->u.mgd.bssid, ETH_ALEN); memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); memcpy(hdr.addr3, skb->data, ETH_ALEN); hdrlen = 24; @@ -1642,7 +1642,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, /* DA SA BSSID */ memcpy(hdr.addr1, skb->data, ETH_ALEN); memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); - memcpy(hdr.addr3, sdata->u.sta.bssid, ETH_ALEN); + memcpy(hdr.addr3, sdata->u.ibss.bssid, ETH_ALEN); hdrlen = 24; break; default: @@ -1928,7 +1928,6 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, struct ieee80211_tx_info *info; struct ieee80211_sub_if_data *sdata = NULL; struct ieee80211_if_ap *ap = NULL; - struct ieee80211_if_sta *ifsta = NULL; struct beacon_data *beacon; struct ieee80211_supported_band *sband; enum ieee80211_band band = local->hw.conf.channel->band; @@ -1980,13 +1979,13 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, } else goto out; } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_hdr *hdr; - ifsta = &sdata->u.sta; - if (!ifsta->probe_resp) + if (!ifibss->probe_resp) goto out; - skb = skb_copy(ifsta->probe_resp, GFP_ATOMIC); + skb = skb_copy(ifibss->probe_resp, GFP_ATOMIC); if (!skb) goto out; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 92ea1770461b..dee17e5cbb89 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -750,6 +750,27 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata) local->ops->conf_tx(local_to_hw(local), i, &qparam); } +void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, + const size_t supp_rates_len, + const u8 *supp_rates) +{ + struct ieee80211_local *local = sdata->local; + int i, have_higher_than_11mbit = 0; + + /* cf. IEEE 802.11 9.2.12 */ + for (i = 0; i < supp_rates_len; i++) + if ((supp_rates[i] & 0x7f) * 5 > 110) + have_higher_than_11mbit = 1; + + if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ && + have_higher_than_11mbit) + sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE; + else + sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE; + + ieee80211_set_wmm_default(sdata); +} + void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, int encrypt) { @@ -816,3 +837,158 @@ u32 ieee80211_mandatory_rates(struct ieee80211_local *local, mandatory_rates |= BIT(i); return mandatory_rates; } + +void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, + u16 transaction, u16 auth_alg, + u8 *extra, size_t extra_len, + const u8 *bssid, int encrypt) +{ + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb; + struct ieee80211_mgmt *mgmt; + const u8 *ie_auth = NULL; + int ie_auth_len = 0; + + if (sdata->vif.type == NL80211_IFTYPE_STATION) { + ie_auth_len = sdata->u.mgd.ie_auth_len; + ie_auth = sdata->u.mgd.ie_auth; + } + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + + sizeof(*mgmt) + 6 + extra_len + ie_auth_len); + if (!skb) { + printk(KERN_DEBUG "%s: failed to allocate buffer for auth " + "frame\n", sdata->dev->name); + return; + } + skb_reserve(skb, local->hw.extra_tx_headroom); + + mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24 + 6); + memset(mgmt, 0, 24 + 6); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_AUTH); + if (encrypt) + mgmt->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); + memcpy(mgmt->da, bssid, ETH_ALEN); + memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); + memcpy(mgmt->bssid, bssid, ETH_ALEN); + mgmt->u.auth.auth_alg = cpu_to_le16(auth_alg); + mgmt->u.auth.auth_transaction = cpu_to_le16(transaction); + mgmt->u.auth.status_code = cpu_to_le16(0); + if (extra) + memcpy(skb_put(skb, extra_len), extra, extra_len); + if (ie_auth) + memcpy(skb_put(skb, ie_auth_len), ie_auth, ie_auth_len); + + ieee80211_tx_skb(sdata, skb, encrypt); +} + +void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, + u8 *ssid, size_t ssid_len) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_supported_band *sband; + struct sk_buff *skb; + struct ieee80211_mgmt *mgmt; + u8 *pos, *supp_rates, *esupp_rates = NULL, *extra_preq_ie = NULL; + int i, extra_preq_ie_len = 0; + + switch (sdata->vif.type) { + case NL80211_IFTYPE_STATION: + extra_preq_ie_len = sdata->u.mgd.ie_probereq_len; + extra_preq_ie = sdata->u.mgd.ie_probereq; + break; + default: + break; + } + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200 + + extra_preq_ie_len); + if (!skb) { + printk(KERN_DEBUG "%s: failed to allocate buffer for probe " + "request\n", sdata->dev->name); + return; + } + skb_reserve(skb, local->hw.extra_tx_headroom); + + mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); + memset(mgmt, 0, 24); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_PROBE_REQ); + memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); + if (dst) { + memcpy(mgmt->da, dst, ETH_ALEN); + memcpy(mgmt->bssid, dst, ETH_ALEN); + } else { + memset(mgmt->da, 0xff, ETH_ALEN); + memset(mgmt->bssid, 0xff, ETH_ALEN); + } + pos = skb_put(skb, 2 + ssid_len); + *pos++ = WLAN_EID_SSID; + *pos++ = ssid_len; + memcpy(pos, ssid, ssid_len); + + supp_rates = skb_put(skb, 2); + supp_rates[0] = WLAN_EID_SUPP_RATES; + supp_rates[1] = 0; + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + + for (i = 0; i < sband->n_bitrates; i++) { + struct ieee80211_rate *rate = &sband->bitrates[i]; + if (esupp_rates) { + pos = skb_put(skb, 1); + esupp_rates[1]++; + } else if (supp_rates[1] == 8) { + esupp_rates = skb_put(skb, 3); + esupp_rates[0] = WLAN_EID_EXT_SUPP_RATES; + esupp_rates[1] = 1; + pos = &esupp_rates[2]; + } else { + pos = skb_put(skb, 1); + supp_rates[1]++; + } + *pos = rate->bitrate / 5; + } + + if (extra_preq_ie) + memcpy(skb_put(skb, extra_preq_ie_len), extra_preq_ie, + extra_preq_ie_len); + + ieee80211_tx_skb(sdata, skb, 0); +} + +u32 ieee80211_sta_get_rates(struct ieee80211_local *local, + struct ieee802_11_elems *elems, + enum ieee80211_band band) +{ + struct ieee80211_supported_band *sband; + struct ieee80211_rate *bitrates; + size_t num_rates; + u32 supp_rates; + int i, j; + sband = local->hw.wiphy->bands[band]; + + if (!sband) { + WARN_ON(1); + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + } + + bitrates = sband->bitrates; + num_rates = sband->n_bitrates; + supp_rates = 0; + for (i = 0; i < elems->supp_rates_len + + elems->ext_supp_rates_len; i++) { + u8 rate = 0; + int own_rate; + if (i < elems->supp_rates_len) + rate = elems->supp_rates[i]; + else if (elems->ext_supp_rates) + rate = elems->ext_supp_rates + [i - elems->supp_rates_len]; + own_rate = 5 * (rate & 0x7f); + for (j = 0; j < num_rates; j++) + if (bitrates[j].bitrate == own_rate) + supp_rates |= BIT(j); + } + return supp_rates; +} diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 2b023dce8b24..8a76a979bc92 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -132,13 +132,12 @@ static int ieee80211_ioctl_siwgenie(struct net_device *dev, if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) return -EOPNOTSUPP; - if (sdata->vif.type == NL80211_IFTYPE_STATION || - sdata->vif.type == NL80211_IFTYPE_ADHOC) { + if (sdata->vif.type == NL80211_IFTYPE_STATION) { int ret = ieee80211_sta_set_extra_ie(sdata, extra, data->length); if (ret) return ret; - sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL; - ieee80211_sta_req_auth(sdata, &sdata->u.sta); + sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL; + ieee80211_sta_req_auth(sdata); return 0; } @@ -255,16 +254,19 @@ static int ieee80211_ioctl_siwfreq(struct net_device *dev, { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type == NL80211_IFTYPE_ADHOC || - sdata->vif.type == NL80211_IFTYPE_STATION) - sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_CHANNEL_SEL; + if (sdata->vif.type == NL80211_IFTYPE_ADHOC) + sdata->u.ibss.flags &= ~IEEE80211_IBSS_AUTO_CHANNEL_SEL; + else if (sdata->vif.type == NL80211_IFTYPE_STATION) + sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_CHANNEL_SEL; /* freq->e == 0: freq->m = channel; otherwise freq = m * 10^e */ if (freq->e == 0) { if (freq->m < 0) { - if (sdata->vif.type == NL80211_IFTYPE_ADHOC || - sdata->vif.type == NL80211_IFTYPE_STATION) - sdata->u.sta.flags |= + if (sdata->vif.type == NL80211_IFTYPE_ADHOC) + sdata->u.ibss.flags |= + IEEE80211_IBSS_AUTO_CHANNEL_SEL; + else if (sdata->vif.type == NL80211_IFTYPE_STATION) + sdata->u.mgd.flags |= IEEE80211_STA_AUTO_CHANNEL_SEL; return 0; } else @@ -301,32 +303,35 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev, { struct ieee80211_sub_if_data *sdata; size_t len = data->length; + int ret; /* iwconfig uses nul termination in SSID.. */ if (len > 0 && ssid[len - 1] == '\0') len--; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type == NL80211_IFTYPE_STATION || - sdata->vif.type == NL80211_IFTYPE_ADHOC) { - int ret; + if (sdata->vif.type == NL80211_IFTYPE_STATION) { if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) { if (len > IEEE80211_MAX_SSID_LEN) return -EINVAL; - memcpy(sdata->u.sta.ssid, ssid, len); - sdata->u.sta.ssid_len = len; + memcpy(sdata->u.mgd.ssid, ssid, len); + sdata->u.mgd.ssid_len = len; return 0; } + if (data->flags) - sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_SSID_SEL; + sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_SSID_SEL; else - sdata->u.sta.flags |= IEEE80211_STA_AUTO_SSID_SEL; + sdata->u.mgd.flags |= IEEE80211_STA_AUTO_SSID_SEL; + ret = ieee80211_sta_set_ssid(sdata, ssid, len); if (ret) return ret; - ieee80211_sta_req_auth(sdata, &sdata->u.sta); + + ieee80211_sta_req_auth(sdata); return 0; - } + } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) + return ieee80211_ibss_set_ssid(sdata, ssid, len); return -EOPNOTSUPP; } @@ -340,8 +345,7 @@ static int ieee80211_ioctl_giwessid(struct net_device *dev, struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type == NL80211_IFTYPE_STATION || - sdata->vif.type == NL80211_IFTYPE_ADHOC) { + if (sdata->vif.type == NL80211_IFTYPE_STATION) { int res = ieee80211_sta_get_ssid(sdata, ssid, &len); if (res == 0) { data->length = len; @@ -349,6 +353,14 @@ static int ieee80211_ioctl_giwessid(struct net_device *dev, } else data->flags = 0; return res; + } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { + int res = ieee80211_ibss_get_ssid(sdata, ssid, &len); + if (res == 0) { + data->length = len; + data->flags = 1; + } else + data->flags = 0; + return res; } return -EOPNOTSUPP; @@ -362,26 +374,35 @@ static int ieee80211_ioctl_siwap(struct net_device *dev, struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type == NL80211_IFTYPE_STATION || - sdata->vif.type == NL80211_IFTYPE_ADHOC) { + if (sdata->vif.type == NL80211_IFTYPE_STATION) { int ret; if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) { - memcpy(sdata->u.sta.bssid, (u8 *) &ap_addr->sa_data, + memcpy(sdata->u.mgd.bssid, (u8 *) &ap_addr->sa_data, ETH_ALEN); return 0; } if (is_zero_ether_addr((u8 *) &ap_addr->sa_data)) - sdata->u.sta.flags |= IEEE80211_STA_AUTO_BSSID_SEL | + sdata->u.mgd.flags |= IEEE80211_STA_AUTO_BSSID_SEL | IEEE80211_STA_AUTO_CHANNEL_SEL; else if (is_broadcast_ether_addr((u8 *) &ap_addr->sa_data)) - sdata->u.sta.flags |= IEEE80211_STA_AUTO_BSSID_SEL; + sdata->u.mgd.flags |= IEEE80211_STA_AUTO_BSSID_SEL; else - sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL; + sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL; ret = ieee80211_sta_set_bssid(sdata, (u8 *) &ap_addr->sa_data); if (ret) return ret; - ieee80211_sta_req_auth(sdata, &sdata->u.sta); + ieee80211_sta_req_auth(sdata); return 0; + } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { + if (is_zero_ether_addr((u8 *) &ap_addr->sa_data)) + sdata->u.ibss.flags |= IEEE80211_IBSS_AUTO_BSSID_SEL | + IEEE80211_IBSS_AUTO_CHANNEL_SEL; + else if (is_broadcast_ether_addr((u8 *) &ap_addr->sa_data)) + sdata->u.ibss.flags |= IEEE80211_IBSS_AUTO_BSSID_SEL; + else + sdata->u.ibss.flags &= ~IEEE80211_IBSS_AUTO_BSSID_SEL; + + return ieee80211_ibss_set_bssid(sdata, (u8 *) &ap_addr->sa_data); } else if (sdata->vif.type == NL80211_IFTYPE_WDS) { /* * If it is necessary to update the WDS peer address @@ -410,17 +431,20 @@ static int ieee80211_ioctl_giwap(struct net_device *dev, struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type == NL80211_IFTYPE_STATION || - sdata->vif.type == NL80211_IFTYPE_ADHOC) { - if (sdata->u.sta.state == IEEE80211_STA_MLME_ASSOCIATED || - sdata->u.sta.state == IEEE80211_STA_MLME_IBSS_JOINED) { + if (sdata->vif.type == NL80211_IFTYPE_STATION) { + if (sdata->u.mgd.state == IEEE80211_STA_MLME_ASSOCIATED) { ap_addr->sa_family = ARPHRD_ETHER; - memcpy(&ap_addr->sa_data, sdata->u.sta.bssid, ETH_ALEN); - return 0; - } else { + memcpy(&ap_addr->sa_data, sdata->u.mgd.bssid, ETH_ALEN); + } else memset(&ap_addr->sa_data, 0, ETH_ALEN); - return 0; - } + return 0; + } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { + if (sdata->u.ibss.state == IEEE80211_IBSS_MLME_JOINED) { + ap_addr->sa_family = ARPHRD_ETHER; + memcpy(&ap_addr->sa_data, sdata->u.ibss.bssid, ETH_ALEN); + } else + memset(&ap_addr->sa_data, 0, ETH_ALEN); + return 0; } else if (sdata->vif.type == NL80211_IFTYPE_WDS) { ap_addr->sa_family = ARPHRD_ETHER; memcpy(&ap_addr->sa_data, sdata->u.wds.remote_addr, ETH_ALEN); @@ -486,7 +510,7 @@ static int ieee80211_ioctl_giwrate(struct net_device *dev, rcu_read_lock(); - sta = sta_info_get(local, sdata->u.sta.bssid); + sta = sta_info_get(local, sdata->u.mgd.bssid); if (sta && !(sta->last_tx_rate.flags & IEEE80211_TX_RC_MCS)) rate->value = sband->bitrates[sta->last_tx_rate.idx].bitrate; @@ -687,8 +711,7 @@ static int ieee80211_ioctl_siwmlme(struct net_device *dev, struct iw_mlme *mlme = (struct iw_mlme *) extra; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != NL80211_IFTYPE_STATION && - sdata->vif.type != NL80211_IFTYPE_ADHOC) + if (!(sdata->vif.type == NL80211_IFTYPE_STATION)) return -EINVAL; switch (mlme->cmd) { @@ -784,8 +807,7 @@ static int ieee80211_ioctl_giwencode(struct net_device *dev, erq->flags |= IW_ENCODE_ENABLED; if (sdata->vif.type == NL80211_IFTYPE_STATION) { - struct ieee80211_if_sta *ifsta = &sdata->u.sta; - switch (ifsta->auth_alg) { + switch (sdata->u.mgd.auth_alg) { case WLAN_AUTH_OPEN: case WLAN_AUTH_LEAP: erq->flags |= IW_ENCODE_OPEN; @@ -849,7 +871,7 @@ static int ieee80211_ioctl_siwpower(struct net_device *dev, ret = ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_DYNPS_TIMEOUT); - if (!(sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED)) + if (!(sdata->u.mgd.flags & IEEE80211_STA_ASSOCIATED)) return ret; if (conf->dynamic_ps_timeout > 0 && @@ -908,10 +930,10 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev, if (sdata->vif.type == NL80211_IFTYPE_STATION) { if (data->value & (IW_AUTH_CIPHER_WEP40 | IW_AUTH_CIPHER_WEP104 | IW_AUTH_CIPHER_TKIP)) - sdata->u.sta.flags |= + sdata->u.mgd.flags |= IEEE80211_STA_TKIP_WEP_USED; else - sdata->u.sta.flags &= + sdata->u.mgd.flags &= ~IEEE80211_STA_TKIP_WEP_USED; } break; @@ -922,21 +944,20 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev, if (sdata->vif.type != NL80211_IFTYPE_STATION) ret = -EINVAL; else { - sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED; + sdata->u.mgd.flags &= ~IEEE80211_STA_PRIVACY_INVOKED; /* * Privacy invoked by wpa_supplicant, store the * value and allow associating to a protected * network without having a key up front. */ if (data->value) - sdata->u.sta.flags |= + sdata->u.mgd.flags |= IEEE80211_STA_PRIVACY_INVOKED; } break; case IW_AUTH_80211_AUTH_ALG: - if (sdata->vif.type == NL80211_IFTYPE_STATION || - sdata->vif.type == NL80211_IFTYPE_ADHOC) - sdata->u.sta.auth_algs = data->value; + if (sdata->vif.type == NL80211_IFTYPE_STATION) + sdata->u.mgd.auth_algs = data->value; else ret = -EOPNOTSUPP; break; @@ -945,17 +966,16 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev, ret = -EOPNOTSUPP; break; } - if (sdata->vif.type == NL80211_IFTYPE_STATION || - sdata->vif.type == NL80211_IFTYPE_ADHOC) { + if (sdata->vif.type == NL80211_IFTYPE_STATION) { switch (data->value) { case IW_AUTH_MFP_DISABLED: - sdata->u.sta.mfp = IEEE80211_MFP_DISABLED; + sdata->u.mgd.mfp = IEEE80211_MFP_DISABLED; break; case IW_AUTH_MFP_OPTIONAL: - sdata->u.sta.mfp = IEEE80211_MFP_OPTIONAL; + sdata->u.mgd.mfp = IEEE80211_MFP_OPTIONAL; break; case IW_AUTH_MFP_REQUIRED: - sdata->u.sta.mfp = IEEE80211_MFP_REQUIRED; + sdata->u.mgd.mfp = IEEE80211_MFP_REQUIRED; break; default: ret = -EINVAL; @@ -980,9 +1000,9 @@ static struct iw_statistics *ieee80211_get_wireless_stats(struct net_device *dev rcu_read_lock(); - if (sdata->vif.type == NL80211_IFTYPE_STATION || - sdata->vif.type == NL80211_IFTYPE_ADHOC) - sta = sta_info_get(local, sdata->u.sta.bssid); + if (sdata->vif.type == NL80211_IFTYPE_STATION) + sta = sta_info_get(local, sdata->u.mgd.bssid); + if (!sta) { wstats->discard.fragment = 0; wstats->discard.misc = 0; @@ -1011,9 +1031,8 @@ static int ieee80211_ioctl_giwauth(struct net_device *dev, switch (data->flags & IW_AUTH_INDEX) { case IW_AUTH_80211_AUTH_ALG: - if (sdata->vif.type == NL80211_IFTYPE_STATION || - sdata->vif.type == NL80211_IFTYPE_ADHOC) - data->value = sdata->u.sta.auth_algs; + if (sdata->vif.type == NL80211_IFTYPE_STATION) + data->value = sdata->u.mgd.auth_algs; else ret = -EOPNOTSUPP; break; -- cgit v1.2.3 From 81cb7623ad3b408f871fa36b774fc20d8dfccac0 Mon Sep 17 00:00:00 2001 From: Sujith Date: Thu, 12 Feb 2009 11:38:37 +0530 Subject: mac80211: Extend the rate control API with an update callback The AP can switch dynamically between 20/40 Mhz channel width, in which case we switch the local operating channel, but the rate control algorithm is not notified. This patch adds a new callback to indicate such changes to the RC algorithm. Currently, HT channel width change is notified, but this callback can be used to indicate any new requirements that might come up later on. Signed-off-by: Sujith Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ht.c | 13 +++++++++++++ net/mac80211/rate.h | 12 ++++++++++++ 2 files changed, 25 insertions(+) (limited to 'net') diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 69b6e9a4df3d..4e3c72f20de7 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -17,6 +17,7 @@ #include #include #include "ieee80211_i.h" +#include "rate.h" void ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_supported_band *sband, struct ieee80211_ht_cap *ht_cap_ie, @@ -93,7 +94,9 @@ u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_bss_ht_conf ht; + struct sta_info *sta; u32 changed = 0; bool enable_ht = true, ht_changed; enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; @@ -136,6 +139,16 @@ u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata, if (ht_changed) { /* channel_type change automatically detected */ ieee80211_hw_config(local, 0); + + rcu_read_lock(); + + sta = sta_info_get(local, ifmgd->bssid); + if (sta) + rate_control_rate_update(local, sband, sta, + IEEE80211_RC_HT_CHANGED); + + rcu_read_unlock(); + } /* disable HT */ diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 928da625e281..b9164c9a9563 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -62,6 +62,18 @@ static inline void rate_control_rate_init(struct sta_info *sta) ref->ops->rate_init(ref->priv, sband, ista, priv_sta); } +static inline void rate_control_rate_update(struct ieee80211_local *local, + struct ieee80211_supported_band *sband, + struct sta_info *sta, u32 changed) +{ + struct rate_control_ref *ref = local->rate_ctrl; + struct ieee80211_sta *ista = &sta->sta; + void *priv_sta = sta->rate_ctrl_priv; + + if (ref->ops->rate_update) + ref->ops->rate_update(ref->priv, sband, ista, + priv_sta, changed); +} static inline void *rate_control_alloc_sta(struct rate_control_ref *ref, struct ieee80211_sta *sta, -- cgit v1.2.3 From 13e967b2926a51e1913ea42711eaf4108372fd44 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 13 Feb 2009 16:39:35 -0800 Subject: wireless: fix for CONFIG_NL80211=n Add empty function for case of CONFIG_NL80211=n: net/wireless/scan.c:35: error: implicit declaration of function 'nl80211_send_scan_aborted' Signed-off-by: Randy Dunlap Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/nl80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index b565a5f84e97..69787b621365 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -27,6 +27,10 @@ static inline void nl80211_send_scan_done(struct cfg80211_registered_device *rdev, struct net_device *netdev) {} +static inline void nl80211_send_scan_aborted( + struct cfg80211_registered_device *rdev, + struct net_device *netdev) +{} #endif /* CONFIG_NL80211 */ #endif /* __NET_WIRELESS_NL80211_H */ -- cgit v1.2.3 From 70692ad2923a379e0a10f9ec2ad93fbbe084cc46 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Mon, 16 Feb 2009 19:39:13 +0200 Subject: nl80211: Optional IEs into scan request This extends the NL80211_CMD_TRIGGER_SCAN command to allow applications to specify a set of information element(s) to be added into Probe Request frames with NL80211_ATTR_IE. This provides support for the MLME-SCAN.request primitive parameter VendorSpecificInfo and can be used, e.g., to implement WPS scanning. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 3 ++- net/mac80211/mlme.c | 8 +++++--- net/mac80211/scan.c | 3 ++- net/mac80211/util.c | 7 +++++-- net/wireless/nl80211.c | 21 ++++++++++++++++++++- 5 files changed, 34 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 27d56414019d..d06c75720ced 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1093,7 +1093,8 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u8 *extra, size_t extra_len, const u8 *bssid, int encrypt); void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, - u8 *ssid, size_t ssid_len); + u8 *ssid, size_t ssid_len, + u8 *ie, size_t ie_len); void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, const size_t supp_rates_len, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index ec5a0900cba0..5a4977936f6f 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -716,7 +716,7 @@ static void ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata) * will not answer to direct packet in unassociated state. */ ieee80211_send_probe_req(sdata, NULL, - ifmgd->ssid, ifmgd->ssid_len); + ifmgd->ssid, ifmgd->ssid_len, NULL, 0); mod_timer(&ifmgd->timer, jiffies + IEEE80211_AUTH_TIMEOUT); } @@ -946,7 +946,8 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata) } else ieee80211_send_probe_req(sdata, ifmgd->bssid, ifmgd->ssid, - ifmgd->ssid_len); + ifmgd->ssid_len, + NULL, 0); ifmgd->flags ^= IEEE80211_STA_PROBEREQ_POLL; } else { ifmgd->flags &= ~IEEE80211_STA_PROBEREQ_POLL; @@ -955,7 +956,8 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata) ifmgd->last_probe = jiffies; ieee80211_send_probe_req(sdata, ifmgd->bssid, ifmgd->ssid, - ifmgd->ssid_len); + ifmgd->ssid_len, + NULL, 0); } } } diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 08a1fc27ca10..c063f8204263 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -338,7 +338,8 @@ void ieee80211_scan_work(struct work_struct *work) ieee80211_send_probe_req( sdata, NULL, local->scan_req->ssids[i].ssid, - local->scan_req->ssids[i].ssid_len); + local->scan_req->ssids[i].ssid_len, + local->scan_req->ie, local->scan_req->ie_len); next_delay = IEEE80211_CHANNEL_TIME; break; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index dee17e5cbb89..e0431a1d218b 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -884,7 +884,8 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, } void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, - u8 *ssid, size_t ssid_len) + u8 *ssid, size_t ssid_len, + u8 *ie, size_t ie_len) { struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; @@ -903,7 +904,7 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, } skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200 + - extra_preq_ie_len); + ie_len + extra_preq_ie_len); if (!skb) { printk(KERN_DEBUG "%s: failed to allocate buffer for probe " "request\n", sdata->dev->name); @@ -950,6 +951,8 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, *pos = rate->bitrate / 5; } + if (ie) + memcpy(skb_put(skb, ie_len), ie, ie_len); if (extra_preq_ie) memcpy(skb_put(skb, extra_preq_ie_len), extra_preq_ie, extra_preq_ie_len); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 298a4de59948..67b18b3a93a0 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2286,6 +2286,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) struct wiphy *wiphy; int err, tmp, n_ssids = 0, n_channels = 0, i; enum ieee80211_band band; + size_t ie_len; err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) @@ -2327,9 +2328,15 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) goto out_unlock; } + if (info->attrs[NL80211_ATTR_IE]) + ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + else + ie_len = 0; + request = kzalloc(sizeof(*request) + sizeof(*ssid) * n_ssids - + sizeof(channel) * n_channels, GFP_KERNEL); + + sizeof(channel) * n_channels + + ie_len, GFP_KERNEL); if (!request) { err = -ENOMEM; goto out_unlock; @@ -2340,6 +2347,12 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) if (n_ssids) request->ssids = (void *)(request->channels + n_channels); request->n_ssids = n_ssids; + if (ie_len) { + if (request->ssids) + request->ie = (void *)(request->ssids + n_ssids); + else + request->ie = (void *)(request->channels + n_channels); + } if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) { /* user specified, bail out if channel not found */ @@ -2380,6 +2393,12 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) } } + if (info->attrs[NL80211_ATTR_IE]) { + request->ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + memcpy(request->ie, nla_data(info->attrs[NL80211_ATTR_IE]), + request->ie_len); + } + request->ifidx = dev->ifindex; request->wiphy = &drv->wiphy; -- cgit v1.2.3 From 98c8a60a04316e94ccea8221cf16768ce91bd214 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Tue, 17 Feb 2009 13:24:57 +0200 Subject: nl80211: Provide access to STA TX/RX packet counters The TX/RX packet counters are needed to fill in RADIUS Accounting attributes Acct-Output-Packets and Acct-Input-Packets. We already collect the needed information, but only the TX/RX bytes were previously exposed through nl80211. Allow applications to fetch the packet counters, too, to provide more complete support for accounting. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 4 ++++ net/wireless/nl80211.c | 6 ++++++ 2 files changed, 10 insertions(+) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index f453bb7c564b..c43129efc3bf 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -341,11 +341,15 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->filled = STATION_INFO_INACTIVE_TIME | STATION_INFO_RX_BYTES | STATION_INFO_TX_BYTES | + STATION_INFO_RX_PACKETS | + STATION_INFO_TX_PACKETS | STATION_INFO_TX_BITRATE; sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); sinfo->rx_bytes = sta->rx_bytes; sinfo->tx_bytes = sta->tx_bytes; + sinfo->rx_packets = sta->rx_packets; + sinfo->tx_packets = sta->tx_packets; if (sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) { sinfo->filled |= STATION_INFO_SIGNAL; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 67b18b3a93a0..badccf98074e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1206,6 +1206,12 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, nla_nest_end(msg, txrate); } + if (sinfo->filled & STATION_INFO_RX_PACKETS) + NLA_PUT_U32(msg, NL80211_STA_INFO_RX_PACKETS, + sinfo->rx_packets); + if (sinfo->filled & STATION_INFO_TX_PACKETS) + NLA_PUT_U32(msg, NL80211_STA_INFO_TX_PACKETS, + sinfo->tx_packets); nla_nest_end(msg, sinfoattr); return genlmsg_end(msg, hdr); -- cgit v1.2.3 From cb3a8eec0e66edfe8db7d3b3bf19d25745bae3c3 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 11 Feb 2009 17:14:43 -0500 Subject: cfg80211: age scan results on resume Scanned BSS entries are timestamped with jiffies, which doesn't increment across suspend and hibernate. On resume, every BSS in the scan list looks like it was scanned within the last 10 seconds, irregardless of how long the machine was actually asleep. Age scan results on resume with the time spent during sleep so userspace has a clue how old they really are. Signed-off-by: Dan Williams Signed-off-by: John W. Linville --- net/wireless/core.h | 3 +++ net/wireless/scan.c | 25 +++++++++++++++++++++++-- net/wireless/sysfs.c | 7 +++++++ 3 files changed, 33 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/core.h b/net/wireless/core.h index e29ad4cd464f..5d0c682d737a 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -49,6 +49,7 @@ struct cfg80211_registered_device { struct rb_root bss_tree; u32 bss_generation; struct cfg80211_scan_request *scan_req; /* protected by RTNL */ + unsigned long suspend_at; /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ @@ -113,5 +114,7 @@ void ieee80211_set_bitrate_flags(struct wiphy *wiphy); void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby); void cfg80211_bss_expire(struct cfg80211_registered_device *dev); +void cfg80211_bss_age(struct cfg80211_registered_device *dev, + unsigned long age_secs); #endif /* __NET_WIRELESS_CORE_H */ diff --git a/net/wireless/scan.c b/net/wireless/scan.c index b1893c863b97..9fad1631d6cb 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -61,6 +61,18 @@ static void bss_release(struct kref *ref) kfree(bss); } +/* must hold dev->bss_lock! */ +void cfg80211_bss_age(struct cfg80211_registered_device *dev, + unsigned long age_secs) +{ + struct cfg80211_internal_bss *bss; + unsigned long age_jiffies = msecs_to_jiffies(age_secs * MSEC_PER_SEC); + + list_for_each_entry(bss, &dev->bss_list, list) { + bss->ts -= age_jiffies; + } +} + /* must hold dev->bss_lock! */ void cfg80211_bss_expire(struct cfg80211_registered_device *dev) { @@ -584,6 +596,15 @@ static void ieee80211_scan_add_ies(struct iw_request_info *info, } } +static inline unsigned int elapsed_jiffies_msecs(unsigned long start) +{ + unsigned long end = jiffies; + + if (end >= start) + return jiffies_to_msecs(end - start); + + return jiffies_to_msecs(end + (MAX_JIFFY_OFFSET - start) + 1); +} static char * ieee80211_bss(struct iw_request_info *info, @@ -763,8 +784,8 @@ ieee80211_bss(struct iw_request_info *info, &iwe, buf); memset(&iwe, 0, sizeof(iwe)); iwe.cmd = IWEVCUSTOM; - sprintf(buf, " Last beacon: %dms ago", - jiffies_to_msecs(jiffies - bss->ts)); + sprintf(buf, " Last beacon: %ums ago", + elapsed_jiffies_msecs(bss->ts)); iwe.u.data.length = strlen(buf); current_ev = iwe_stream_add_point(info, current_ev, end_buf, &iwe, buf); diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 26a72b0797a0..15feaeb5ced5 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -60,6 +60,8 @@ static int wiphy_suspend(struct device *dev, pm_message_t state) struct cfg80211_registered_device *rdev = dev_to_rdev(dev); int ret = 0; + rdev->suspend_at = get_seconds(); + if (rdev->ops->suspend) { rtnl_lock(); ret = rdev->ops->suspend(&rdev->wiphy); @@ -74,6 +76,11 @@ static int wiphy_resume(struct device *dev) struct cfg80211_registered_device *rdev = dev_to_rdev(dev); int ret = 0; + /* Age scan results with time spent in suspend */ + spin_lock_bh(&rdev->bss_lock); + cfg80211_bss_age(rdev, get_seconds() - rdev->suspend_at); + spin_unlock_bh(&rdev->bss_lock); + if (rdev->ops->resume) { rtnl_lock(); ret = rdev->ops->resume(&rdev->wiphy); -- cgit v1.2.3 From a77b855245541823b49999a27245ad7428879096 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Feb 2009 18:27:22 +0100 Subject: cfg80211/mac80211: fill qual.qual value/adjust max_qual.qual Due to various bugs in the software stack we end up having to fill qual.qual; level should be used, but wpa_supplicant doesn't properly ignore qual.qual, NM should use qual.level regardless of that because qual.qual is 0 but doesn't handle IW_QUAL_DBM right now. So fill qual.qual with the qual.level value clamped to -110..-40 dBm or just the regular 'unspecified' signal level. This requires a mac80211 change to properly announce the max_qual.qual and avg_qual.qual values. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/wext.c | 18 ++++++++++++++++-- net/wireless/scan.c | 15 ++++++++++++--- 2 files changed, 28 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 8a76a979bc92..a8d4b6171916 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -200,10 +200,24 @@ static int ieee80211_ioctl_giwrange(struct net_device *dev, else range->max_qual.noise = 0; - range->max_qual.qual = 100; range->max_qual.updated = ieee80211_get_wstats_flags(local); - range->avg_qual.qual = 50; + if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) { + /* + * cfg80211 assumes -110 to -40 dBm and clamps to that range + * for qual.qual, so tell userspace this is what we give it + * but take into account that we have to start from 0. + */ + range->max_qual.qual = 70; + range->avg_qual.qual = 35; + } else { + /* + * cfg80211 just uses the level value for qual too, and it + * requires the level value to be 0 .. 100. + */ + range->max_qual.qual = 100; + range->avg_qual.qual = 50; + } /* not always true but better than nothing */ range->avg_qual.level = range->max_qual.level / 2; range->avg_qual.noise = range->max_qual.noise / 2; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 9fad1631d6cb..01c136d98c5b 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -614,7 +614,7 @@ ieee80211_bss(struct iw_request_info *info, struct iw_event iwe; u8 *buf, *cfg, *p; u8 *ie = bss->pub.information_elements; - int rem = bss->pub.len_information_elements, i; + int rem = bss->pub.len_information_elements, i, sig; bool ismesh = false; memset(&iwe, 0, sizeof(iwe)); @@ -643,14 +643,23 @@ ieee80211_bss(struct iw_request_info *info, iwe.cmd = IWEVQUAL; iwe.u.qual.updated = IW_QUAL_LEVEL_UPDATED | IW_QUAL_NOISE_INVALID | - IW_QUAL_QUAL_INVALID; + IW_QUAL_QUAL_UPDATED; switch (bss->pub.signal_type) { case CFG80211_SIGNAL_TYPE_MBM: - iwe.u.qual.level = bss->pub.signal / 100; + sig = bss->pub.signal / 100; + iwe.u.qual.level = sig; iwe.u.qual.updated |= IW_QUAL_DBM; + if (sig < -110) /* rather bad */ + sig = -110; + else if (sig > -40) /* perfect */ + sig = -40; + /* will give a range of 0 .. 70 */ + iwe.u.qual.qual = sig + 110; break; case CFG80211_SIGNAL_TYPE_UNSPEC: iwe.u.qual.level = bss->pub.signal; + /* will give range 0 .. 100 */ + iwe.u.qual.qual = bss->pub.signal; break; default: /* not reached */ -- cgit v1.2.3 From 0a16ec5f5ed38076026960332a9ea4746dc1f3c6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Feb 2009 09:56:47 +0100 Subject: mac80211: add missing kernel-doc Document the new shutdown member. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/sta_info.h | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index a2921f15787b..1f45573c580c 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -90,6 +90,7 @@ struct tid_ampdu_tx { * @buf_size: buffer size for incoming A-MPDUs * @timeout: reset timer value (in TUs). * @dialog_token: dialog token for aggregation session + * @shutdown: this session is being shut down due to STA removal */ struct tid_ampdu_rx { struct sk_buff **reorder_buf; -- cgit v1.2.3 From 630e64c487c0a9550f05b465216a1cd9125b52f2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Feb 2009 10:48:07 +0100 Subject: nl80211: remove admin requirement from station get There's no particular reason to not let untrusted users see this information -- it's just the stations we're talking to, packet counters for them and possibly some mesh things. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index badccf98074e..245fddcc77c3 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2626,7 +2626,6 @@ static struct genl_ops nl80211_ops[] = { .doit = nl80211_get_station, .dumpit = nl80211_dump_station, .policy = nl80211_policy, - .flags = GENL_ADMIN_PERM, }, { .cmd = NL80211_CMD_SET_STATION, -- cgit v1.2.3 From 77965c970d7da9c9b6349ff2b1d9adecf54c403b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Feb 2009 18:45:06 +0100 Subject: cfg80211: clean up signal type It wasn't a good idea to make the signal type a per-BSS option, although then it is closer to the actual value. Move it to be a per-wiphy setting, update mac80211 to match. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/main.c | 5 +++++ net/mac80211/scan.c | 11 +++-------- net/wireless/nl80211.c | 2 +- net/wireless/scan.c | 21 +++++++++------------ 4 files changed, 18 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index fce9d08986e9..f38db4d37e5d 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -861,6 +861,11 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) /* mac80211 always supports monitor */ local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_MONITOR); + if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) + local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM; + else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) + local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_UNSPEC; + result = wiphy_register(local->hw.wiphy); if (result < 0) goto fail_wiphy_register; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index c063f8204263..23f4de274744 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -63,20 +63,15 @@ ieee80211_bss_info_update(struct ieee80211_local *local, { struct ieee80211_bss *bss; int clen; - enum cfg80211_signal_type sigtype = CFG80211_SIGNAL_TYPE_NONE; s32 signal = 0; - if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) { - sigtype = CFG80211_SIGNAL_TYPE_MBM; + if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) signal = rx_status->signal * 100; - } else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) { - sigtype = CFG80211_SIGNAL_TYPE_UNSPEC; + else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) signal = (rx_status->signal * 100) / local->hw.max_signal; - } bss = (void *)cfg80211_inform_bss_frame(local->hw.wiphy, channel, - mgmt, len, signal, sigtype, - GFP_ATOMIC); + mgmt, len, signal, GFP_ATOMIC); if (!bss) return NULL; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 245fddcc77c3..a7e751edc739 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2457,7 +2457,7 @@ static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT_U16(msg, NL80211_BSS_CAPABILITY, res->capability); NLA_PUT_U32(msg, NL80211_BSS_FREQUENCY, res->channel->center_freq); - switch (res->signal_type) { + switch (rdev->wiphy.signal_type) { case CFG80211_SIGNAL_TYPE_MBM: NLA_PUT_U32(msg, NL80211_BSS_SIGNAL_MBM, res->signal); break; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 01c136d98c5b..60600657b657 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -370,7 +370,6 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, found->pub.beacon_interval = res->pub.beacon_interval; found->pub.tsf = res->pub.tsf; found->pub.signal = res->pub.signal; - found->pub.signal_type = res->pub.signal_type; found->pub.capability = res->pub.capability; found->ts = res->ts; kref_put(&res->ref, bss_release); @@ -392,8 +391,7 @@ struct cfg80211_bss * cfg80211_inform_bss_frame(struct wiphy *wiphy, struct ieee80211_channel *channel, struct ieee80211_mgmt *mgmt, size_t len, - s32 signal, enum cfg80211_signal_type sigtype, - gfp_t gfp) + s32 signal, gfp_t gfp) { struct cfg80211_internal_bss *res; size_t ielen = len - offsetof(struct ieee80211_mgmt, @@ -401,7 +399,7 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, bool overwrite; size_t privsz = wiphy->bss_priv_size; - if (WARN_ON(sigtype == NL80211_BSS_SIGNAL_UNSPEC && + if (WARN_ON(wiphy->signal_type == NL80211_BSS_SIGNAL_UNSPEC && (signal < 0 || signal > 100))) return NULL; @@ -415,7 +413,6 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, memcpy(res->pub.bssid, mgmt->bssid, ETH_ALEN); res->pub.channel = channel; - res->pub.signal_type = sigtype; res->pub.signal = signal; res->pub.tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp); res->pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int); @@ -607,9 +604,9 @@ static inline unsigned int elapsed_jiffies_msecs(unsigned long start) } static char * -ieee80211_bss(struct iw_request_info *info, - struct cfg80211_internal_bss *bss, - char *current_ev, char *end_buf) +ieee80211_bss(struct wiphy *wiphy, struct iw_request_info *info, + struct cfg80211_internal_bss *bss, char *current_ev, + char *end_buf) { struct iw_event iwe; u8 *buf, *cfg, *p; @@ -638,13 +635,13 @@ ieee80211_bss(struct iw_request_info *info, current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe, IW_EV_FREQ_LEN); - if (bss->pub.signal_type != CFG80211_SIGNAL_TYPE_NONE) { + if (wiphy->signal_type != CFG80211_SIGNAL_TYPE_NONE) { memset(&iwe, 0, sizeof(iwe)); iwe.cmd = IWEVQUAL; iwe.u.qual.updated = IW_QUAL_LEVEL_UPDATED | IW_QUAL_NOISE_INVALID | IW_QUAL_QUAL_UPDATED; - switch (bss->pub.signal_type) { + switch (wiphy->signal_type) { case CFG80211_SIGNAL_TYPE_MBM: sig = bss->pub.signal / 100; iwe.u.qual.level = sig; @@ -823,8 +820,8 @@ static int ieee80211_scan_results(struct cfg80211_registered_device *dev, spin_unlock_bh(&dev->bss_lock); return -E2BIG; } - current_ev = ieee80211_bss(info, bss, - current_ev, end_buf); + current_ev = ieee80211_bss(&dev->wiphy, info, bss, + current_ev, end_buf); } spin_unlock_bh(&dev->bss_lock); return current_ev - buf; -- cgit v1.2.3 From 4aa188e1a868d25c5b93e48e5d29bbd0f9d3bc3a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Feb 2009 19:32:08 +0100 Subject: mac80211/cfg80211: move iwrange handler to cfg80211 The previous patch made cfg80211 generally aware of the signal type a given hardware will give, so now it can implement SIOCGIWRANGE itself, removing more wext stuff from mac80211. Might need to be a little more parametrized once we have more hardware using cfg80211 and new hardware capabilities. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/wext.c | 135 ++++++--------------------------------------- net/wireless/wext-compat.c | 97 ++++++++++++++++++++++++++++++++ 2 files changed, 113 insertions(+), 119 deletions(-) (limited to 'net') diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index a8d4b6171916..f6924fc065d3 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -144,124 +144,6 @@ static int ieee80211_ioctl_siwgenie(struct net_device *dev, return -EOPNOTSUPP; } -static u8 ieee80211_get_wstats_flags(struct ieee80211_local *local) -{ - u8 wstats_flags = 0; - - wstats_flags |= local->hw.flags & (IEEE80211_HW_SIGNAL_UNSPEC | - IEEE80211_HW_SIGNAL_DBM) ? - IW_QUAL_QUAL_UPDATED : IW_QUAL_QUAL_INVALID; - wstats_flags |= local->hw.flags & IEEE80211_HW_NOISE_DBM ? - IW_QUAL_NOISE_UPDATED : IW_QUAL_NOISE_INVALID; - if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) - wstats_flags |= IW_QUAL_DBM; - - return wstats_flags; -} - -static int ieee80211_ioctl_giwrange(struct net_device *dev, - struct iw_request_info *info, - struct iw_point *data, char *extra) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct iw_range *range = (struct iw_range *) extra; - enum ieee80211_band band; - int c = 0; - - data->length = sizeof(struct iw_range); - memset(range, 0, sizeof(struct iw_range)); - - range->we_version_compiled = WIRELESS_EXT; - range->we_version_source = 21; - range->retry_capa = IW_RETRY_LIMIT; - range->retry_flags = IW_RETRY_LIMIT; - range->min_retry = 0; - range->max_retry = 255; - range->min_rts = 0; - range->max_rts = 2347; - range->min_frag = 256; - range->max_frag = 2346; - - range->encoding_size[0] = 5; - range->encoding_size[1] = 13; - range->num_encoding_sizes = 2; - range->max_encoding_tokens = NUM_DEFAULT_KEYS; - - /* cfg80211 requires this, and enforces 0..100 */ - if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) - range->max_qual.level = 100; - else if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) - range->max_qual.level = -110; - else - range->max_qual.level = 0; - - if (local->hw.flags & IEEE80211_HW_NOISE_DBM) - range->max_qual.noise = -110; - else - range->max_qual.noise = 0; - - range->max_qual.updated = ieee80211_get_wstats_flags(local); - - if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) { - /* - * cfg80211 assumes -110 to -40 dBm and clamps to that range - * for qual.qual, so tell userspace this is what we give it - * but take into account that we have to start from 0. - */ - range->max_qual.qual = 70; - range->avg_qual.qual = 35; - } else { - /* - * cfg80211 just uses the level value for qual too, and it - * requires the level value to be 0 .. 100. - */ - range->max_qual.qual = 100; - range->avg_qual.qual = 50; - } - /* not always true but better than nothing */ - range->avg_qual.level = range->max_qual.level / 2; - range->avg_qual.noise = range->max_qual.noise / 2; - range->avg_qual.updated = ieee80211_get_wstats_flags(local); - - range->enc_capa = IW_ENC_CAPA_WPA | IW_ENC_CAPA_WPA2 | - IW_ENC_CAPA_CIPHER_TKIP | IW_ENC_CAPA_CIPHER_CCMP; - - - for (band = 0; band < IEEE80211_NUM_BANDS; band ++) { - int i; - struct ieee80211_supported_band *sband; - - sband = local->hw.wiphy->bands[band]; - - if (!sband) - continue; - - for (i = 0; i < sband->n_channels && c < IW_MAX_FREQUENCIES; i++) { - struct ieee80211_channel *chan = &sband->channels[i]; - - if (!(chan->flags & IEEE80211_CHAN_DISABLED)) { - range->freq[c].i = - ieee80211_frequency_to_channel( - chan->center_freq); - range->freq[c].m = chan->center_freq; - range->freq[c].e = 6; - c++; - } - } - } - range->num_channels = c; - range->num_frequency = c; - - IW_EVENT_CAPA_SET_KERNEL(range->event_capa); - IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWAP); - IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWSCAN); - - range->scan_capa |= IW_SCAN_CAPA_ESSID; - - return 0; -} - - static int ieee80211_ioctl_siwfreq(struct net_device *dev, struct iw_request_info *info, struct iw_freq *freq, char *extra) @@ -1004,6 +886,21 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev, return ret; } +static u8 ieee80211_get_wstats_flags(struct ieee80211_local *local) +{ + u8 wstats_flags = 0; + + wstats_flags |= local->hw.flags & (IEEE80211_HW_SIGNAL_UNSPEC | + IEEE80211_HW_SIGNAL_DBM) ? + IW_QUAL_QUAL_UPDATED : IW_QUAL_QUAL_INVALID; + wstats_flags |= local->hw.flags & IEEE80211_HW_NOISE_DBM ? + IW_QUAL_NOISE_UPDATED : IW_QUAL_NOISE_INVALID; + if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) + wstats_flags |= IW_QUAL_DBM; + + return wstats_flags; +} + /* Get wireless statistics. Called by /proc/net/wireless and by SIOCGIWSTATS */ static struct iw_statistics *ieee80211_get_wireless_stats(struct net_device *dev) { @@ -1149,7 +1046,7 @@ static const iw_handler ieee80211_handler[] = (iw_handler) NULL, /* SIOCSIWSENS */ (iw_handler) NULL, /* SIOCGIWSENS */ (iw_handler) NULL /* not used */, /* SIOCSIWRANGE */ - (iw_handler) ieee80211_ioctl_giwrange, /* SIOCGIWRANGE */ + (iw_handler) cfg80211_wext_giwrange, /* SIOCGIWRANGE */ (iw_handler) NULL /* not used */, /* SIOCSIWPRIV */ (iw_handler) NULL /* kernel code */, /* SIOCGIWPRIV */ (iw_handler) NULL /* not used */, /* SIOCSIWSTATS */ diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 58e489fd4aed..b84a9b4fe96a 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -137,3 +137,100 @@ int cfg80211_wext_giwmode(struct net_device *dev, struct iw_request_info *info, return 0; } EXPORT_SYMBOL(cfg80211_wext_giwmode); + + +int cfg80211_wext_giwrange(struct net_device *dev, + struct iw_request_info *info, + struct iw_point *data, char *extra) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct iw_range *range = (struct iw_range *) extra; + enum ieee80211_band band; + int c = 0; + + if (!wdev) + return -EOPNOTSUPP; + + data->length = sizeof(struct iw_range); + memset(range, 0, sizeof(struct iw_range)); + + range->we_version_compiled = WIRELESS_EXT; + range->we_version_source = 21; + range->retry_capa = IW_RETRY_LIMIT; + range->retry_flags = IW_RETRY_LIMIT; + range->min_retry = 0; + range->max_retry = 255; + range->min_rts = 0; + range->max_rts = 2347; + range->min_frag = 256; + range->max_frag = 2346; + + range->encoding_size[0] = 5; + range->encoding_size[1] = 13; + range->num_encoding_sizes = 2; + range->max_encoding_tokens = 4; + + range->max_qual.updated = IW_QUAL_NOISE_INVALID; + + switch (wdev->wiphy->signal_type) { + case CFG80211_SIGNAL_TYPE_NONE: + break; + case CFG80211_SIGNAL_TYPE_MBM: + range->max_qual.level = -110; + range->max_qual.qual = 70; + range->avg_qual.qual = 35; + range->max_qual.updated |= IW_QUAL_DBM; + range->max_qual.updated |= IW_QUAL_QUAL_UPDATED; + range->max_qual.updated |= IW_QUAL_LEVEL_UPDATED; + break; + case CFG80211_SIGNAL_TYPE_UNSPEC: + range->max_qual.level = 100; + range->max_qual.qual = 100; + range->avg_qual.qual = 50; + range->max_qual.updated |= IW_QUAL_QUAL_UPDATED; + range->max_qual.updated |= IW_QUAL_LEVEL_UPDATED; + break; + } + + range->avg_qual.level = range->max_qual.level / 2; + range->avg_qual.noise = range->max_qual.noise / 2; + range->avg_qual.updated = range->max_qual.updated; + + range->enc_capa = IW_ENC_CAPA_WPA | IW_ENC_CAPA_WPA2 | + IW_ENC_CAPA_CIPHER_TKIP | IW_ENC_CAPA_CIPHER_CCMP; + + + for (band = 0; band < IEEE80211_NUM_BANDS; band ++) { + int i; + struct ieee80211_supported_band *sband; + + sband = wdev->wiphy->bands[band]; + + if (!sband) + continue; + + for (i = 0; i < sband->n_channels && c < IW_MAX_FREQUENCIES; i++) { + struct ieee80211_channel *chan = &sband->channels[i]; + + if (!(chan->flags & IEEE80211_CHAN_DISABLED)) { + range->freq[c].i = + ieee80211_frequency_to_channel( + chan->center_freq); + range->freq[c].m = chan->center_freq; + range->freq[c].e = 6; + c++; + } + } + } + range->num_channels = c; + range->num_frequency = c; + + IW_EVENT_CAPA_SET_KERNEL(range->event_capa); + IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWAP); + IW_EVENT_CAPA_SET(range->event_capa, SIOCGIWSCAN); + + range->scan_capa |= IW_SCAN_CAPA_ESSID; + + return 0; +} +EXPORT_SYMBOL(cfg80211_wext_giwrange); -- cgit v1.2.3 From 80e775bf08f1915870fbb0c1c7a45a3fdc291721 Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Fri, 20 Feb 2009 15:37:03 +0100 Subject: mac80211: Add software scan notifiers This adds optional notifier functions for software scan. Signed-off-by: Michael Buesch Signed-off-by: John W. Linville --- net/mac80211/scan.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 23f4de274744..0e81e1633a66 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -245,6 +245,9 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) netif_addr_unlock(local->mdev); netif_tx_unlock_bh(local->mdev); + if (local->ops->sw_scan_complete) + local->ops->sw_scan_complete(local_to_hw(local)); + mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { if (!netif_running(sdata->dev)) @@ -395,6 +398,8 @@ int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata, } local->sw_scanning = true; + if (local->ops->sw_scan_start) + local->ops->sw_scan_start(local_to_hw(local)); mutex_lock(&local->iflist_mtx); list_for_each_entry(sdata, &local->interfaces, list) { -- cgit v1.2.3 From 79f6440c527c61bcd84edfbdeb390841b9fe5095 Mon Sep 17 00:00:00 2001 From: Alina Friedrichsen Date: Sat, 21 Feb 2009 01:27:29 +0100 Subject: mac80211: Introduce a generic commit() to apply changes This patch introduces a generic commit() function which initiate a new network joining process. It should be called after some interface config changes, so that the changes get applied more cleanly. Currently set_ssid() and set_bssid() call it. Others can be added in future patches. In version 1 the header files was forgotten, sorry. Signed-off-by: Alina Friedrichsen Signed-off-by: John W. Linville --- net/mac80211/ibss.c | 30 +++++++++++++++++++----------- net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/mlme.c | 25 ++++++++++++++++--------- 3 files changed, 37 insertions(+), 20 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 1bbfc7029879..aa8937c56285 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -788,6 +788,23 @@ void ieee80211_ibss_setup_sdata(struct ieee80211_sub_if_data *sdata) IEEE80211_IBSS_AUTO_CHANNEL_SEL; } +int ieee80211_ibss_commit(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; + + ifibss->flags &= ~IEEE80211_IBSS_PREV_BSSID_SET; + + if (ifibss->ssid_len) + ifibss->flags |= IEEE80211_IBSS_SSID_SET; + else + ifibss->flags &= ~IEEE80211_IBSS_SSID_SET; + + ifibss->ibss_join_req = jiffies; + ifibss->state = IEEE80211_IBSS_MLME_SEARCH; + + return ieee80211_sta_find_ibss(sdata); +} + int ieee80211_ibss_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; @@ -801,16 +818,7 @@ int ieee80211_ibss_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, siz ifibss->ssid_len = len; } - ifibss->flags &= ~IEEE80211_IBSS_PREV_BSSID_SET; - - if (len) - ifibss->flags |= IEEE80211_IBSS_SSID_SET; - else - ifibss->flags &= ~IEEE80211_IBSS_SSID_SET; - - ifibss->ibss_join_req = jiffies; - ifibss->state = IEEE80211_IBSS_MLME_SEARCH; - return ieee80211_sta_find_ibss(sdata); + return ieee80211_ibss_commit(sdata); } int ieee80211_ibss_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len) @@ -842,7 +850,7 @@ int ieee80211_ibss_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid) } } - return ieee80211_ibss_set_ssid(sdata, ifibss->ssid, ifibss->ssid_len); + return ieee80211_ibss_commit(sdata); } /* scan finished notification */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index d06c75720ced..ecbc8e0cb3e7 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -934,6 +934,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata); ieee80211_rx_result ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, struct ieee80211_rx_status *rx_status); +int ieee80211_sta_commit(struct ieee80211_sub_if_data *sdata); int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len); int ieee80211_sta_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len); int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid); @@ -944,6 +945,7 @@ void ieee80211_send_pspoll(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); /* IBSS code */ +int ieee80211_ibss_commit(struct ieee80211_sub_if_data *sdata); int ieee80211_ibss_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len); int ieee80211_ibss_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len); int ieee80211_ibss_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 5a4977936f6f..7f238589b6ff 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1855,6 +1855,20 @@ void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata) } } +int ieee80211_sta_commit(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + + ifmgd->flags &= ~IEEE80211_STA_PREV_BSSID_SET; + + if (ifmgd->ssid_len) + ifmgd->flags |= IEEE80211_STA_SSID_SET; + else + ifmgd->flags &= ~IEEE80211_STA_SSID_SET; + + return 0; +} + int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len) { struct ieee80211_if_managed *ifmgd; @@ -1870,14 +1884,7 @@ int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size ifmgd->ssid_len = len; } - ifmgd->flags &= ~IEEE80211_STA_PREV_BSSID_SET; - - if (len) - ifmgd->flags |= IEEE80211_STA_SSID_SET; - else - ifmgd->flags &= ~IEEE80211_STA_SSID_SET; - - return 0; + return ieee80211_sta_commit(sdata); } int ieee80211_sta_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len) @@ -1907,7 +1914,7 @@ int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid) } } - return ieee80211_sta_set_ssid(sdata, ifmgd->ssid, ifmgd->ssid_len); + return ieee80211_sta_commit(sdata); } int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, char *ie, size_t len) -- cgit v1.2.3 From b5850a7a4fd5bcab4f6a2c49e5b4ab9ebb1d5d44 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Sat, 21 Feb 2009 00:04:19 -0500 Subject: cfg80211: rename cfg80211_registered_device's idx to wiphy_idx Makes it clearer to read when comparing to ifidx Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/core.c | 35 ++++++++++++++++++----------------- net/wireless/core.h | 2 +- net/wireless/nl80211.c | 4 ++-- net/wireless/sysfs.c | 2 +- 4 files changed, 22 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 0668b2bfc1da..2b3e786ec53f 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -37,12 +37,13 @@ DEFINE_MUTEX(cfg80211_drv_mutex); static struct dentry *ieee80211_debugfs_dir; /* requires cfg80211_drv_mutex to be held! */ -static struct cfg80211_registered_device *cfg80211_drv_by_wiphy(int wiphy) +static struct cfg80211_registered_device * +cfg80211_drv_by_wiphy_idx(int wiphy_idx) { struct cfg80211_registered_device *result = NULL, *drv; list_for_each_entry(drv, &cfg80211_drv_list, list) { - if (drv->idx == wiphy) { + if (drv->wiphy_idx == wiphy_idx) { result = drv; break; } @@ -56,12 +57,12 @@ static struct cfg80211_registered_device * __cfg80211_drv_from_info(struct genl_info *info) { int ifindex; - struct cfg80211_registered_device *bywiphy = NULL, *byifidx = NULL; + struct cfg80211_registered_device *bywiphyidx = NULL, *byifidx = NULL; struct net_device *dev; int err = -EINVAL; if (info->attrs[NL80211_ATTR_WIPHY]) { - bywiphy = cfg80211_drv_by_wiphy( + bywiphyidx = cfg80211_drv_by_wiphy_idx( nla_get_u32(info->attrs[NL80211_ATTR_WIPHY])); err = -ENODEV; } @@ -78,14 +79,14 @@ __cfg80211_drv_from_info(struct genl_info *info) err = -ENODEV; } - if (bywiphy && byifidx) { - if (bywiphy != byifidx) + if (bywiphyidx && byifidx) { + if (bywiphyidx != byifidx) return ERR_PTR(-EINVAL); else - return bywiphy; /* == byifidx */ + return bywiphyidx; /* == byifidx */ } - if (bywiphy) - return bywiphy; + if (bywiphyidx) + return bywiphyidx; if (byifidx) return byifidx; @@ -143,16 +144,16 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, char *newname) { struct cfg80211_registered_device *drv; - int idx, taken = -1, result, digits; + int wiphy_idx, taken = -1, result, digits; mutex_lock(&cfg80211_drv_mutex); /* prohibit calling the thing phy%d when %d is not its number */ - sscanf(newname, PHY_NAME "%d%n", &idx, &taken); - if (taken == strlen(newname) && idx != rdev->idx) { - /* count number of places needed to print idx */ + sscanf(newname, PHY_NAME "%d%n", &wiphy_idx, &taken); + if (taken == strlen(newname) && wiphy_idx != rdev->wiphy_idx) { + /* count number of places needed to print wiphy_idx */ digits = 1; - while (idx /= 10) + while (wiphy_idx /= 10) digits++; /* * deny the name if it is phy where is printed @@ -222,9 +223,9 @@ struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv) mutex_lock(&cfg80211_drv_mutex); - drv->idx = wiphy_counter++; + drv->wiphy_idx = wiphy_counter++; - if (unlikely(drv->idx < 0)) { + if (unlikely(drv->wiphy_idx < 0)) { wiphy_counter--; mutex_unlock(&cfg80211_drv_mutex); /* ugh, wrapped! */ @@ -235,7 +236,7 @@ struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv) mutex_unlock(&cfg80211_drv_mutex); /* give it a proper name */ - dev_set_name(&drv->wiphy.dev, PHY_NAME "%d", drv->idx); + dev_set_name(&drv->wiphy.dev, PHY_NAME "%d", drv->wiphy_idx); mutex_init(&drv->mtx); mutex_init(&drv->devlist_mtx); diff --git a/net/wireless/core.h b/net/wireless/core.h index 5d0c682d737a..178378124800 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -37,7 +37,7 @@ struct cfg80211_registered_device { enum environment_cap env; /* wiphy index, internal only */ - int idx; + int wiphy_idx; /* associate netdev list */ struct mutex devlist_mtx; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a7e751edc739..b176bb800100 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -142,7 +142,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, if (!hdr) return -1; - NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, dev->idx); + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, dev->wiphy_idx); NLA_PUT_STRING(msg, NL80211_ATTR_WIPHY_NAME, wiphy_name(&dev->wiphy)); NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS, dev->wiphy.max_scan_ssids); @@ -2763,7 +2763,7 @@ static int nl80211_send_scan_donemsg(struct sk_buff *msg, if (!hdr) return -1; - NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->idx); + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); /* XXX: we should probably bounce back the request? */ diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 15feaeb5ced5..efe3c5c92b2d 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -31,7 +31,7 @@ static ssize_t name ## _show(struct device *dev, \ return sprintf(buf, fmt "\n", dev_to_rdev(dev)->member); \ } -SHOW_FMT(index, "%d", idx); +SHOW_FMT(index, "%d", wiphy_idx); SHOW_FMT(macaddress, "%pM", wiphy.perm_addr); static struct device_attribute ieee80211_dev_attrs[] = { -- cgit v1.2.3 From 85fd129a721e6e892dbaaf05203baf819730f699 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Sat, 21 Feb 2009 00:04:20 -0500 Subject: cfg80211: add wiphy_idx_valid to check for wiphy_idx sanity This will later be used by others, for now make use of it in cfg80211_drv_by_wiphy_idx() to return early if an invalid wiphy_idx has been provided. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/core.c | 5 ++++- net/wireless/core.h | 7 +++++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 2b3e786ec53f..35d457b2751e 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -42,6 +42,9 @@ cfg80211_drv_by_wiphy_idx(int wiphy_idx) { struct cfg80211_registered_device *result = NULL, *drv; + if (!wiphy_idx_valid(wiphy_idx)) + return NULL; + list_for_each_entry(drv, &cfg80211_drv_list, list) { if (drv->wiphy_idx == wiphy_idx) { result = drv; @@ -225,7 +228,7 @@ struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv) drv->wiphy_idx = wiphy_counter++; - if (unlikely(drv->wiphy_idx < 0)) { + if (unlikely(!wiphy_idx_valid(drv->wiphy_idx))) { wiphy_counter--; mutex_unlock(&cfg80211_drv_mutex); /* ugh, wrapped! */ diff --git a/net/wireless/core.h b/net/wireless/core.h index 178378124800..4f2e0fe38ce3 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -63,6 +63,13 @@ struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy) return container_of(wiphy, struct cfg80211_registered_device, wiphy); } +/* Note 0 is valid, hence phy0 */ +static inline +bool wiphy_idx_valid(int wiphy_idx) +{ + return (wiphy_idx >= 0); +} + extern struct mutex cfg80211_drv_mutex; extern struct list_head cfg80211_drv_list; -- cgit v1.2.3 From a1794390f1afc3631ac056e0f1677b7ab6f7ee74 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Sat, 21 Feb 2009 00:04:21 -0500 Subject: cfg80211: rename cfg80211_drv_mutex to cfg80211_mutex cfg80211_drv_mutex is protecting more than the driver list, this renames it and documents what its currently supposed to protect. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/core.c | 35 ++++++++++++++++++++--------------- net/wireless/core.h | 6 +++--- net/wireless/nl80211.c | 20 ++++++++++---------- net/wireless/reg.c | 18 +++++++++--------- 4 files changed, 42 insertions(+), 37 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 35d457b2751e..39d40d1e06db 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -31,7 +31,12 @@ MODULE_DESCRIPTION("wireless configuration support"); * only read the list, and that can happen quite * often because we need to do it for each command */ LIST_HEAD(cfg80211_drv_list); -DEFINE_MUTEX(cfg80211_drv_mutex); + +/* + * This is used to protect the cfg80211_drv_list, cfg80211_regdomain, and + * the last reguluatory request receipt in regd.c + */ +DEFINE_MUTEX(cfg80211_mutex); /* for debugfs */ static struct dentry *ieee80211_debugfs_dir; @@ -55,7 +60,7 @@ cfg80211_drv_by_wiphy_idx(int wiphy_idx) return result; } -/* requires cfg80211_drv_mutex to be held! */ +/* requires cfg80211_mutex to be held! */ static struct cfg80211_registered_device * __cfg80211_drv_from_info(struct genl_info *info) { @@ -102,7 +107,7 @@ cfg80211_get_dev_from_info(struct genl_info *info) { struct cfg80211_registered_device *drv; - mutex_lock(&cfg80211_drv_mutex); + mutex_lock(&cfg80211_mutex); drv = __cfg80211_drv_from_info(info); /* if it is not an error we grab the lock on @@ -111,7 +116,7 @@ cfg80211_get_dev_from_info(struct genl_info *info) if (!IS_ERR(drv)) mutex_lock(&drv->mtx); - mutex_unlock(&cfg80211_drv_mutex); + mutex_unlock(&cfg80211_mutex); return drv; } @@ -122,7 +127,7 @@ cfg80211_get_dev_from_ifindex(int ifindex) struct cfg80211_registered_device *drv = ERR_PTR(-ENODEV); struct net_device *dev; - mutex_lock(&cfg80211_drv_mutex); + mutex_lock(&cfg80211_mutex); dev = dev_get_by_index(&init_net, ifindex); if (!dev) goto out; @@ -133,7 +138,7 @@ cfg80211_get_dev_from_ifindex(int ifindex) drv = ERR_PTR(-ENODEV); dev_put(dev); out: - mutex_unlock(&cfg80211_drv_mutex); + mutex_unlock(&cfg80211_mutex); return drv; } @@ -149,7 +154,7 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, struct cfg80211_registered_device *drv; int wiphy_idx, taken = -1, result, digits; - mutex_lock(&cfg80211_drv_mutex); + mutex_lock(&cfg80211_mutex); /* prohibit calling the thing phy%d when %d is not its number */ sscanf(newname, PHY_NAME "%d%n", &wiphy_idx, &taken); @@ -197,7 +202,7 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, result = 0; out_unlock: - mutex_unlock(&cfg80211_drv_mutex); + mutex_unlock(&cfg80211_mutex); if (result == 0) nl80211_notify_dev_rename(rdev); @@ -224,19 +229,19 @@ struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv) drv->ops = ops; - mutex_lock(&cfg80211_drv_mutex); + mutex_lock(&cfg80211_mutex); drv->wiphy_idx = wiphy_counter++; if (unlikely(!wiphy_idx_valid(drv->wiphy_idx))) { wiphy_counter--; - mutex_unlock(&cfg80211_drv_mutex); + mutex_unlock(&cfg80211_mutex); /* ugh, wrapped! */ kfree(drv); return NULL; } - mutex_unlock(&cfg80211_drv_mutex); + mutex_unlock(&cfg80211_mutex); /* give it a proper name */ dev_set_name(&drv->wiphy.dev, PHY_NAME "%d", drv->wiphy_idx); @@ -314,7 +319,7 @@ int wiphy_register(struct wiphy *wiphy) /* check and set up bitrates */ ieee80211_set_bitrate_flags(wiphy); - mutex_lock(&cfg80211_drv_mutex); + mutex_lock(&cfg80211_mutex); /* set up regulatory info */ wiphy_update_regulatory(wiphy, REGDOM_SET_BY_CORE); @@ -334,7 +339,7 @@ int wiphy_register(struct wiphy *wiphy) res = 0; out_unlock: - mutex_unlock(&cfg80211_drv_mutex); + mutex_unlock(&cfg80211_mutex); return res; } EXPORT_SYMBOL(wiphy_register); @@ -344,7 +349,7 @@ void wiphy_unregister(struct wiphy *wiphy) struct cfg80211_registered_device *drv = wiphy_to_dev(wiphy); /* protect the device list */ - mutex_lock(&cfg80211_drv_mutex); + mutex_lock(&cfg80211_mutex); BUG_ON(!list_empty(&drv->netdev_list)); @@ -370,7 +375,7 @@ void wiphy_unregister(struct wiphy *wiphy) device_del(&drv->wiphy.dev); debugfs_remove(drv->wiphy.debugfsdir); - mutex_unlock(&cfg80211_drv_mutex); + mutex_unlock(&cfg80211_mutex); } EXPORT_SYMBOL(wiphy_unregister); diff --git a/net/wireless/core.h b/net/wireless/core.h index 4f2e0fe38ce3..f3ab00cbf766 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -70,7 +70,7 @@ bool wiphy_idx_valid(int wiphy_idx) return (wiphy_idx >= 0); } -extern struct mutex cfg80211_drv_mutex; +extern struct mutex cfg80211_mutex; extern struct list_head cfg80211_drv_list; struct cfg80211_internal_bss { @@ -89,13 +89,13 @@ struct cfg80211_internal_bss { * the driver's mutex! * * This means that you need to call cfg80211_put_dev() - * before being allowed to acquire &cfg80211_drv_mutex! + * before being allowed to acquire &cfg80211_mutex! * * This is necessary because we need to lock the global * mutex to get an item off the list safely, and then * we lock the drv mutex so it doesn't go away under us. * - * We don't want to keep cfg80211_drv_mutex locked + * We don't want to keep cfg80211_mutex locked * for all the time in order to allow requests on * other interfaces to go through at the same time. * diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b176bb800100..88a530f707e6 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -256,7 +256,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) int start = cb->args[0]; struct cfg80211_registered_device *dev; - mutex_lock(&cfg80211_drv_mutex); + mutex_lock(&cfg80211_mutex); list_for_each_entry(dev, &cfg80211_drv_list, list) { if (++idx <= start) continue; @@ -267,7 +267,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) break; } } - mutex_unlock(&cfg80211_drv_mutex); + mutex_unlock(&cfg80211_mutex); cb->args[0] = idx; @@ -470,7 +470,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * struct cfg80211_registered_device *dev; struct wireless_dev *wdev; - mutex_lock(&cfg80211_drv_mutex); + mutex_lock(&cfg80211_mutex); list_for_each_entry(dev, &cfg80211_drv_list, list) { if (wp_idx < wp_start) { wp_idx++; @@ -497,7 +497,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * wp_idx++; } out: - mutex_unlock(&cfg80211_drv_mutex); + mutex_unlock(&cfg80211_mutex); cb->args[0] = wp_idx; cb->args[1] = if_idx; @@ -1916,9 +1916,9 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) if (is_world_regdom(data)) return -EINVAL; #endif - mutex_lock(&cfg80211_drv_mutex); + mutex_lock(&cfg80211_mutex); r = __regulatory_hint(NULL, REGDOM_SET_BY_USER, data, 0, ENVIRON_ANY); - mutex_unlock(&cfg80211_drv_mutex); + mutex_unlock(&cfg80211_mutex); /* This means the regulatory domain was already set, however * we don't want to confuse userspace with a "successful error" * message so lets just treat it as a success */ @@ -2112,7 +2112,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) unsigned int i; int err = -EINVAL; - mutex_lock(&cfg80211_drv_mutex); + mutex_lock(&cfg80211_mutex); if (!cfg80211_regdomain) goto out; @@ -2175,7 +2175,7 @@ nla_put_failure: genlmsg_cancel(msg, hdr); err = -EMSGSIZE; out: - mutex_unlock(&cfg80211_drv_mutex); + mutex_unlock(&cfg80211_mutex); return err; } @@ -2234,9 +2234,9 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) BUG_ON(rule_idx != num_rules); - mutex_lock(&cfg80211_drv_mutex); + mutex_lock(&cfg80211_mutex); r = set_regdom(rd); - mutex_unlock(&cfg80211_drv_mutex); + mutex_unlock(&cfg80211_mutex); return r; bad_reg: diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 2323644330cd..ba823120d245 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1116,7 +1116,7 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, return -EINVAL; } -/* Caller must hold &cfg80211_drv_mutex */ +/* Caller must hold &cfg80211_mutex */ int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, const char *alpha2, u32 country_ie_checksum, @@ -1188,13 +1188,13 @@ void regulatory_hint(struct wiphy *wiphy, const char *alpha2) int r; BUG_ON(!alpha2); - mutex_lock(&cfg80211_drv_mutex); + mutex_lock(&cfg80211_mutex); r = __regulatory_hint(wiphy, REGDOM_SET_BY_DRIVER, alpha2, 0, ENVIRON_ANY); /* This is required so that the orig_* parameters are saved */ if (r == -EALREADY && wiphy->strict_regulatory) wiphy_update_regulatory(wiphy, REGDOM_SET_BY_DRIVER); - mutex_unlock(&cfg80211_drv_mutex); + mutex_unlock(&cfg80211_mutex); } EXPORT_SYMBOL(regulatory_hint); @@ -1225,7 +1225,7 @@ void regulatory_hint_11d(struct wiphy *wiphy, if (!last_request) return; - mutex_lock(&cfg80211_drv_mutex); + mutex_lock(&cfg80211_mutex); /* IE len must be evenly divisible by 2 */ if (country_ie_len & 0x01) @@ -1307,7 +1307,7 @@ void regulatory_hint_11d(struct wiphy *wiphy, country_ie_regdomain->alpha2, checksum, env); out: - mutex_unlock(&cfg80211_drv_mutex); + mutex_unlock(&cfg80211_mutex); } EXPORT_SYMBOL(regulatory_hint_11d); @@ -1562,7 +1562,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) /* Use this call to set the current regulatory domain. Conflicts with * multiple drivers can be ironed out later. Caller must've already - * kmalloc'd the rd structure. Caller must hold cfg80211_drv_mutex */ + * kmalloc'd the rd structure. Caller must hold cfg80211_mutex */ int set_regdom(const struct ieee80211_regdomain *rd) { int r; @@ -1586,7 +1586,7 @@ int set_regdom(const struct ieee80211_regdomain *rd) return r; } -/* Caller must hold cfg80211_drv_mutex */ +/* Caller must hold cfg80211_mutex */ void reg_device_remove(struct wiphy *wiphy) { kfree(wiphy->regd); @@ -1633,7 +1633,7 @@ int regulatory_init(void) void regulatory_exit(void) { - mutex_lock(&cfg80211_drv_mutex); + mutex_lock(&cfg80211_mutex); reset_regdomains(); @@ -1644,5 +1644,5 @@ void regulatory_exit(void) platform_device_unregister(reg_pdev); - mutex_unlock(&cfg80211_drv_mutex); + mutex_unlock(&cfg80211_mutex); } -- cgit v1.2.3 From 80778f18c09673df2712c7da28aa920469adcae2 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Sat, 21 Feb 2009 00:04:22 -0500 Subject: nl80211: disallow user requests prior to regulatory_init() If cfg80211 is built into the kernel there is perhaps a small time window betwen nl80211_init() and regulatory_init() where cfg80211_regdomain hasn't yet been initialized to let the wireless core do its work. During that rare case and time frame (if its even possible) we don't allow user regulatory changes as cfg80211 is working on enabling its first regulatory domain. To check for cfg80211_regdomain we now contend the entire operation using the cfg80211_mutex. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 88a530f707e6..130fc2561bac 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1906,24 +1906,42 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) int r; char *data = NULL; - if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) - return -EINVAL; + /* + * You should only get this when cfg80211 hasn't yet initialized + * completely when built-in to the kernel right between the time + * window between nl80211_init() and regulatory_init(), if that is + * even possible. + */ + mutex_lock(&cfg80211_mutex); + if (unlikely(!cfg80211_regdomain)) { + r = -EINPROGRESS; + goto out; + } + + if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) { + r = -EINVAL; + goto out; + } data = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]); #ifdef CONFIG_WIRELESS_OLD_REGULATORY /* We ignore world regdom requests with the old regdom setup */ - if (is_world_regdom(data)) - return -EINVAL; + if (is_world_regdom(data)) { + r = -EINVAL; + goto out; + } #endif - mutex_lock(&cfg80211_mutex); r = __regulatory_hint(NULL, REGDOM_SET_BY_USER, data, 0, ENVIRON_ANY); - mutex_unlock(&cfg80211_mutex); - /* This means the regulatory domain was already set, however + /* + * This means the regulatory domain was already set, however * we don't want to confuse userspace with a "successful error" - * message so lets just treat it as a success */ + * message so lets just treat it as a success + */ if (r == -EALREADY) r = 0; +out: + mutex_unlock(&cfg80211_mutex); return r; } -- cgit v1.2.3 From ba25c1414264f1f5fc046cf34d20947e41713a0d Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Sat, 21 Feb 2009 00:04:23 -0500 Subject: cfg80211: add regulatory_hint_core() to separate the core reg hint This makes the core hint path more readable and allows for us to later make it obvious under what circumstances we need locking or not. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index ba823120d245..6373a78a37e7 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1050,11 +1050,7 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, case REGDOM_SET_BY_INIT: return -EINVAL; case REGDOM_SET_BY_CORE: - /* - * Always respect new wireless core hints, should only happen - * when updating the world regulatory domain at init. - */ - return 0; + return -EINVAL; case REGDOM_SET_BY_COUNTRY_IE: if (unlikely(!is_an_alpha2(alpha2))) return -EINVAL; @@ -1183,6 +1179,26 @@ new_request: return call_crda(alpha2); } +static int regulatory_hint_core(const char *alpha2) +{ + struct regulatory_request *request; + + BUG_ON(last_request); + + request = kzalloc(sizeof(struct regulatory_request), + GFP_KERNEL); + if (!request) + return -ENOMEM; + + request->alpha2[0] = alpha2[0]; + request->alpha2[1] = alpha2[1]; + request->initiator = REGDOM_SET_BY_CORE; + + last_request = request; + + return call_crda(alpha2); +} + void regulatory_hint(struct wiphy *wiphy, const char *alpha2) { int r; @@ -1616,16 +1632,16 @@ int regulatory_init(void) * stuck with the static values. We ignore "EU" code as * that is not a valid ISO / IEC 3166 alpha2 */ if (ieee80211_regdom[0] != 'E' || ieee80211_regdom[1] != 'U') - err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, - ieee80211_regdom, 0, ENVIRON_ANY); + err = regulatory_hint_core(ieee80211_regdom); #else cfg80211_regdomain = cfg80211_world_regdom; - err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, "00", 0, ENVIRON_ANY); - if (err) + err = regulatory_hint_core("00"); + if (err) { printk(KERN_ERR "cfg80211: calling CRDA failed - " "unable to update world regulatory domain, " "using static definition\n"); + } #endif return 0; -- cgit v1.2.3 From bcf4f99b7b1e0971b79e8df40331e77fc1744049 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Sat, 21 Feb 2009 00:04:24 -0500 Subject: cfg80211: propagate -ENOMEM during regulatory_init() Calling kobject_uevent_env() can fail mainly due to out of memory conditions. We do not want to continue during such conditions so propagate that as well instead of letting cfg80211 load as if everything is peachy. Additionally lets clarify that when CRDA is not called during cfg80211's initialization _and_ if the error is not an -ENOMEM its because kobject_uevent_env() failed to call CRDA, not because CRDA failed. For those who want to find out why we also let you do so by enabling the kernel config CONFIG_CFG80211_REG_DEBUG -- you'll get an actual stack trace. So for now we'll treat non -ENOMEM kobject_uevent_env() failures as non fatal during cfg80211's initialization. CC: Greg KH Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 6373a78a37e7..47d505616a4b 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1616,7 +1616,7 @@ void reg_device_remove(struct wiphy *wiphy) int regulatory_init(void) { - int err; + int err = 0; reg_pdev = platform_device_register_simple("regulatory", 0, NULL, 0); if (IS_ERR(reg_pdev)) @@ -1637,12 +1637,24 @@ int regulatory_init(void) cfg80211_regdomain = cfg80211_world_regdom; err = regulatory_hint_core("00"); +#endif if (err) { - printk(KERN_ERR "cfg80211: calling CRDA failed - " - "unable to update world regulatory domain, " - "using static definition\n"); - } + if (err == -ENOMEM) + return err; + /* + * N.B. kobject_uevent_env() can fail mainly for when we're out + * memory which is handled and propagated appropriately above + * but it can also fail during a netlink_broadcast() or during + * early boot for call_usermodehelper(). For now treat these + * errors as non-fatal. + */ + printk(KERN_ERR "cfg80211: kobject_uevent_env() was unable " + "to call CRDA during init"); +#ifdef CONFIG_CFG80211_REG_DEBUG + /* We want to find out exactly why when debugging */ + WARN_ON(err); #endif + } return 0; } -- cgit v1.2.3 From 761cf7ecffc4bc079679e65c3b1ab107c1c1fb56 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Sat, 21 Feb 2009 00:04:25 -0500 Subject: cfg80211: add assert_cfg80211_lock() to ensure proper protection Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/core.c | 5 ++++- net/wireless/core.h | 6 ++++++ net/wireless/nl80211.c | 3 ++- net/wireless/reg.c | 15 +++++++++++++++ 4 files changed, 27 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 39d40d1e06db..e347093ccc73 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -50,6 +49,8 @@ cfg80211_drv_by_wiphy_idx(int wiphy_idx) if (!wiphy_idx_valid(wiphy_idx)) return NULL; + assert_cfg80211_lock(); + list_for_each_entry(drv, &cfg80211_drv_list, list) { if (drv->wiphy_idx == wiphy_idx) { result = drv; @@ -69,6 +70,8 @@ __cfg80211_drv_from_info(struct genl_info *info) struct net_device *dev; int err = -EINVAL; + assert_cfg80211_lock(); + if (info->attrs[NL80211_ATTR_WIPHY]) { bywiphyidx = cfg80211_drv_by_wiphy_idx( nla_get_u32(info->attrs[NL80211_ATTR_WIPHY])); diff --git a/net/wireless/core.h b/net/wireless/core.h index f3ab00cbf766..982cc6be3484 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -73,6 +74,11 @@ bool wiphy_idx_valid(int wiphy_idx) extern struct mutex cfg80211_mutex; extern struct list_head cfg80211_drv_list; +static inline void assert_cfg80211_lock(void) +{ + BUG_ON(!mutex_is_locked(&cfg80211_mutex)); +} + struct cfg80211_internal_bss { struct list_head list; struct rb_node rbn; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 130fc2561bac..e0d3879b8852 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -138,6 +137,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, int i; u16 ifmodes = dev->wiphy.interface_modes; + assert_cfg80211_lock(); + hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_WIPHY); if (!hdr) return -1; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 47d505616a4b..e49ac9b2adac 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -276,6 +276,8 @@ static bool alpha2_equal(const char *alpha2_x, const char *alpha2_y) static bool regdom_changed(const char *alpha2) { + assert_cfg80211_lock(); + if (!cfg80211_regdomain) return true; if (alpha2_equal(cfg80211_regdomain->alpha2, alpha2)) @@ -830,6 +832,8 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, struct ieee80211_supported_band *sband; struct ieee80211_channel *chan; + assert_cfg80211_lock(); + sband = wiphy->bands[band]; BUG_ON(chan_idx >= sband->n_channels); chan = &sband->channels[chan_idx]; @@ -1042,6 +1046,9 @@ static int reg_copy_regd(const struct ieee80211_regdomain **dst_regd, static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, const char *alpha2) { + + assert_cfg80211_lock(); + /* All initial requests are respected */ if (!last_request) return 0; @@ -1122,6 +1129,8 @@ int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, bool intersect = false; int r = 0; + assert_cfg80211_lock(); + r = ignore_request(wiphy, set_by, alpha2); if (r == REG_INTERSECT) { @@ -1217,6 +1226,8 @@ EXPORT_SYMBOL(regulatory_hint); static bool reg_same_country_ie_hint(struct wiphy *wiphy, u32 country_ie_checksum) { + assert_cfg80211_lock(); + if (!last_request->wiphy) return false; if (likely(last_request->wiphy != wiphy)) @@ -1583,6 +1594,8 @@ int set_regdom(const struct ieee80211_regdomain *rd) { int r; + assert_cfg80211_lock(); + /* Note that this doesn't update the wiphys, this is done below */ r = __set_regdom(rd); if (r) { @@ -1605,6 +1618,8 @@ int set_regdom(const struct ieee80211_regdomain *rd) /* Caller must hold cfg80211_mutex */ void reg_device_remove(struct wiphy *wiphy) { + assert_cfg80211_lock(); + kfree(wiphy->regd); if (!last_request || !last_request->wiphy) return; -- cgit v1.2.3 From 806a9e39670be4f1f861c346ec102a79e81b90c3 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Sat, 21 Feb 2009 00:04:26 -0500 Subject: cfg80211: make regulatory_request use wiphy_idx instead of wiphy We do this so later on we can move the pending requests onto a workqueue. By using the wiphy_idx instead of the wiphy we can later easily check if the wiphy has disappeared or not. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/core.c | 30 ++++++++++++++++++++++--- net/wireless/core.h | 12 ++++++++++ net/wireless/reg.c | 64 +++++++++++++++++++++++++++++++++-------------------- 3 files changed, 79 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index e347093ccc73..b1a354b7fc06 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -40,9 +40,8 @@ DEFINE_MUTEX(cfg80211_mutex); /* for debugfs */ static struct dentry *ieee80211_debugfs_dir; -/* requires cfg80211_drv_mutex to be held! */ -static struct cfg80211_registered_device * -cfg80211_drv_by_wiphy_idx(int wiphy_idx) +/* requires cfg80211_mutex to be held! */ +struct cfg80211_registered_device *cfg80211_drv_by_wiphy_idx(int wiphy_idx) { struct cfg80211_registered_device *result = NULL, *drv; @@ -61,6 +60,31 @@ cfg80211_drv_by_wiphy_idx(int wiphy_idx) return result; } +int get_wiphy_idx(struct wiphy *wiphy) +{ + struct cfg80211_registered_device *drv; + if (!wiphy) + return WIPHY_IDX_STALE; + drv = wiphy_to_dev(wiphy); + return drv->wiphy_idx; +} + +/* requires cfg80211_drv_mutex to be held! */ +struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx) +{ + struct cfg80211_registered_device *drv; + + if (!wiphy_idx_valid(wiphy_idx)) + return NULL; + + assert_cfg80211_lock(); + + drv = cfg80211_drv_by_wiphy_idx(wiphy_idx); + if (!drv) + return NULL; + return &drv->wiphy; +} + /* requires cfg80211_mutex to be held! */ static struct cfg80211_registered_device * __cfg80211_drv_from_info(struct genl_info *info) diff --git a/net/wireless/core.h b/net/wireless/core.h index 982cc6be3484..cd8e6e3ef116 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -79,6 +79,12 @@ static inline void assert_cfg80211_lock(void) BUG_ON(!mutex_is_locked(&cfg80211_mutex)); } +/* + * You can use this to mark a wiphy_idx as not having an associated wiphy. + * It guarantees cfg80211_drv_by_wiphy_idx(wiphy_idx) will return NULL + */ +#define WIPHY_IDX_STALE -1 + struct cfg80211_internal_bss { struct list_head list; struct rb_node rbn; @@ -88,6 +94,9 @@ struct cfg80211_internal_bss { struct cfg80211_bss pub; }; +struct cfg80211_registered_device *cfg80211_drv_by_wiphy_idx(int wiphy_idx); +int get_wiphy_idx(struct wiphy *wiphy); + /* * This function returns a pointer to the driver * that the genl_info item that is passed refers to. @@ -111,6 +120,9 @@ struct cfg80211_internal_bss { extern struct cfg80211_registered_device * cfg80211_get_dev_from_info(struct genl_info *info); +/* requires cfg80211_drv_mutex to be held! */ +struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx); + /* identical to cfg80211_get_dev_from_info but only operate on ifindex */ extern struct cfg80211_registered_device * cfg80211_get_dev_from_ifindex(int ifindex); diff --git a/net/wireless/reg.c b/net/wireless/reg.c index e49ac9b2adac..d44f3b5481ad 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -831,9 +831,12 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, const struct ieee80211_power_rule *power_rule = NULL; struct ieee80211_supported_band *sband; struct ieee80211_channel *chan; + struct wiphy *request_wiphy; assert_cfg80211_lock(); + request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); + sband = wiphy->bands[band]; BUG_ON(chan_idx >= sband->n_channels); chan = &sband->channels[chan_idx]; @@ -881,8 +884,8 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, power_rule = ®_rule->power_rule; if (last_request->initiator == REGDOM_SET_BY_DRIVER && - last_request->wiphy && last_request->wiphy == wiphy && - last_request->wiphy->strict_regulatory) { + request_wiphy && request_wiphy == wiphy && + request_wiphy->strict_regulatory) { /* This gaurantees the driver's requested regulatory domain * will always be used as a base for further regulatory * settings */ @@ -1046,6 +1049,7 @@ static int reg_copy_regd(const struct ieee80211_regdomain **dst_regd, static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, const char *alpha2) { + struct wiphy *last_wiphy = NULL; assert_cfg80211_lock(); @@ -1059,10 +1063,13 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, case REGDOM_SET_BY_CORE: return -EINVAL; case REGDOM_SET_BY_COUNTRY_IE: + + last_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); + if (unlikely(!is_an_alpha2(alpha2))) return -EINVAL; if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) { - if (last_request->wiphy != wiphy) { + if (last_wiphy != wiphy) { /* * Two cards with two APs claiming different * different Country IE alpha2s. We could @@ -1163,7 +1170,7 @@ new_request: request->alpha2[0] = alpha2[0]; request->alpha2[1] = alpha2[1]; request->initiator = set_by; - request->wiphy = wiphy; + request->wiphy_idx = get_wiphy_idx(wiphy); request->intersect = intersect; request->country_ie_checksum = country_ie_checksum; request->country_ie_env = env; @@ -1226,11 +1233,16 @@ EXPORT_SYMBOL(regulatory_hint); static bool reg_same_country_ie_hint(struct wiphy *wiphy, u32 country_ie_checksum) { + struct wiphy *request_wiphy; + assert_cfg80211_lock(); - if (!last_request->wiphy) + request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); + + if (!request_wiphy) return false; - if (likely(last_request->wiphy != wiphy)) + + if (likely(request_wiphy != wiphy)) return !country_ie_integrity_changes(country_ie_checksum); /* We should not have let these through at this point, they * should have been picked up earlier by the first alpha2 check @@ -1278,14 +1290,15 @@ void regulatory_hint_11d(struct wiphy *wiphy, /* We will run this for *every* beacon processed for the BSSID, so * we optimize an early check to exit out early if we don't have to * do anything */ - if (likely(last_request->wiphy)) { + if (likely(wiphy_idx_valid(last_request->wiphy_idx))) { struct cfg80211_registered_device *drv_last_ie; - drv_last_ie = wiphy_to_dev(last_request->wiphy); + drv_last_ie = + cfg80211_drv_by_wiphy_idx(last_request->wiphy_idx); /* Lets keep this simple -- we trust the first AP * after we intersect with CRDA */ - if (likely(last_request->wiphy == wiphy)) { + if (likely(&drv_last_ie->wiphy == wiphy)) { /* Ignore IEs coming in on this wiphy with * the same alpha2 and environment cap */ if (likely(alpha2_equal(drv_last_ie->country_ie_alpha2, @@ -1377,13 +1390,12 @@ static void print_regdomain(const struct ieee80211_regdomain *rd) { if (is_intersected_alpha2(rd->alpha2)) { - struct wiphy *wiphy = NULL; - struct cfg80211_registered_device *drv; if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) { - if (last_request->wiphy) { - wiphy = last_request->wiphy; - drv = wiphy_to_dev(wiphy); + struct cfg80211_registered_device *drv; + drv = cfg80211_drv_by_wiphy_idx( + last_request->wiphy_idx); + if (drv) { printk(KERN_INFO "cfg80211: Current regulatory " "domain updated by AP to: %c%c\n", drv->country_ie_alpha2[0], @@ -1449,7 +1461,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) { const struct ieee80211_regdomain *intersected_rd = NULL; struct cfg80211_registered_device *drv = NULL; - struct wiphy *wiphy = NULL; + struct wiphy *request_wiphy; /* Some basic sanity checks first */ if (is_world_regdom(rd->alpha2)) { @@ -1477,8 +1489,6 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) return -EINVAL; } - wiphy = last_request->wiphy; - /* Now lets set the regulatory domain, update all driver channels * and finally inform them of what we have done, in case they want * to review or adjust their own settings based on their own @@ -1494,6 +1504,8 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) return -EINVAL; } + request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); + if (!last_request->intersect) { int r; @@ -1506,9 +1518,9 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) /* For a driver hint, lets copy the regulatory domain the * driver wanted to the wiphy to deal with conflicts */ - BUG_ON(last_request->wiphy->regd); + BUG_ON(request_wiphy->regd); - r = reg_copy_regd(&last_request->wiphy->regd, rd); + r = reg_copy_regd(&request_wiphy->regd, rd); if (r) return r; @@ -1529,7 +1541,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) * However if a driver requested this specific regulatory * domain we keep it for its private use */ if (last_request->initiator == REGDOM_SET_BY_DRIVER) - last_request->wiphy->regd = rd; + request_wiphy->regd = rd; else kfree(rd); @@ -1569,7 +1581,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) if (!intersected_rd) return -EINVAL; - drv = wiphy_to_dev(wiphy); + drv = wiphy_to_dev(request_wiphy); drv->country_ie_alpha2[0] = rd->alpha2[0]; drv->country_ie_alpha2[1] = rd->alpha2[1]; @@ -1618,14 +1630,18 @@ int set_regdom(const struct ieee80211_regdomain *rd) /* Caller must hold cfg80211_mutex */ void reg_device_remove(struct wiphy *wiphy) { + struct wiphy *request_wiphy; + assert_cfg80211_lock(); + request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); + kfree(wiphy->regd); - if (!last_request || !last_request->wiphy) + if (!last_request || !request_wiphy) return; - if (last_request->wiphy != wiphy) + if (request_wiphy != wiphy) return; - last_request->wiphy = NULL; + last_request->wiphy_idx = WIPHY_IDX_STALE; last_request->country_ie_env = ENVIRON_ANY; } -- cgit v1.2.3 From d335fe6391c2d86582cf71ef5773a161ee604608 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Sat, 21 Feb 2009 00:04:27 -0500 Subject: cfg80211: protect first access of last_request on 11d hint under mutex We were not protecting last_request there is a small possible race between an 11d hint and another routine which calls reset_regdomains() which can prevent a valid country IE from being processed. This is not critical as it will still be procesed soon after but locking prior to it is correct. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index d44f3b5481ad..b47445219a48 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1261,11 +1261,13 @@ void regulatory_hint_11d(struct wiphy *wiphy, u32 checksum = 0; enum environment_cap env = ENVIRON_ANY; - if (!last_request) - return; - mutex_lock(&cfg80211_mutex); + if (unlikely(!last_request)) { + mutex_unlock(&cfg80211_mutex); + return; + } + /* IE len must be evenly divisible by 2 */ if (country_ie_len & 0x01) goto out; -- cgit v1.2.3 From 915278e099e532f3a874764e28c81958f788b9f1 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Sat, 21 Feb 2009 00:04:28 -0500 Subject: cfg80211: remove likely from an 11d hint case Truth of the matter this was confusing people so mark it as unlikely as that is the case now. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index b47445219a48..baf50cac6e0a 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1331,14 +1331,16 @@ void regulatory_hint_11d(struct wiphy *wiphy, if (!rd) goto out; - /* This will not happen right now but we leave it here for the + /* + * This will not happen right now but we leave it here for the * the future when we want to add suspend/resume support and having * the user move to another country after doing so, or having the user - * move to another AP. Right now we just trust the first AP. This is why - * this is marked as likley(). If we hit this before we add this support - * we want to be informed of it as it would indicate a mistake in the - * current design */ - if (likely(WARN_ON(reg_same_country_ie_hint(wiphy, checksum)))) + * move to another AP. Right now we just trust the first AP. + * + * If we hit this before we add this support we want to be informed of + * it as it would indicate a mistake in the current design + */ + if (WARN_ON(reg_same_country_ie_hint(wiphy, checksum))) goto out; /* We keep this around for when CRDA comes back with a response so -- cgit v1.2.3 From 0441d6ffc705de17d85923264a1b03b71ebfccb8 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Sat, 21 Feb 2009 00:04:29 -0500 Subject: cfg80211: free rd on unlikely event on 11d hint This was never happening but it was still wrong, so correct it. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index baf50cac6e0a..af762be3f0a1 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1341,7 +1341,7 @@ void regulatory_hint_11d(struct wiphy *wiphy, * it as it would indicate a mistake in the current design */ if (WARN_ON(reg_same_country_ie_hint(wiphy, checksum))) - goto out; + goto free_rd_out; /* We keep this around for when CRDA comes back with a response so * we can intersect with that */ @@ -1350,6 +1350,10 @@ void regulatory_hint_11d(struct wiphy *wiphy, __regulatory_hint(wiphy, REGDOM_SET_BY_COUNTRY_IE, country_ie_regdomain->alpha2, checksum, env); + goto out; + +free_rd_out: + kfree(rd); out: mutex_unlock(&cfg80211_mutex); } -- cgit v1.2.3 From fe33eb390854886e1fd5d4835d833b80d145aafb Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Sat, 21 Feb 2009 00:04:30 -0500 Subject: cfg80211: move all regulatory hints to workqueue All regulatory hints (core, driver, userspace and 11d) are now processed in a workqueue. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 30 +++------ net/wireless/reg.c | 178 ++++++++++++++++++++++++++++++++++++++++++++----- net/wireless/reg.h | 2 + 3 files changed, 175 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e0d3879b8852..97f69bed3fe2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1915,34 +1915,24 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) */ mutex_lock(&cfg80211_mutex); if (unlikely(!cfg80211_regdomain)) { - r = -EINPROGRESS; - goto out; + mutex_unlock(&cfg80211_mutex); + return -EINPROGRESS; } + mutex_unlock(&cfg80211_mutex); - if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) { - r = -EINVAL; - goto out; - } + if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) + return -EINVAL; data = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]); #ifdef CONFIG_WIRELESS_OLD_REGULATORY /* We ignore world regdom requests with the old regdom setup */ - if (is_world_regdom(data)) { - r = -EINVAL; - goto out; - } + if (is_world_regdom(data)) + return -EINVAL; #endif - r = __regulatory_hint(NULL, REGDOM_SET_BY_USER, data, 0, ENVIRON_ANY); - /* - * This means the regulatory domain was already set, however - * we don't want to confuse userspace with a "successful error" - * message so lets just treat it as a success - */ - if (r == -EALREADY) - r = 0; -out: - mutex_unlock(&cfg80211_mutex); + + r = regulatory_hint_user(data); + return r; } diff --git a/net/wireless/reg.c b/net/wireless/reg.c index af762be3f0a1..0b8c4b86789a 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -64,6 +64,9 @@ const struct ieee80211_regdomain *cfg80211_regdomain; * what it thinks should apply for the same country */ static const struct ieee80211_regdomain *country_ie_regdomain; +static LIST_HEAD(reg_requests_list); +static spinlock_t reg_requests_lock; + /* We keep a static world regulatory domain in case of the absence of CRDA */ static const struct ieee80211_regdomain world_regdom = { .n_reg_rules = 1, @@ -831,7 +834,7 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, const struct ieee80211_power_rule *power_rule = NULL; struct ieee80211_supported_band *sband; struct ieee80211_channel *chan; - struct wiphy *request_wiphy; + struct wiphy *request_wiphy = NULL; assert_cfg80211_lock(); @@ -1195,6 +1198,89 @@ new_request: return call_crda(alpha2); } +/* This currently only processes user and driver regulatory hints */ +static int reg_process_hint(struct regulatory_request *reg_request) +{ + int r = 0; + struct wiphy *wiphy = NULL; + + BUG_ON(!reg_request->alpha2); + + mutex_lock(&cfg80211_mutex); + + if (wiphy_idx_valid(reg_request->wiphy_idx)) + wiphy = wiphy_idx_to_wiphy(reg_request->wiphy_idx); + + if (reg_request->initiator == REGDOM_SET_BY_DRIVER && + !wiphy) { + r = -ENODEV; + goto out; + } + + r = __regulatory_hint(wiphy, + reg_request->initiator, + reg_request->alpha2, + reg_request->country_ie_checksum, + reg_request->country_ie_env); + /* This is required so that the orig_* parameters are saved */ + if (r == -EALREADY && wiphy && wiphy->strict_regulatory) + wiphy_update_regulatory(wiphy, reg_request->initiator); +out: + mutex_unlock(&cfg80211_mutex); + + if (r == -EALREADY) + r = 0; + + return r; +} + +static void reg_process_pending_hints(void) + { + struct regulatory_request *reg_request; + int r; + + spin_lock(®_requests_lock); + while (!list_empty(®_requests_list)) { + reg_request = list_first_entry(®_requests_list, + struct regulatory_request, + list); + list_del_init(®_request->list); + spin_unlock(®_requests_lock); + + r = reg_process_hint(reg_request); +#ifdef CONFIG_CFG80211_REG_DEBUG + if (r && (reg_request->initiator == REGDOM_SET_BY_DRIVER || + reg_request->initiator == REGDOM_SET_BY_COUNTRY_IE)) + printk(KERN_ERR "cfg80211: wiphy_idx %d sent a " + "regulatory hint for %c%c but now has " + "gone fishing, ignoring request\n", + reg_request->wiphy_idx, + reg_request->alpha2[0], + reg_request->alpha2[1]); +#endif + kfree(reg_request); + spin_lock(®_requests_lock); + } + spin_unlock(®_requests_lock); +} + +static void reg_todo(struct work_struct *work) +{ + reg_process_pending_hints(); +} + +static DECLARE_WORK(reg_work, reg_todo); + +static void queue_regulatory_request(struct regulatory_request *request) +{ + spin_lock(®_requests_lock); + list_add_tail(&request->list, ®_requests_list); + spin_unlock(®_requests_lock); + + schedule_work(®_work); +} + +/* Core regulatory hint -- happens once during cfg80211_init() */ static int regulatory_hint_core(const char *alpha2) { struct regulatory_request *request; @@ -1210,23 +1296,56 @@ static int regulatory_hint_core(const char *alpha2) request->alpha2[1] = alpha2[1]; request->initiator = REGDOM_SET_BY_CORE; - last_request = request; + queue_regulatory_request(request); - return call_crda(alpha2); + return 0; } -void regulatory_hint(struct wiphy *wiphy, const char *alpha2) +/* User hints */ +int regulatory_hint_user(const char *alpha2) { - int r; + struct regulatory_request *request; + BUG_ON(!alpha2); - mutex_lock(&cfg80211_mutex); - r = __regulatory_hint(wiphy, REGDOM_SET_BY_DRIVER, - alpha2, 0, ENVIRON_ANY); - /* This is required so that the orig_* parameters are saved */ - if (r == -EALREADY && wiphy->strict_regulatory) - wiphy_update_regulatory(wiphy, REGDOM_SET_BY_DRIVER); - mutex_unlock(&cfg80211_mutex); + request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); + if (!request) + return -ENOMEM; + + request->wiphy_idx = WIPHY_IDX_STALE; + request->alpha2[0] = alpha2[0]; + request->alpha2[1] = alpha2[1]; + request->initiator = REGDOM_SET_BY_USER, + + queue_regulatory_request(request); + + return 0; +} + +/* Driver hints */ +int regulatory_hint(struct wiphy *wiphy, const char *alpha2) +{ + struct regulatory_request *request; + + BUG_ON(!alpha2); + BUG_ON(!wiphy); + + request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); + if (!request) + return -ENOMEM; + + request->wiphy_idx = get_wiphy_idx(wiphy); + + /* Must have registered wiphy first */ + BUG_ON(!wiphy_idx_valid(request->wiphy_idx)); + + request->alpha2[0] = alpha2[0]; + request->alpha2[1] = alpha2[1]; + request->initiator = REGDOM_SET_BY_DRIVER; + + queue_regulatory_request(request); + + return 0; } EXPORT_SYMBOL(regulatory_hint); @@ -1260,6 +1379,7 @@ void regulatory_hint_11d(struct wiphy *wiphy, char alpha2[2]; u32 checksum = 0; enum environment_cap env = ENVIRON_ANY; + struct regulatory_request *request; mutex_lock(&cfg80211_mutex); @@ -1343,14 +1463,26 @@ void regulatory_hint_11d(struct wiphy *wiphy, if (WARN_ON(reg_same_country_ie_hint(wiphy, checksum))) goto free_rd_out; + request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); + if (!request) + goto free_rd_out; + /* We keep this around for when CRDA comes back with a response so * we can intersect with that */ country_ie_regdomain = rd; - __regulatory_hint(wiphy, REGDOM_SET_BY_COUNTRY_IE, - country_ie_regdomain->alpha2, checksum, env); + request->wiphy_idx = get_wiphy_idx(wiphy); + request->alpha2[0] = rd->alpha2[0]; + request->alpha2[1] = rd->alpha2[1]; + request->initiator = REGDOM_SET_BY_COUNTRY_IE; + request->country_ie_checksum = checksum; + request->country_ie_env = env; + + mutex_unlock(&cfg80211_mutex); - goto out; + queue_regulatory_request(request); + + return; free_rd_out: kfree(rd); @@ -1661,6 +1793,8 @@ int regulatory_init(void) if (IS_ERR(reg_pdev)) return PTR_ERR(reg_pdev); + spin_lock_init(®_requests_lock); + #ifdef CONFIG_WIRELESS_OLD_REGULATORY cfg80211_regdomain = static_regdom(ieee80211_regdom); @@ -1700,6 +1834,10 @@ int regulatory_init(void) void regulatory_exit(void) { + struct regulatory_request *reg_request, *tmp; + + cancel_work_sync(®_work); + mutex_lock(&cfg80211_mutex); reset_regdomains(); @@ -1711,5 +1849,15 @@ void regulatory_exit(void) platform_device_unregister(reg_pdev); + spin_lock(®_requests_lock); + if (!list_empty(®_requests_list)) { + list_for_each_entry_safe(reg_request, tmp, + ®_requests_list, list) { + list_del(®_request->list); + kfree(reg_request); + } + } + spin_unlock(®_requests_lock); + mutex_unlock(&cfg80211_mutex); } diff --git a/net/wireless/reg.h b/net/wireless/reg.h index fe8c83f34fb7..4730def5a69d 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -6,6 +6,8 @@ extern const struct ieee80211_regdomain *cfg80211_regdomain; bool is_world_regdom(const char *alpha2); bool reg_is_valid_request(const char *alpha2); +int regulatory_hint_user(const char *alpha2); + void reg_device_remove(struct wiphy *wiphy); int regulatory_init(void); -- cgit v1.2.3 From fb1fc7add5d205c1db2fa323af1367c3cd4dced2 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Sat, 21 Feb 2009 00:04:31 -0500 Subject: cfg80211: comments style cleanup Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 299 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 199 insertions(+), 100 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 0b8c4b86789a..7ecb9033ad42 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -54,14 +54,18 @@ static u32 supported_bandwidths[] = { MHZ_TO_KHZ(20), }; -/* Central wireless core regulatory domains, we only need two, +/* + * Central wireless core regulatory domains, we only need two, * the current one and a world regulatory domain in case we have no - * information to give us an alpha2 */ + * information to give us an alpha2 + */ const struct ieee80211_regdomain *cfg80211_regdomain; -/* We use this as a place for the rd structure built from the +/* + * We use this as a place for the rd structure built from the * last parsed country IE to rest until CRDA gets back to us with - * what it thinks should apply for the same country */ + * what it thinks should apply for the same country + */ static const struct ieee80211_regdomain *country_ie_regdomain; static LIST_HEAD(reg_requests_list); @@ -86,9 +90,11 @@ static char *ieee80211_regdom = "US"; module_param(ieee80211_regdom, charp, 0444); MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); -/* We assume 40 MHz bandwidth for the old regulatory work. +/* + * We assume 40 MHz bandwidth for the old regulatory work. * We make emphasis we are using the exact same frequencies - * as before */ + * as before + */ static const struct ieee80211_regdomain us_regdom = { .n_reg_rules = 6, @@ -127,8 +133,10 @@ static const struct ieee80211_regdomain jp_regdom = { static const struct ieee80211_regdomain eu_regdom = { .n_reg_rules = 6, - /* This alpha2 is bogus, we leave it here just for stupid - * backward compatibility */ + /* + * This alpha2 is bogus, we leave it here just for stupid + * backward compatibility + */ .alpha2 = "EU", .reg_rules = { /* IEEE 802.11b/g, channels 1..13 */ @@ -197,8 +205,10 @@ static void reset_regdomains(void) cfg80211_regdomain = NULL; } -/* Dynamic world regulatory domain requested by the wireless - * core upon initialization */ +/* + * Dynamic world regulatory domain requested by the wireless + * core upon initialization + */ static void update_world_regdomain(const struct ieee80211_regdomain *rd) { BUG_ON(!last_request); @@ -239,8 +249,10 @@ static bool is_unknown_alpha2(const char *alpha2) { if (!alpha2) return false; - /* Special case where regulatory domain was built by driver - * but a specific alpha2 cannot be determined */ + /* + * Special case where regulatory domain was built by driver + * but a specific alpha2 cannot be determined + */ if (alpha2[0] == '9' && alpha2[1] == '9') return true; return false; @@ -250,9 +262,11 @@ static bool is_intersected_alpha2(const char *alpha2) { if (!alpha2) return false; - /* Special case where regulatory domain is the + /* + * Special case where regulatory domain is the * result of an intersection between two regulatory domain - * structures */ + * structures + */ if (alpha2[0] == '9' && alpha2[1] == '8') return true; return false; @@ -307,8 +321,10 @@ static bool country_ie_integrity_changes(u32 checksum) return false; } -/* This lets us keep regulatory code which is updated on a regulatory - * basis in userspace. */ +/* + * This lets us keep regulatory code which is updated on a regulatory + * basis in userspace. + */ static int call_crda(const char *alpha2) { char country_env[9 + 2] = "COUNTRY="; @@ -419,10 +435,12 @@ static bool freq_in_rule_band(const struct ieee80211_freq_range *freq_range, #undef ONE_GHZ_IN_KHZ } -/* Converts a country IE to a regulatory domain. A regulatory domain +/* + * Converts a country IE to a regulatory domain. A regulatory domain * structure has a lot of information which the IE doesn't yet have, * so for the other values we use upper max values as we will intersect - * with our userspace regulatory agent to get lower bounds. */ + * with our userspace regulatory agent to get lower bounds. + */ static struct ieee80211_regdomain *country_ie_2_rd( u8 *country_ie, u8 country_ie_len, @@ -467,9 +485,11 @@ static struct ieee80211_regdomain *country_ie_2_rd( *checksum ^= ((flags ^ alpha2[0] ^ alpha2[1]) << 8); - /* We need to build a reg rule for each triplet, but first we must + /* + * We need to build a reg rule for each triplet, but first we must * calculate the number of reg rules we will need. We will need one - * for each channel subband */ + * for each channel subband + */ while (country_ie_len >= 3) { int end_channel = 0; struct ieee80211_country_ie_triplet *triplet = @@ -507,9 +527,11 @@ static struct ieee80211_regdomain *country_ie_2_rd( if (cur_sub_max_channel < cur_channel) return NULL; - /* Do not allow overlapping channels. Also channels + /* + * Do not allow overlapping channels. Also channels * passed in each subband must be monotonically - * increasing */ + * increasing + */ if (last_sub_max_channel) { if (cur_channel <= last_sub_max_channel) return NULL; @@ -517,10 +539,12 @@ static struct ieee80211_regdomain *country_ie_2_rd( return NULL; } - /* When dot11RegulatoryClassesRequired is supported + /* + * When dot11RegulatoryClassesRequired is supported * we can throw ext triplets as part of this soup, * for now we don't care when those change as we - * don't support them */ + * don't support them + */ *checksum ^= ((cur_channel ^ cur_sub_max_channel) << 8) | ((cur_sub_max_channel ^ cur_sub_max_channel) << 16) | ((triplet->chans.max_power ^ cur_sub_max_channel) << 24); @@ -531,8 +555,10 @@ static struct ieee80211_regdomain *country_ie_2_rd( country_ie_len -= 3; num_rules++; - /* Note: this is not a IEEE requirement but - * simply a memory requirement */ + /* + * Note: this is not a IEEE requirement but + * simply a memory requirement + */ if (num_rules > NL80211_MAX_SUPP_REG_RULES) return NULL; } @@ -560,8 +586,10 @@ static struct ieee80211_regdomain *country_ie_2_rd( struct ieee80211_freq_range *freq_range = NULL; struct ieee80211_power_rule *power_rule = NULL; - /* Must parse if dot11RegulatoryClassesRequired is true, - * we don't support this yet */ + /* + * Must parse if dot11RegulatoryClassesRequired is true, + * we don't support this yet + */ if (triplet->ext.reg_extension_id >= IEEE80211_COUNTRY_EXTENSION_ID) { country_ie += 3; @@ -583,10 +611,12 @@ static struct ieee80211_regdomain *country_ie_2_rd( end_channel = triplet->chans.first_channel + (4 * (triplet->chans.num_channels - 1)); - /* The +10 is since the regulatory domain expects + /* + * The +10 is since the regulatory domain expects * the actual band edge, not the center of freq for * its start and end freqs, assuming 20 MHz bandwidth on - * the channels passed */ + * the channels passed + */ freq_range->start_freq_khz = MHZ_TO_KHZ(ieee80211_channel_to_frequency( triplet->chans.first_channel) - 10); @@ -594,9 +624,11 @@ static struct ieee80211_regdomain *country_ie_2_rd( MHZ_TO_KHZ(ieee80211_channel_to_frequency( end_channel) + 10); - /* Large arbitrary values, we intersect later */ - /* Increment this if we ever support >= 40 MHz channels - * in IEEE 802.11 */ + /* + * These are large arbitrary values we use to intersect later. + * Increment this if we ever support >= 40 MHz channels + * in IEEE 802.11 + */ freq_range->max_bandwidth_khz = MHZ_TO_KHZ(40); power_rule->max_antenna_gain = DBI_TO_MBI(100); power_rule->max_eirp = DBM_TO_MBM(100); @@ -612,8 +644,10 @@ static struct ieee80211_regdomain *country_ie_2_rd( } -/* Helper for regdom_intersect(), this does the real - * mathematical intersection fun */ +/* + * Helper for regdom_intersect(), this does the real + * mathematical intersection fun + */ static int reg_rules_intersect( const struct ieee80211_reg_rule *rule1, const struct ieee80211_reg_rule *rule2, @@ -691,11 +725,13 @@ static struct ieee80211_regdomain *regdom_intersect( if (!rd1 || !rd2) return NULL; - /* First we get a count of the rules we'll need, then we actually + /* + * First we get a count of the rules we'll need, then we actually * build them. This is to so we can malloc() and free() a * regdomain once. The reason we use reg_rules_intersect() here * is it will return -EINVAL if the rule computed makes no sense. - * All rules that do check out OK are valid. */ + * All rules that do check out OK are valid. + */ for (x = 0; x < rd1->n_reg_rules; x++) { rule1 = &rd1->reg_rules[x]; @@ -723,14 +759,18 @@ static struct ieee80211_regdomain *regdom_intersect( rule1 = &rd1->reg_rules[x]; for (y = 0; y < rd2->n_reg_rules; y++) { rule2 = &rd2->reg_rules[y]; - /* This time around instead of using the stack lets + /* + * This time around instead of using the stack lets * write to the target rule directly saving ourselves - * a memcpy() */ + * a memcpy() + */ intersected_rule = &rd->reg_rules[rule_idx]; r = reg_rules_intersect(rule1, rule2, intersected_rule); - /* No need to memset here the intersected rule here as - * we're not using the stack anymore */ + /* + * No need to memset here the intersected rule here as + * we're not using the stack anymore + */ if (r) continue; rule_idx++; @@ -749,8 +789,10 @@ static struct ieee80211_regdomain *regdom_intersect( return rd; } -/* XXX: add support for the rest of enum nl80211_reg_rule_flags, we may - * want to just have the channel structure use these */ +/* + * XXX: add support for the rest of enum nl80211_reg_rule_flags, we may + * want to just have the channel structure use these + */ static u32 map_regdom_flags(u32 rd_flags) { u32 channel_flags = 0; @@ -776,8 +818,10 @@ static int freq_reg_info_regd(struct wiphy *wiphy, regd = custom_regd ? custom_regd : cfg80211_regdomain; - /* Follow the driver's regulatory domain, if present, unless a country - * IE has been processed or a user wants to help complaince further */ + /* + * Follow the driver's regulatory domain, if present, unless a country + * IE has been processed or a user wants to help complaince further + */ if (last_request->initiator != REGDOM_SET_BY_COUNTRY_IE && last_request->initiator != REGDOM_SET_BY_USER && wiphy->regd) @@ -795,9 +839,11 @@ static int freq_reg_info_regd(struct wiphy *wiphy, fr = &rr->freq_range; pr = &rr->power_rule; - /* We only need to know if one frequency rule was + /* + * We only need to know if one frequency rule was * was in center_freq's band, that's enough, so lets - * not overwrite it once found */ + * not overwrite it once found + */ if (!band_rule_found) band_rule_found = freq_in_rule_band(fr, center_freq); @@ -850,7 +896,8 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, &max_bandwidth, ®_rule); if (r) { - /* This means no regulatory rule was found in the country IE + /* + * This means no regulatory rule was found in the country IE * with a frequency range on the center_freq's band, since * IEEE-802.11 allows for a country IE to have a subset of the * regulatory information provided in a country we ignore @@ -869,8 +916,10 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, chan->center_freq, wiphy_name(wiphy)); #endif } else { - /* In this case we know the country IE has at least one reg rule - * for the band so we respect its band definitions */ + /* + * In this case we know the country IE has at least one reg rule + * for the band so we respect its band definitions + */ #ifdef CONFIG_CFG80211_REG_DEBUG if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) printk(KERN_DEBUG "cfg80211: Disabling " @@ -889,9 +938,11 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, if (last_request->initiator == REGDOM_SET_BY_DRIVER && request_wiphy && request_wiphy == wiphy && request_wiphy->strict_regulatory) { - /* This gaurantees the driver's requested regulatory domain + /* + * This gaurantees the driver's requested regulatory domain * will always be used as a base for further regulatory - * settings */ + * settings + */ chan->flags = chan->orig_flags = map_regdom_flags(reg_rule->flags); chan->max_antenna_gain = chan->orig_mag = @@ -932,8 +983,10 @@ static bool ignore_reg_update(struct wiphy *wiphy, enum reg_set_by setby) if (setby == REGDOM_SET_BY_CORE && wiphy->custom_regulatory) return true; - /* wiphy->regd will be set once the device has its own - * desired regulatory domain set */ + /* + * wiphy->regd will be set once the device has its own + * desired regulatory domain set + */ if (wiphy->strict_regulatory && !wiphy->regd && !is_world_regdom(last_request->alpha2)) return true; @@ -1043,8 +1096,10 @@ static int reg_copy_regd(const struct ieee80211_regdomain **dst_regd, return 0; } -/* Return value which can be used by ignore_request() to indicate - * it has been determined we should intersect two regulatory domains */ +/* + * Return value which can be used by ignore_request() to indicate + * it has been determined we should intersect two regulatory domains + */ #define REG_INTERSECT 1 /* This has the logic which determines when a new request @@ -1084,8 +1139,10 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, return -EOPNOTSUPP; return -EALREADY; } - /* Two consecutive Country IE hints on the same wiphy. - * This should be picked up early by the driver/stack */ + /* + * Two consecutive Country IE hints on the same wiphy. + * This should be picked up early by the driver/stack + */ if (WARN_ON(!alpha2_equal(cfg80211_regdomain->alpha2, alpha2))) return 0; @@ -1104,13 +1161,17 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, case REGDOM_SET_BY_USER: if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) return REG_INTERSECT; - /* If the user knows better the user should set the regdom - * to their country before the IE is picked up */ + /* + * If the user knows better the user should set the regdom + * to their country before the IE is picked up + */ if (last_request->initiator == REGDOM_SET_BY_USER && last_request->intersect) return -EOPNOTSUPP; - /* Process user requests only after previous user/driver/core - * requests have been processed */ + /* + * Process user requests only after previous user/driver/core + * requests have been processed + */ if (last_request->initiator == REGDOM_SET_BY_CORE || last_request->initiator == REGDOM_SET_BY_DRIVER || last_request->initiator == REGDOM_SET_BY_USER) { @@ -1151,9 +1212,11 @@ int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, } intersect = true; } else if (r) { - /* If the regulatory domain being requested by the + /* + * If the regulatory domain being requested by the * driver has already been set just copy it to the - * wiphy */ + * wiphy + */ if (r == -EALREADY && set_by == REGDOM_SET_BY_DRIVER) { r = reg_copy_regd(&wiphy->regd, cfg80211_regdomain); if (r) @@ -1363,9 +1426,11 @@ static bool reg_same_country_ie_hint(struct wiphy *wiphy, if (likely(request_wiphy != wiphy)) return !country_ie_integrity_changes(country_ie_checksum); - /* We should not have let these through at this point, they + /* + * We should not have let these through at this point, they * should have been picked up earlier by the first alpha2 check - * on the device */ + * on the device + */ if (WARN_ON(!country_ie_integrity_changes(country_ie_checksum))) return true; return false; @@ -1395,9 +1460,11 @@ void regulatory_hint_11d(struct wiphy *wiphy, if (country_ie_len < IEEE80211_COUNTRY_IE_MIN_LEN) goto out; - /* Pending country IE processing, this can happen after we + /* + * Pending country IE processing, this can happen after we * call CRDA and wait for a response if a beacon was received before - * we were able to process the last regulatory_hint_11d() call */ + * we were able to process the last regulatory_hint_11d() call + */ if (country_ie_regdomain) goto out; @@ -1409,34 +1476,44 @@ void regulatory_hint_11d(struct wiphy *wiphy, else if (country_ie[2] == 'O') env = ENVIRON_OUTDOOR; - /* We will run this for *every* beacon processed for the BSSID, so + /* + * We will run this for *every* beacon processed for the BSSID, so * we optimize an early check to exit out early if we don't have to - * do anything */ + * do anything + */ if (likely(wiphy_idx_valid(last_request->wiphy_idx))) { struct cfg80211_registered_device *drv_last_ie; drv_last_ie = cfg80211_drv_by_wiphy_idx(last_request->wiphy_idx); - /* Lets keep this simple -- we trust the first AP - * after we intersect with CRDA */ + /* + * Lets keep this simple -- we trust the first AP + * after we intersect with CRDA + */ if (likely(&drv_last_ie->wiphy == wiphy)) { - /* Ignore IEs coming in on this wiphy with - * the same alpha2 and environment cap */ + /* + * Ignore IEs coming in on this wiphy with + * the same alpha2 and environment cap + */ if (likely(alpha2_equal(drv_last_ie->country_ie_alpha2, alpha2) && env == drv_last_ie->env)) { goto out; } - /* the wiphy moved on to another BSSID or the AP + /* + * the wiphy moved on to another BSSID or the AP * was reconfigured. XXX: We need to deal with the * case where the user suspends and goes to goes * to another country, and then gets IEs from an - * AP with different settings */ + * AP with different settings + */ goto out; } else { - /* Ignore IEs coming in on two separate wiphys with - * the same alpha2 and environment cap */ + /* + * Ignore IEs coming in on two separate wiphys with + * the same alpha2 and environment cap + */ if (likely(alpha2_equal(drv_last_ie->country_ie_alpha2, alpha2) && env == drv_last_ie->env)) { @@ -1467,8 +1544,10 @@ void regulatory_hint_11d(struct wiphy *wiphy, if (!request) goto free_rd_out; - /* We keep this around for when CRDA comes back with a response so - * we can intersect with that */ + /* + * We keep this around for when CRDA comes back with a response so + * we can intersect with that + */ country_ie_regdomain = rd; request->wiphy_idx = get_wiphy_idx(wiphy); @@ -1506,8 +1585,10 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd) freq_range = ®_rule->freq_range; power_rule = ®_rule->power_rule; - /* There may not be documentation for max antenna gain - * in certain regions */ + /* + * There may not be documentation for max antenna gain + * in certain regions + */ if (power_rule->max_antenna_gain) printk(KERN_INFO "\t(%d KHz - %d KHz @ %d KHz), " "(%d mBi, %d mBm)\n", @@ -1618,21 +1699,27 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) if (!last_request) return -EINVAL; - /* Lets only bother proceeding on the same alpha2 if the current + /* + * Lets only bother proceeding on the same alpha2 if the current * rd is non static (it means CRDA was present and was used last) - * and the pending request came in from a country IE */ + * and the pending request came in from a country IE + */ if (last_request->initiator != REGDOM_SET_BY_COUNTRY_IE) { - /* If someone else asked us to change the rd lets only bother - * checking if the alpha2 changes if CRDA was already called */ + /* + * If someone else asked us to change the rd lets only bother + * checking if the alpha2 changes if CRDA was already called + */ if (!is_old_static_regdom(cfg80211_regdomain) && !regdom_changed(rd->alpha2)) return -EINVAL; } - /* Now lets set the regulatory domain, update all driver channels + /* + * Now lets set the regulatory domain, update all driver channels * and finally inform them of what we have done, in case they want * to review or adjust their own settings based on their own - * internal EEPROM data */ + * internal EEPROM data + */ if (WARN_ON(!reg_is_valid_request(rd->alpha2))) return -EINVAL; @@ -1655,8 +1742,10 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) return 0; } - /* For a driver hint, lets copy the regulatory domain the - * driver wanted to the wiphy to deal with conflicts */ + /* + * For a driver hint, lets copy the regulatory domain the + * driver wanted to the wiphy to deal with conflicts + */ BUG_ON(request_wiphy->regd); @@ -1677,9 +1766,11 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) if (!intersected_rd) return -EINVAL; - /* We can trash what CRDA provided now. + /* + * We can trash what CRDA provided now. * However if a driver requested this specific regulatory - * domain we keep it for its private use */ + * domain we keep it for its private use + */ if (last_request->initiator == REGDOM_SET_BY_DRIVER) request_wiphy->regd = rd; else @@ -1701,8 +1792,10 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) BUG_ON(!country_ie_regdomain); if (rd != country_ie_regdomain) { - /* Intersect what CRDA returned and our what we - * had built from the Country IE received */ + /* + * Intersect what CRDA returned and our what we + * had built from the Country IE received + */ intersected_rd = regdom_intersect(rd, country_ie_regdomain); @@ -1712,9 +1805,11 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) kfree(country_ie_regdomain); country_ie_regdomain = NULL; } else { - /* This would happen when CRDA was not present and + /* + * This would happen when CRDA was not present and * OLD_REGULATORY was enabled. We intersect our Country - * IE rd and what was set on cfg80211 originally */ + * IE rd and what was set on cfg80211 originally + */ intersected_rd = regdom_intersect(rd, cfg80211_regdomain); } @@ -1739,9 +1834,11 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) } -/* Use this call to set the current regulatory domain. Conflicts with +/* + * Use this call to set the current regulatory domain. Conflicts with * multiple drivers can be ironed out later. Caller must've already - * kmalloc'd the rd structure. Caller must hold cfg80211_mutex */ + * kmalloc'd the rd structure. Caller must hold cfg80211_mutex + */ int set_regdom(const struct ieee80211_regdomain *rd) { int r; @@ -1800,10 +1897,12 @@ int regulatory_init(void) printk(KERN_INFO "cfg80211: Using static regulatory domain info\n"); print_regdomain_info(cfg80211_regdomain); - /* The old code still requests for a new regdomain and if + /* + * The old code still requests for a new regdomain and if * you have CRDA you get it updated, otherwise you get * stuck with the static values. We ignore "EU" code as - * that is not a valid ISO / IEC 3166 alpha2 */ + * that is not a valid ISO / IEC 3166 alpha2 + */ if (ieee80211_regdom[0] != 'E' || ieee80211_regdom[1] != 'U') err = regulatory_hint_core(ieee80211_regdom); #else -- cgit v1.2.3 From fff32c04f6074de0719cc46d8f488aaf746f151a Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Sat, 21 Feb 2009 00:04:32 -0500 Subject: cfg80211: allow drivers that agree on regulatory to agree This allows drivers that agree on regulatory to share their regulatory domain. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 7ecb9033ad42..5456534bdf8c 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1157,6 +1157,16 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, return 0; return -EALREADY; } + + /* + * This would happen if you unplug and plug your card + * back in or if you add a new device for which the previously + * loaded card also agrees on the regulatory domain. + */ + if (last_request->initiator == REGDOM_SET_BY_DRIVER && + alpha2_equal(cfg80211_regdomain->alpha2, alpha2)) + return -EALREADY; + return REG_INTERSECT; case REGDOM_SET_BY_USER: if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) -- cgit v1.2.3 From 69b1572bd82046cc8f730e05c797062ca8c2b535 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Sat, 21 Feb 2009 00:04:33 -0500 Subject: cfg80211: rename regdom_changed to regdom_changes() and use it Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 5456534bdf8c..3bd2ea2aeccb 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -291,7 +291,7 @@ static bool alpha2_equal(const char *alpha2_x, const char *alpha2_y) return false; } -static bool regdom_changed(const char *alpha2) +static bool regdom_changes(const char *alpha2) { assert_cfg80211_lock(); @@ -1134,8 +1134,7 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, * intersect them, but that seems unlikely * to be correct. Reject second one for now. */ - if (!alpha2_equal(alpha2, - cfg80211_regdomain->alpha2)) + if (regdom_changes(alpha2)) return -EOPNOTSUPP; return -EALREADY; } @@ -1143,8 +1142,7 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, * Two consecutive Country IE hints on the same wiphy. * This should be picked up early by the driver/stack */ - if (WARN_ON(!alpha2_equal(cfg80211_regdomain->alpha2, - alpha2))) + if (WARN_ON(regdom_changes(alpha2))) return 0; return -EALREADY; } @@ -1153,7 +1151,7 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, if (last_request->initiator == REGDOM_SET_BY_CORE) { if (is_old_static_regdom(cfg80211_regdomain)) return 0; - if (!alpha2_equal(cfg80211_regdomain->alpha2, alpha2)) + if (regdom_changes(alpha2)) return 0; return -EALREADY; } @@ -1164,7 +1162,7 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, * loaded card also agrees on the regulatory domain. */ if (last_request->initiator == REGDOM_SET_BY_DRIVER && - alpha2_equal(cfg80211_regdomain->alpha2, alpha2)) + !regdom_changes(alpha2)) return -EALREADY; return REG_INTERSECT; @@ -1185,13 +1183,12 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, if (last_request->initiator == REGDOM_SET_BY_CORE || last_request->initiator == REGDOM_SET_BY_DRIVER || last_request->initiator == REGDOM_SET_BY_USER) { - if (!alpha2_equal(last_request->alpha2, - cfg80211_regdomain->alpha2)) + if (regdom_changes(last_request->alpha2)) return -EAGAIN; } if (!is_old_static_regdom(cfg80211_regdomain) && - alpha2_equal(cfg80211_regdomain->alpha2, alpha2)) + !regdom_changes(alpha2)) return -EALREADY; return 0; @@ -1720,7 +1717,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) * checking if the alpha2 changes if CRDA was already called */ if (!is_old_static_regdom(cfg80211_regdomain) && - !regdom_changed(rd->alpha2)) + !regdom_changes(rd->alpha2)) return -EINVAL; } -- cgit v1.2.3 From 68798a62634e58e01d6f1de509b253dcb40625bd Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Sat, 21 Feb 2009 00:20:37 -0500 Subject: cfg80211: enable active-scan / beaconing on Ch 1-11 for world regdom This enables active scan and beaconing on Channels 1 through 11 on the static world regulatory domain. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 3bd2ea2aeccb..85c2c31721ec 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -76,9 +76,8 @@ static const struct ieee80211_regdomain world_regdom = { .n_reg_rules = 1, .alpha2 = "00", .reg_rules = { - REG_RULE(2412-10, 2462+10, 40, 6, 20, - NL80211_RRF_PASSIVE_SCAN | - NL80211_RRF_NO_IBSS), + /* IEEE 802.11b/g, channels 1..11 */ + REG_RULE(2412-10, 2462+10, 40, 6, 20, 0), } }; -- cgit v1.2.3 From 3fc71f775af677f640f0f0780b16f1b0958f6d9d Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Sat, 21 Feb 2009 00:20:38 -0500 Subject: cfg80211: enable 5 GHz world roaming channels The current static world regulatory domain is too restrictive, we can use some 5 GHz channels world wide so long as they do not touch frequencies which require DFS. The compromise is we must also enforce passive scanning and disallow usage of a mode of operation that beacons: (AP | IBSS | Mesh) Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 85c2c31721ec..da2a8aca4280 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -73,11 +73,22 @@ static spinlock_t reg_requests_lock; /* We keep a static world regulatory domain in case of the absence of CRDA */ static const struct ieee80211_regdomain world_regdom = { - .n_reg_rules = 1, + .n_reg_rules = 3, .alpha2 = "00", .reg_rules = { /* IEEE 802.11b/g, channels 1..11 */ REG_RULE(2412-10, 2462+10, 40, 6, 20, 0), + /* IEEE 802.11a, channel 36..48 */ + REG_RULE(5180-10, 5240+10, 40, 6, 23, + NL80211_RRF_PASSIVE_SCAN | + NL80211_RRF_NO_IBSS), + + /* NB: 5260 MHz - 5700 MHz requies DFS */ + + /* IEEE 802.11a, channel 149..165 */ + REG_RULE(5745-10, 5825+10, 40, 6, 23, + NL80211_RRF_PASSIVE_SCAN | + NL80211_RRF_NO_IBSS), } }; -- cgit v1.2.3 From e38f8a7a8bebbab9d97f204e2cf05ef58b048a1d Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Sat, 21 Feb 2009 00:20:39 -0500 Subject: cfg80211: Add AP beacon regulatory hints When devices are world roaming they cannot beacon or do active scan on 5 GHz or on channels 12, 13 and 14 on the 2 GHz band. Although we have a good regulatory API some cards may _always_ world roam, this is also true when a system does not have CRDA present. Devices doing world roaming can still passive scan, if they find a beacon from an AP on one of the world roaming frequencies we make the assumption we can do the same and we also remove the passive scan requirement. This adds support for providing beacon regulatory hints based on scans. This works for devices that do either hardware or software scanning. If a channel has not yet been marked as having had a beacon present on it we queue the beacon hint processing into the workqueue. All wireless devices will benefit from beacon regulatory hints from any wireless device on a system including new devices connected to the system at a later time. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/core.c | 5 +- net/wireless/reg.c | 224 +++++++++++++++++++++++++++++++++++++++++++++++++++- net/wireless/reg.h | 21 +++++ net/wireless/scan.c | 3 + 4 files changed, 250 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index b1a354b7fc06..dd7f222919fe 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -32,8 +32,9 @@ MODULE_DESCRIPTION("wireless configuration support"); LIST_HEAD(cfg80211_drv_list); /* - * This is used to protect the cfg80211_drv_list, cfg80211_regdomain, and - * the last reguluatory request receipt in regd.c + * This is used to protect the cfg80211_drv_list, cfg80211_regdomain, + * country_ie_regdomain, the reg_beacon_list and the the last regulatory + * request receipt (last_request). */ DEFINE_MUTEX(cfg80211_mutex); diff --git a/net/wireless/reg.c b/net/wireless/reg.c index da2a8aca4280..e5e432d6af34 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -68,9 +68,22 @@ const struct ieee80211_regdomain *cfg80211_regdomain; */ static const struct ieee80211_regdomain *country_ie_regdomain; +/* Used to queue up regulatory hints */ static LIST_HEAD(reg_requests_list); static spinlock_t reg_requests_lock; +/* Used to queue up beacon hints for review */ +static LIST_HEAD(reg_pending_beacons); +static spinlock_t reg_pending_beacons_lock; + +/* Used to keep track of processed beacon hints */ +static LIST_HEAD(reg_beacon_list); + +struct reg_beacon { + struct list_head list; + struct ieee80211_channel chan; +}; + /* We keep a static world regulatory domain in case of the absence of CRDA */ static const struct ieee80211_regdomain world_regdom = { .n_reg_rules = 3, @@ -1011,16 +1024,120 @@ static void update_all_wiphy_regulatory(enum reg_set_by setby) wiphy_update_regulatory(&drv->wiphy, setby); } +static void handle_reg_beacon(struct wiphy *wiphy, + unsigned int chan_idx, + struct reg_beacon *reg_beacon) +{ +#ifdef CONFIG_CFG80211_REG_DEBUG +#define REG_DEBUG_BEACON_FLAG(desc) \ + printk(KERN_DEBUG "cfg80211: Enabling " desc " on " \ + "frequency: %d MHz (Ch %d) on %s\n", \ + reg_beacon->chan.center_freq, \ + ieee80211_frequency_to_channel(reg_beacon->chan.center_freq), \ + wiphy_name(wiphy)); +#else +#define REG_DEBUG_BEACON_FLAG(desc) do {} while (0) +#endif + struct ieee80211_supported_band *sband; + struct ieee80211_channel *chan; + + assert_cfg80211_lock(); + + sband = wiphy->bands[reg_beacon->chan.band]; + chan = &sband->channels[chan_idx]; + + if (likely(chan->center_freq != reg_beacon->chan.center_freq)) + return; + + if (chan->flags & IEEE80211_CHAN_PASSIVE_SCAN) { + chan->flags &= ~IEEE80211_CHAN_PASSIVE_SCAN; + REG_DEBUG_BEACON_FLAG("active scanning"); + } + + if (chan->flags & IEEE80211_CHAN_NO_IBSS) { + chan->flags &= ~IEEE80211_CHAN_NO_IBSS; + REG_DEBUG_BEACON_FLAG("beaconing"); + } + + chan->beacon_found = true; +#undef REG_DEBUG_BEACON_FLAG +} + +/* + * Called when a scan on a wiphy finds a beacon on + * new channel + */ +static void wiphy_update_new_beacon(struct wiphy *wiphy, + struct reg_beacon *reg_beacon) +{ + unsigned int i; + struct ieee80211_supported_band *sband; + + assert_cfg80211_lock(); + + if (!wiphy->bands[reg_beacon->chan.band]) + return; + + sband = wiphy->bands[reg_beacon->chan.band]; + + for (i = 0; i < sband->n_channels; i++) + handle_reg_beacon(wiphy, i, reg_beacon); +} + +/* + * Called upon reg changes or a new wiphy is added + */ +static void wiphy_update_beacon_reg(struct wiphy *wiphy) +{ + unsigned int i; + struct ieee80211_supported_band *sband; + struct reg_beacon *reg_beacon; + + assert_cfg80211_lock(); + + if (list_empty(®_beacon_list)) + return; + + list_for_each_entry(reg_beacon, ®_beacon_list, list) { + if (!wiphy->bands[reg_beacon->chan.band]) + continue; + sband = wiphy->bands[reg_beacon->chan.band]; + for (i = 0; i < sband->n_channels; i++) + handle_reg_beacon(wiphy, i, reg_beacon); + } +} + +static bool reg_is_world_roaming(struct wiphy *wiphy) +{ + if (is_world_regdom(cfg80211_regdomain->alpha2) || + (wiphy->regd && is_world_regdom(wiphy->regd->alpha2))) + return true; + if (last_request->initiator != REGDOM_SET_BY_COUNTRY_IE && + wiphy->custom_regulatory) + return true; + return false; +} + +/* Reap the advantages of previously found beacons */ +static void reg_process_beacons(struct wiphy *wiphy) +{ + if (!reg_is_world_roaming(wiphy)) + return; + wiphy_update_beacon_reg(wiphy); +} + void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby) { enum ieee80211_band band; if (ignore_reg_update(wiphy, setby)) - return; + goto out; for (band = 0; band < IEEE80211_NUM_BANDS; band++) { if (wiphy->bands[band]) handle_band(wiphy, band); } +out: + reg_process_beacons(wiphy); if (wiphy->reg_notifier) wiphy->reg_notifier(wiphy, last_request); } @@ -1314,6 +1431,7 @@ out: return r; } +/* Processes regulatory hints, this is all the REGDOM_SET_BY_* */ static void reg_process_pending_hints(void) { struct regulatory_request *reg_request; @@ -1344,9 +1462,44 @@ static void reg_process_pending_hints(void) spin_unlock(®_requests_lock); } +/* Processes beacon hints -- this has nothing to do with country IEs */ +static void reg_process_pending_beacon_hints(void) +{ + struct cfg80211_registered_device *drv; + struct reg_beacon *pending_beacon, *tmp; + + mutex_lock(&cfg80211_mutex); + + /* This goes through the _pending_ beacon list */ + spin_lock_bh(®_pending_beacons_lock); + + if (list_empty(®_pending_beacons)) { + spin_unlock_bh(®_pending_beacons_lock); + goto out; + } + + list_for_each_entry_safe(pending_beacon, tmp, + ®_pending_beacons, list) { + + list_del_init(&pending_beacon->list); + + /* Applies the beacon hint to current wiphys */ + list_for_each_entry(drv, &cfg80211_drv_list, list) + wiphy_update_new_beacon(&drv->wiphy, pending_beacon); + + /* Remembers the beacon hint for new wiphys or reg changes */ + list_add_tail(&pending_beacon->list, ®_beacon_list); + } + + spin_unlock_bh(®_pending_beacons_lock); +out: + mutex_unlock(&cfg80211_mutex); +} + static void reg_todo(struct work_struct *work) { reg_process_pending_hints(); + reg_process_pending_beacon_hints(); } static DECLARE_WORK(reg_work, reg_todo); @@ -1587,6 +1740,55 @@ out: } EXPORT_SYMBOL(regulatory_hint_11d); +static bool freq_is_chan_12_13_14(u16 freq) +{ + if (freq == ieee80211_channel_to_frequency(12) || + freq == ieee80211_channel_to_frequency(13) || + freq == ieee80211_channel_to_frequency(14)) + return true; + return false; +} + +int regulatory_hint_found_beacon(struct wiphy *wiphy, + struct ieee80211_channel *beacon_chan, + gfp_t gfp) +{ + struct reg_beacon *reg_beacon; + + if (likely((beacon_chan->beacon_found || + (beacon_chan->flags & IEEE80211_CHAN_RADAR) || + (beacon_chan->band == IEEE80211_BAND_2GHZ && + !freq_is_chan_12_13_14(beacon_chan->center_freq))))) + return 0; + + reg_beacon = kzalloc(sizeof(struct reg_beacon), gfp); + if (!reg_beacon) + return -ENOMEM; + +#ifdef CONFIG_CFG80211_REG_DEBUG + printk(KERN_DEBUG "cfg80211: Found new beacon on " + "frequency: %d MHz (Ch %d) on %s\n", + beacon_chan->center_freq, + ieee80211_frequency_to_channel(beacon_chan->center_freq), + wiphy_name(wiphy)); +#endif + memcpy(®_beacon->chan, beacon_chan, + sizeof(struct ieee80211_channel)); + + + /* + * Since we can be called from BH or and non-BH context + * we must use spin_lock_bh() + */ + spin_lock_bh(®_pending_beacons_lock); + list_add_tail(®_beacon->list, ®_pending_beacons); + spin_unlock_bh(®_pending_beacons_lock); + + schedule_work(®_work); + + return 0; +} + static void print_rd_rules(const struct ieee80211_regdomain *rd) { unsigned int i; @@ -1908,6 +2110,7 @@ int regulatory_init(void) return PTR_ERR(reg_pdev); spin_lock_init(®_requests_lock); + spin_lock_init(®_pending_beacons_lock); #ifdef CONFIG_WIRELESS_OLD_REGULATORY cfg80211_regdomain = static_regdom(ieee80211_regdom); @@ -1951,6 +2154,7 @@ int regulatory_init(void) void regulatory_exit(void) { struct regulatory_request *reg_request, *tmp; + struct reg_beacon *reg_beacon, *btmp; cancel_work_sync(®_work); @@ -1965,6 +2169,24 @@ void regulatory_exit(void) platform_device_unregister(reg_pdev); + spin_lock_bh(®_pending_beacons_lock); + if (!list_empty(®_pending_beacons)) { + list_for_each_entry_safe(reg_beacon, btmp, + ®_pending_beacons, list) { + list_del(®_beacon->list); + kfree(reg_beacon); + } + } + spin_unlock_bh(®_pending_beacons_lock); + + if (!list_empty(®_beacon_list)) { + list_for_each_entry_safe(reg_beacon, btmp, + ®_beacon_list, list) { + list_del(®_beacon->list); + kfree(reg_beacon); + } + } + spin_lock(®_requests_lock); if (!list_empty(®_requests_list)) { list_for_each_entry_safe(reg_request, tmp, diff --git a/net/wireless/reg.h b/net/wireless/reg.h index 4730def5a69d..65bfd0558ce1 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -38,4 +38,25 @@ extern int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, const char *alpha2, u32 country_ie_checksum, enum environment_cap country_ie_env); +/** + * regulatory_hint_found_beacon - hints a beacon was found on a channel + * @wiphy: the wireless device where the beacon was found on + * @beacon_chan: the channel on which the beacon was found on + * @gfp: context flags + * + * This informs the wireless core that a beacon from an AP was found on + * the channel provided. This allows the wireless core to make educated + * guesses on regulatory to help with world roaming. This is only used for + * world roaming -- when we do not know our current location. This is + * only useful on channels 12, 13 and 14 on the 2 GHz band as channels + * 1-11 are already enabled by the world regulatory domain; and on + * non-radar 5 GHz channels. + * + * Drivers do not need to call this, cfg80211 will do it for after a scan + * on a newly found BSS. + */ +int regulatory_hint_found_beacon(struct wiphy *wiphy, + struct ieee80211_channel *beacon_chan, + gfp_t gfp); + #endif /* __NET_WIRELESS_REG_H */ diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 60600657b657..280dbcd02c15 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -430,6 +430,9 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, if (!res) return NULL; + if (res->pub.capability & WLAN_CAPABILITY_ESS) + regulatory_hint_found_beacon(wiphy, channel, gfp); + /* cfg80211_bss_update gives us a referenced result */ return &res->pub; } -- cgit v1.2.3 From d1c96a9a29a5f34fa50133889b6110dca6cc3d43 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Sat, 21 Feb 2009 00:24:13 -0500 Subject: cfg80211: make __regulatory_hint() static Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 23 +++++++++++++++++++++-- net/wireless/reg.h | 23 ----------------------- 2 files changed, 21 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index e5e432d6af34..0253d01cde97 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1324,8 +1324,27 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, return -EINVAL; } -/* Caller must hold &cfg80211_mutex */ -int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, +/** + * __regulatory_hint - hint to the wireless core a regulatory domain + * @wiphy: if the hint comes from country information from an AP, this + * is required to be set to the wiphy that received the information + * @alpha2: the ISO/IEC 3166 alpha2 being claimed the regulatory domain + * should be in. + * @country_ie_checksum: checksum of processed country IE, set this to 0 + * if the hint did not come from a country IE + * @country_ie_env: the environment the IE told us we are in, %ENVIRON_* + * + * The Wireless subsystem can use this function to hint to the wireless core + * what it believes should be the current regulatory domain by giving it an + * ISO/IEC 3166 alpha2 country code it knows its regulatory domain should be + * in. + * + * Returns zero if all went fine, %-EALREADY if a regulatory domain had + * already been set or other standard error codes. + * + * Caller must hold &cfg80211_mutex + */ +static int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, const char *alpha2, u32 country_ie_checksum, enum environment_cap env) diff --git a/net/wireless/reg.h b/net/wireless/reg.h index 65bfd0558ce1..e37829a49dc4 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -15,29 +15,6 @@ void regulatory_exit(void); int set_regdom(const struct ieee80211_regdomain *rd); -/** - * __regulatory_hint - hint to the wireless core a regulatory domain - * @wiphy: if the hint comes from country information from an AP, this - * is required to be set to the wiphy that received the information - * @alpha2: the ISO/IEC 3166 alpha2 being claimed the regulatory domain - * should be in. - * @country_ie_checksum: checksum of processed country IE, set this to 0 - * if the hint did not come from a country IE - * @country_ie_env: the environment the IE told us we are in, %ENVIRON_* - * - * The Wireless subsystem can use this function to hint to the wireless core - * what it believes should be the current regulatory domain by giving it an - * ISO/IEC 3166 alpha2 country code it knows its regulatory domain should be - * in. - * - * Returns zero if all went fine, %-EALREADY if a regulatory domain had - * already been set or other standard error codes. - * - */ -extern int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, - const char *alpha2, u32 country_ie_checksum, - enum environment_cap country_ie_env); - /** * regulatory_hint_found_beacon - hints a beacon was found on a channel * @wiphy: the wireless device where the beacon was found on -- cgit v1.2.3 From 28da32d7cafdd181d6a59e8c0b74e9651a8f8be3 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Sat, 21 Feb 2009 00:24:14 -0500 Subject: cfg80211: pass the regulatory_request struct in __regulatory_hint() We were passing value by value, lets just pass the struct. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 45 ++++++++++++++++++--------------------------- 1 file changed, 18 insertions(+), 27 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 0253d01cde97..6e1733733e18 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1328,26 +1328,18 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, * __regulatory_hint - hint to the wireless core a regulatory domain * @wiphy: if the hint comes from country information from an AP, this * is required to be set to the wiphy that received the information - * @alpha2: the ISO/IEC 3166 alpha2 being claimed the regulatory domain - * should be in. - * @country_ie_checksum: checksum of processed country IE, set this to 0 - * if the hint did not come from a country IE - * @country_ie_env: the environment the IE told us we are in, %ENVIRON_* + * @pending_request: the regulatory request currently being processed * * The Wireless subsystem can use this function to hint to the wireless core - * what it believes should be the current regulatory domain by giving it an - * ISO/IEC 3166 alpha2 country code it knows its regulatory domain should be - * in. + * what it believes should be the current regulatory domain. * * Returns zero if all went fine, %-EALREADY if a regulatory domain had * already been set or other standard error codes. * * Caller must hold &cfg80211_mutex */ -static int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, - const char *alpha2, - u32 country_ie_checksum, - enum environment_cap env) +static int __regulatory_hint(struct wiphy *wiphy, + struct regulatory_request *pending_request) { struct regulatory_request *request; bool intersect = false; @@ -1355,10 +1347,12 @@ static int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, assert_cfg80211_lock(); - r = ignore_request(wiphy, set_by, alpha2); + r = ignore_request(wiphy, + pending_request->initiator, + pending_request->alpha2); if (r == REG_INTERSECT) { - if (set_by == REGDOM_SET_BY_DRIVER) { + if (pending_request->initiator == REGDOM_SET_BY_DRIVER) { r = reg_copy_regd(&wiphy->regd, cfg80211_regdomain); if (r) return r; @@ -1370,7 +1364,8 @@ static int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, * driver has already been set just copy it to the * wiphy */ - if (r == -EALREADY && set_by == REGDOM_SET_BY_DRIVER) { + if (r == -EALREADY && + pending_request->initiator == REGDOM_SET_BY_DRIVER) { r = reg_copy_regd(&wiphy->regd, cfg80211_regdomain); if (r) return r; @@ -1386,13 +1381,13 @@ new_request: if (!request) return -ENOMEM; - request->alpha2[0] = alpha2[0]; - request->alpha2[1] = alpha2[1]; - request->initiator = set_by; - request->wiphy_idx = get_wiphy_idx(wiphy); + request->alpha2[0] = pending_request->alpha2[0]; + request->alpha2[1] = pending_request->alpha2[1]; + request->initiator = pending_request->initiator; + request->wiphy_idx = pending_request->wiphy_idx; request->intersect = intersect; - request->country_ie_checksum = country_ie_checksum; - request->country_ie_env = env; + request->country_ie_checksum = pending_request->country_ie_checksum; + request->country_ie_env = pending_request->country_ie_env; kfree(last_request); last_request = request; @@ -1411,7 +1406,7 @@ new_request: * * to intersect with the static rd */ - return call_crda(alpha2); + return call_crda(request->alpha2); } /* This currently only processes user and driver regulatory hints */ @@ -1433,11 +1428,7 @@ static int reg_process_hint(struct regulatory_request *reg_request) goto out; } - r = __regulatory_hint(wiphy, - reg_request->initiator, - reg_request->alpha2, - reg_request->country_ie_checksum, - reg_request->country_ie_env); + r = __regulatory_hint(wiphy, reg_request); /* This is required so that the orig_* parameters are saved */ if (r == -EALREADY && wiphy && wiphy->strict_regulatory) wiphy_update_regulatory(wiphy, reg_request->initiator); -- cgit v1.2.3 From d951c1ddeba3c84c464069c808efc494aa705304 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Sat, 21 Feb 2009 00:24:15 -0500 Subject: cfg80211: do not kzalloc() again for a new request on __regulatory_hint Since we already have a regulatory request from the workqueue use that and avoid a new kzalloc() Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 54 ++++++++++++++++-------------------------------------- 1 file changed, 16 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 6e1733733e18..6152a7ac9b90 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1341,7 +1341,6 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, static int __regulatory_hint(struct wiphy *wiphy, struct regulatory_request *pending_request) { - struct regulatory_request *request; bool intersect = false; int r = 0; @@ -1354,8 +1353,10 @@ static int __regulatory_hint(struct wiphy *wiphy, if (r == REG_INTERSECT) { if (pending_request->initiator == REGDOM_SET_BY_DRIVER) { r = reg_copy_regd(&wiphy->regd, cfg80211_regdomain); - if (r) + if (r) { + kfree(pending_request); return r; + } } intersect = true; } else if (r) { @@ -1367,30 +1368,24 @@ static int __regulatory_hint(struct wiphy *wiphy, if (r == -EALREADY && pending_request->initiator == REGDOM_SET_BY_DRIVER) { r = reg_copy_regd(&wiphy->regd, cfg80211_regdomain); - if (r) + if (r) { + kfree(pending_request); return r; + } r = -EALREADY; goto new_request; } + kfree(pending_request); return r; } new_request: - request = kzalloc(sizeof(struct regulatory_request), - GFP_KERNEL); - if (!request) - return -ENOMEM; + kfree(last_request); - request->alpha2[0] = pending_request->alpha2[0]; - request->alpha2[1] = pending_request->alpha2[1]; - request->initiator = pending_request->initiator; - request->wiphy_idx = pending_request->wiphy_idx; - request->intersect = intersect; - request->country_ie_checksum = pending_request->country_ie_checksum; - request->country_ie_env = pending_request->country_ie_env; + last_request = pending_request; + last_request->intersect = intersect; - kfree(last_request); - last_request = request; + pending_request = NULL; /* When r == REG_INTERSECT we do need to call CRDA */ if (r < 0) @@ -1406,11 +1401,11 @@ new_request: * * to intersect with the static rd */ - return call_crda(request->alpha2); + return call_crda(last_request->alpha2); } /* This currently only processes user and driver regulatory hints */ -static int reg_process_hint(struct regulatory_request *reg_request) +static void reg_process_hint(struct regulatory_request *reg_request) { int r = 0; struct wiphy *wiphy = NULL; @@ -1424,7 +1419,7 @@ static int reg_process_hint(struct regulatory_request *reg_request) if (reg_request->initiator == REGDOM_SET_BY_DRIVER && !wiphy) { - r = -ENODEV; + kfree(reg_request); goto out; } @@ -1434,18 +1429,12 @@ static int reg_process_hint(struct regulatory_request *reg_request) wiphy_update_regulatory(wiphy, reg_request->initiator); out: mutex_unlock(&cfg80211_mutex); - - if (r == -EALREADY) - r = 0; - - return r; } /* Processes regulatory hints, this is all the REGDOM_SET_BY_* */ static void reg_process_pending_hints(void) { struct regulatory_request *reg_request; - int r; spin_lock(®_requests_lock); while (!list_empty(®_requests_list)) { @@ -1453,20 +1442,9 @@ static void reg_process_pending_hints(void) struct regulatory_request, list); list_del_init(®_request->list); - spin_unlock(®_requests_lock); - r = reg_process_hint(reg_request); -#ifdef CONFIG_CFG80211_REG_DEBUG - if (r && (reg_request->initiator == REGDOM_SET_BY_DRIVER || - reg_request->initiator == REGDOM_SET_BY_COUNTRY_IE)) - printk(KERN_ERR "cfg80211: wiphy_idx %d sent a " - "regulatory hint for %c%c but now has " - "gone fishing, ignoring request\n", - reg_request->wiphy_idx, - reg_request->alpha2[0], - reg_request->alpha2[1]); -#endif - kfree(reg_request); + spin_unlock(®_requests_lock); + reg_process_hint(reg_request); spin_lock(®_requests_lock); } spin_unlock(®_requests_lock); -- cgit v1.2.3 From 2f92cd2e5f1751f7da5fa9b58e0ab22da6577cfd Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Sat, 21 Feb 2009 00:24:16 -0500 Subject: cfg80211: pass the regulatory_request to ignore_request Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 6152a7ac9b90..ce66bfdf57ec 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1231,8 +1231,8 @@ static int reg_copy_regd(const struct ieee80211_regdomain **dst_regd, /* This has the logic which determines when a new request * should be ignored. */ -static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, - const char *alpha2) +static int ignore_request(struct wiphy *wiphy, + struct regulatory_request *pending_request) { struct wiphy *last_wiphy = NULL; @@ -1242,7 +1242,7 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, if (!last_request) return 0; - switch (set_by) { + switch (pending_request->initiator) { case REGDOM_SET_BY_INIT: return -EINVAL; case REGDOM_SET_BY_CORE: @@ -1251,7 +1251,7 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, last_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); - if (unlikely(!is_an_alpha2(alpha2))) + if (unlikely(!is_an_alpha2(pending_request->alpha2))) return -EINVAL; if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) { if (last_wiphy != wiphy) { @@ -1261,7 +1261,7 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, * intersect them, but that seems unlikely * to be correct. Reject second one for now. */ - if (regdom_changes(alpha2)) + if (regdom_changes(pending_request->alpha2)) return -EOPNOTSUPP; return -EALREADY; } @@ -1269,7 +1269,7 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, * Two consecutive Country IE hints on the same wiphy. * This should be picked up early by the driver/stack */ - if (WARN_ON(regdom_changes(alpha2))) + if (WARN_ON(regdom_changes(pending_request->alpha2))) return 0; return -EALREADY; } @@ -1278,7 +1278,7 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, if (last_request->initiator == REGDOM_SET_BY_CORE) { if (is_old_static_regdom(cfg80211_regdomain)) return 0; - if (regdom_changes(alpha2)) + if (regdom_changes(pending_request->alpha2)) return 0; return -EALREADY; } @@ -1289,7 +1289,7 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, * loaded card also agrees on the regulatory domain. */ if (last_request->initiator == REGDOM_SET_BY_DRIVER && - !regdom_changes(alpha2)) + !regdom_changes(pending_request->alpha2)) return -EALREADY; return REG_INTERSECT; @@ -1315,7 +1315,7 @@ static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, } if (!is_old_static_regdom(cfg80211_regdomain) && - !regdom_changes(alpha2)) + !regdom_changes(pending_request->alpha2)) return -EALREADY; return 0; @@ -1346,9 +1346,7 @@ static int __regulatory_hint(struct wiphy *wiphy, assert_cfg80211_lock(); - r = ignore_request(wiphy, - pending_request->initiator, - pending_request->alpha2); + r = ignore_request(wiphy, pending_request); if (r == REG_INTERSECT) { if (pending_request->initiator == REGDOM_SET_BY_DRIVER) { -- cgit v1.2.3 From 34e8f08231388f9e16c6f1e2461f53afaf7f1e5e Mon Sep 17 00:00:00 2001 From: Alina Friedrichsen Date: Sun, 22 Feb 2009 00:07:28 +0100 Subject: mac80211: Don't merge with the same BSSID It was not a good idea to do a TSF reset on strange IBSS merges to the same BSSID. For example it will break the TSF sync of ath9k completely and it is unnecessary as all hardware I have tested do a TSF sync to a higher value automatically and IBSS merges are only done to higher TSF values. It only need a TSF reset to accept a lower value, when the IBSS network is changed manually. Signed-off-by: Alina Friedrichsen Signed-off-by: John W. Linville --- net/mac80211/ibss.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index aa8937c56285..7a944ca1c840 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -291,6 +291,10 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, sdata->u.ibss.ssid_len)) goto put_bss; + /* same BSSID */ + if (memcmp(bss->cbss.bssid, sdata->u.ibss.bssid, ETH_ALEN) == 0) + goto put_bss; + if (rx_status->flag & RX_FLAG_TSFT) { /* * For correct IBSS merging we need mactime; since mactime is -- cgit v1.2.3 From 4a332a385a86e31bfe181d969a8cb5579798fe03 Mon Sep 17 00:00:00 2001 From: Alina Friedrichsen Date: Sun, 22 Feb 2009 18:19:33 +0100 Subject: mac80211: Give it some time to do the TSF sync Give slow hardware some time to do the TSF sync, to not run into an IBSS merging endless loop in some rarely situations. Signed-off-by: Alina Friedrichsen Signed-off-by: John W. Linville --- net/mac80211/ibss.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 7a944ca1c840..a96ce9dfc6b5 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -29,6 +29,7 @@ #define IEEE80211_IBSS_JOIN_TIMEOUT (7 * HZ) #define IEEE80211_IBSS_MERGE_INTERVAL (30 * HZ) +#define IEEE80211_IBSS_MERGE_DELAY 0x400000 #define IEEE80211_IBSS_INACTIVITY_LIMIT (60 * HZ) #define IEEE80211_IBSS_MAX_STA_ENTRIES 128 @@ -336,6 +337,10 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, jiffies); #endif + /* give slow hardware some time to do the TSF sync */ + if (rx_timestamp < IEEE80211_IBSS_MERGE_DELAY) + goto put_bss; + if (beacon_timestamp > rx_timestamp) { #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG "%s: beacon TSF higher than " -- cgit v1.2.3 From 0bfbce18b9c11201ebf1cfbc0deeab7bdbfe32a5 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Tue, 24 Feb 2009 16:49:58 +0200 Subject: nl80211: Avoid AP mode BUG_ON hang with invalid lock assert "cfg80211: add assert_cfg80211_lock() to ensure proper protection" added assert_cfg80211_lock() calls into various places. At least one of them, nl80211_send_wiphy(), should not have been there. That triggers the BUG_ON in assert_cfg80211_lock() and pretty much kills the kernel whenever someone runs hostapd.. Remove that call and make assert_cfg80211_lock() use WARN_ON instead of BUG_ON to be a bit more friendly to users. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/core.h | 2 +- net/wireless/nl80211.c | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/wireless/core.h b/net/wireless/core.h index cd8e6e3ef116..f6c53f5807f4 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -76,7 +76,7 @@ extern struct list_head cfg80211_drv_list; static inline void assert_cfg80211_lock(void) { - BUG_ON(!mutex_is_locked(&cfg80211_mutex)); + WARN_ON(!mutex_is_locked(&cfg80211_mutex)); } /* diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 97f69bed3fe2..531bb67cf502 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -137,8 +137,6 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, int i; u16 ifmodes = dev->wiphy.interface_modes; - assert_cfg80211_lock(); - hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_WIPHY); if (!hdr) return -1; -- cgit v1.2.3 From c3b3240450ab8a1f3e52c5a69d53113deb6f91c5 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 2 Mar 2009 01:49:28 -0800 Subject: rds: Fix build on powerpc. As reported by Stephen Rothwell. > Today's linux-next build (powerpc allyesconfig) failed like this: > > net/rds/cong.c: In function 'rds_cong_set_bit': > net/rds/cong.c:284: error: implicit declaration of function 'generic___set_le_bit' > net/rds/cong.c: In function 'rds_cong_clear_bit': > net/rds/cong.c:298: error: implicit declaration of function 'generic___clear_le_bit' > net/rds/cong.c: In function 'rds_cong_test_bit': > net/rds/cong.c:309: error: implicit declaration of function 'generic_test_le_bit' Signed-off-by: David S. Miller --- net/rds/cong.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/rds/cong.c b/net/rds/cong.c index 90e6b31d8e8a..710e4599d76c 100644 --- a/net/rds/cong.c +++ b/net/rds/cong.c @@ -33,6 +33,8 @@ #include #include +#include + #include "rds.h" /* -- cgit v1.2.3 From ac11ba753f3aa839292c1a3b6c971c637ad2e839 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 28 Feb 2009 04:44:27 +0000 Subject: tcp: don't backtrack to sacked skbs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Backtracking to sacked skbs is a horrible performance killer since the hint cannot be advanced successfully past them... ...And it's totally unnecessary too. In theory this is 2.6.27..28 regression but I doubt anybody can make .28 to have worse performance because of other TCP improvements. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f6f61b3e677b..2471cd4f66db 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2065,7 +2065,7 @@ begin_fwd: goto begin_fwd; } else if (!(sacked & TCPCB_LOST)) { - if (hole == NULL && !(sacked & TCPCB_SACKED_RETRANS)) + if (hole == NULL && !(sacked & (TCPCB_SACKED_RETRANS|TCPCB_SACKED_ACKED))) hole = skb; continue; -- cgit v1.2.3 From 59a08cba6a604a265e45e9b970e372554cf46627 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 28 Feb 2009 04:44:28 +0000 Subject: tcp: fix lost_cnt_hint miscounts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is possible that lost_cnt_hint gets underflow in tcp_clean_rtx_queue because the cumulative ACK can cover the segment where lost_skb_hint points to only partially, which means that the hint is not cleared, opposite to what my (earlier) comment claimed. Also I don't agree what I ended up writing about non-trivial case there to be what I intented to say. It was not supposed to happen that the hint won't get cleared and we underflow in any scenario. In general, this is quite hard to trigger in practice. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c28976a7e596..3f2f09091bcf 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3273,18 +3273,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (tcp_is_reno(tp)) { tcp_remove_reno_sacks(sk, pkts_acked); } else { + int delta; + /* Non-retransmitted hole got filled? That's reordering */ if (reord < prior_fackets) tcp_update_reordering(sk, tp->fackets_out - reord, 0); - /* No need to care for underflows here because - * the lost_skb_hint gets NULLed if we're past it - * (or something non-trivial happened) - */ - if (tcp_is_fack(tp)) - tp->lost_cnt_hint -= pkts_acked; - else - tp->lost_cnt_hint -= prior_sacked - tp->sacked_out; + delta = tcp_is_fack(tp) ? pkts_acked : + prior_sacked - tp->sacked_out; + tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta); } tp->fackets_out -= min(pkts_acked, tp->fackets_out); -- cgit v1.2.3 From 62ad27619cbcf23fb8581ae72f3806c1d90a861d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 28 Feb 2009 04:44:29 +0000 Subject: tcp: deferring in middle of queue makes very little sense MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If skb can be sent right away, we certainly should do that if it's in the middle of the queue because it won't get more data into it. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 2471cd4f66db..fa3c81aa4e6a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1356,6 +1356,10 @@ static int tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) if (limit >= sk->sk_gso_max_size) goto send_now; + /* Middle in queue won't get any more data, full sendable already? */ + if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len)) + goto send_now; + if (sysctl_tcp_tso_win_divisor) { u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache); -- cgit v1.2.3 From d3d2ae454501a4dec360995649e1b002a2ad90c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 28 Feb 2009 04:44:30 +0000 Subject: tcp: Don't clear hints when tcp_fragmenting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1) We didn't remove any skbs, so no need to handle stale refs. 2) scoreboard_skb_hint is trivial, no timestamps were changed so no need to clear that one 3) lost_skb_hint needs tweaking similar to that of tcp_sacktag_one(). Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index fa3c81aa4e6a..3feab4d6929d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -771,7 +771,6 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, BUG_ON(len > skb->len); - tcp_clear_retrans_hints_partial(tp); nsize = skb_headlen(skb) - len; if (nsize < 0) nsize = 0; @@ -854,6 +853,12 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, tcp_verify_left_out(tp); } tcp_adjust_fackets_out(sk, skb, diff); + + if (tp->lost_skb_hint && + before(TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(tp->lost_skb_hint)->seq) && + (tcp_is_fack(tp) || TCP_SKB_CB(skb)->sacked)) + tp->lost_cnt_hint -= diff; } /* Link BUFF into the send queue. */ -- cgit v1.2.3 From 02276f3c962fd408fa9d441251067845f948bfcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 28 Feb 2009 04:44:31 +0000 Subject: tcp: fix corner case issue in segmentation during rexmitting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If cur_mss grew very recently so that the previously G/TSOed skb now fits well into a single segment it would get send up in parts unless we calculate # of segments again. This corner-case could happen eg. after mtu probe completes or less than previously sack blocks are required for the opposite direction. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3feab4d6929d..77af7faf38a9 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1921,6 +1921,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) if (skb->len > cur_mss) { if (tcp_fragment(sk, skb, cur_mss, cur_mss)) return -ENOMEM; /* We'll try again later. */ + } else { + tcp_init_tso_segs(sk, skb, cur_mss); } tcp_retrans_try_collapse(sk, skb, cur_mss); -- cgit v1.2.3 From d0af4160d19ff2849386140881e729f9ba86f2aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 28 Feb 2009 04:44:32 +0000 Subject: tcp: remove redundant code from tcp_mark_lost_retrans MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Arnd Hannemann noticed and was puzzled by the fact that !tcp_is_fack(tp) leads to early return near the beginning and the later on tcp_is_fack(tp) was still used in an if condition. The later check was a left-over from RFC3517 SACK stuff (== !tcp_is_fack(tp) behavior nowadays) as there wasn't clear way how to handle this particular check cheaply in the spirit of RFC3517 (using only SACK blocks, not holes + SACK blocks as with FACK). I sort of left it there as a reminder but since it's confusing other people just remove it and comment the missing-feature stuff instead. Signed-off-by: Ilpo Järvinen Cc: Arnd Hannemann Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3f2f09091bcf..125b4517f368 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1178,10 +1178,18 @@ static void tcp_mark_lost_retrans(struct sock *sk) if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) continue; - if (after(received_upto, ack_seq) && - (tcp_is_fack(tp) || - !before(received_upto, - ack_seq + tp->reordering * tp->mss_cache))) { + /* TODO: We would like to get rid of tcp_is_fack(tp) only + * constraint here (see above) but figuring out that at + * least tp->reordering SACK blocks reside between ack_seq + * and received_upto is not easy task to do cheaply with + * the available datastructures. + * + * Whether FACK should check here for tp->reordering segs + * in-between one could argue for either way (it would be + * rather simple to implement as we could count fack_count + * during the walk and do tp->fackets_out - fack_count). + */ + if (after(received_upto, ack_seq)) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); -- cgit v1.2.3 From 7363a5b233734dba339f2874ff6ed6c489d3d865 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 28 Feb 2009 04:44:33 +0000 Subject: tcp: separate timeout marking loop to it's own function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some comment about its current state added. So far I have seen very few cases where the thing is actually useful, usually just marginally (though admittedly I don't usually see top of window losses where it seems possible that there could be some gain), instead, more often the cases suffer from L-marking spike which is certainly not desirable (I'll bury improving it to my todo list, but on a low prio position). Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 63 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 39 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 125b4517f368..03f5ede87224 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2461,6 +2461,44 @@ static int tcp_time_to_recover(struct sock *sk) return 0; } +/* New heuristics: it is possible only after we switched to restart timer + * each time when something is ACKed. Hence, we can detect timed out packets + * during fast retransmit without falling to slow start. + * + * Usefulness of this as is very questionable, since we should know which of + * the segments is the next to timeout which is relatively expensive to find + * in general case unless we add some data structure just for that. The + * current approach certainly won't find the right one too often and when it + * finally does find _something_ it usually marks large part of the window + * right away (because a retransmission with a larger timestamp blocks the + * loop from advancing). -ij + */ +static void tcp_timeout_skbs(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + + if (!tcp_is_fack(tp) || !tcp_head_timedout(sk)) + return; + + skb = tp->scoreboard_skb_hint; + if (tp->scoreboard_skb_hint == NULL) + skb = tcp_write_queue_head(sk); + + tcp_for_write_queue_from(skb, sk) { + if (skb == tcp_send_head(sk)) + break; + if (!tcp_skb_timedout(sk, skb)) + break; + + tcp_skb_mark_lost(tp, skb); + } + + tp->scoreboard_skb_hint = skb; + + tcp_verify_left_out(tp); +} + /* Mark head of queue up as lost. With RFC3517 SACK, the packets is * is against sacked "cnt", otherwise it's against facked "cnt" */ @@ -2533,30 +2571,7 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit) tcp_mark_head_lost(sk, sacked_upto); } - /* New heuristics: it is possible only after we switched - * to restart timer each time when something is ACKed. - * Hence, we can detect timed out packets during fast - * retransmit without falling to slow start. - */ - if (tcp_is_fack(tp) && tcp_head_timedout(sk)) { - struct sk_buff *skb; - - skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint - : tcp_write_queue_head(sk); - - tcp_for_write_queue_from(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - if (!tcp_skb_timedout(sk, skb)) - break; - - tcp_skb_mark_lost(tp, skb); - } - - tp->scoreboard_skb_hint = skb; - - tcp_verify_left_out(tp); - } + tcp_timeout_skbs(sk); } /* CWND moderation, preventing bursts due to too big ACKs -- cgit v1.2.3 From bc079e9ede5fb0225bd3e84891a6266f77142094 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 28 Feb 2009 04:44:34 +0000 Subject: tcp: cleanup ca_state mess in tcp_timer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Redundant checks made indentation impossible to follow. However, it might be useful to make this ca_state+is_sack indexed array. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_timer.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 0170e914f1b0..b144a26359bc 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -328,19 +328,16 @@ static void tcp_retransmit_timer(struct sock *sk) if (icsk->icsk_retransmits == 0) { int mib_idx; - if (icsk->icsk_ca_state == TCP_CA_Disorder || - icsk->icsk_ca_state == TCP_CA_Recovery) { - if (tcp_is_sack(tp)) { - if (icsk->icsk_ca_state == TCP_CA_Recovery) - mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL; - else - mib_idx = LINUX_MIB_TCPSACKFAILURES; - } else { - if (icsk->icsk_ca_state == TCP_CA_Recovery) - mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL; - else - mib_idx = LINUX_MIB_TCPRENOFAILURES; - } + if (icsk->icsk_ca_state == TCP_CA_Disorder) { + if (tcp_is_sack(tp)) + mib_idx = LINUX_MIB_TCPSACKFAILURES; + else + mib_idx = LINUX_MIB_TCPRENOFAILURES; + } else if (icsk->icsk_ca_state == TCP_CA_Recovery) { + if (tcp_is_sack(tp)) + mib_idx = LINUX_MIB_TCPSACKRECOVERYFAIL; + else + mib_idx = LINUX_MIB_TCPRENORECOVERYFAIL; } else if (icsk->icsk_ca_state == TCP_CA_Loss) { mib_idx = LINUX_MIB_TCPLOSSFAILURES; } else { -- cgit v1.2.3 From e6c7d0857905f1d642cb8dbadae6794bfa1dff30 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 28 Feb 2009 04:44:35 +0000 Subject: tcp: drop unnecessary local var in collapse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 77af7faf38a9..61445b57610c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1767,11 +1767,9 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *next_skb = tcp_write_queue_next(sk, skb); int skb_size, next_skb_size; - u16 flags; skb_size = skb->len; next_skb_size = next_skb->len; - flags = TCP_SKB_CB(skb)->flags; BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1); @@ -1791,9 +1789,8 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) /* Update sequence range on original skb. */ TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(next_skb)->end_seq; - /* Merge over control information. */ - flags |= TCP_SKB_CB(next_skb)->flags; /* This moves PSH/FIN etc. over */ - TCP_SKB_CB(skb)->flags = flags; + /* Merge over control information. This moves PSH/FIN etc. over */ + TCP_SKB_CB(skb)->flags |= TCP_SKB_CB(next_skb)->flags; /* All done, get rid of second SKB and account for it so * packet counting does not break. -- cgit v1.2.3 From 571a5dd8d01f2a7e279c502fa220a69262d73694 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 28 Feb 2009 04:44:36 +0000 Subject: htcp: merge icsk_ca_state compare MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similar to what is done elsewhere in TCP code when double state checks are being done. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_htcp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 937549b8a921..26d5c7fc7de5 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -115,8 +115,7 @@ static void measure_achieved_throughput(struct sock *sk, u32 pkts_acked, s32 rtt return; /* achieved throughput calculations */ - if (icsk->icsk_ca_state != TCP_CA_Open && - icsk->icsk_ca_state != TCP_CA_Disorder) { + if (!((1 << icsk->icsk_ca_state) & (TCPF_CA_Open | TCPF_CA_Disorder))) { ca->packetcount = 0; ca->lasttime = now; return; -- cgit v1.2.3 From 758ce5c8d11d6fc57fe5f1dbc237aa8ff6386eac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 28 Feb 2009 04:44:37 +0000 Subject: tcp: add helper for AI algorithm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It seems that implementation in yeah was inconsistent to what other did as it would increase cwnd one ack earlier than the others do. Size benefits: bictcp_cong_avoid | -36 tcp_cong_avoid_ai | +52 bictcp_cong_avoid | -34 tcp_scalable_cong_avoid | -36 tcp_veno_cong_avoid | -12 tcp_yeah_cong_avoid | -38 = -104 bytes total Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_bic.c | 11 +---------- net/ipv4/tcp_cong.c | 21 ++++++++++++++------- net/ipv4/tcp_cubic.c | 11 +---------- net/ipv4/tcp_scalable.c | 10 ++-------- net/ipv4/tcp_veno.c | 7 +------ net/ipv4/tcp_yeah.c | 9 +-------- 6 files changed, 20 insertions(+), 49 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 7eb7636db0d0..3b53fd1af23f 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -149,16 +149,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) tcp_slow_start(tp); else { bictcp_update(ca, tp->snd_cwnd); - - /* In dangerous area, increase slowly. - * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd - */ - if (tp->snd_cwnd_cnt >= ca->cnt) { - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - tp->snd_cwnd_cnt = 0; - } else - tp->snd_cwnd_cnt++; + tcp_cong_avoid_ai(tp, ca->cnt); } } diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 4ec5b4e97c4e..e92beb9e55e0 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -336,6 +336,19 @@ void tcp_slow_start(struct tcp_sock *tp) } EXPORT_SYMBOL_GPL(tcp_slow_start); +/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w) */ +void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w) +{ + if (tp->snd_cwnd_cnt >= w) { + if (tp->snd_cwnd < tp->snd_cwnd_clamp) + tp->snd_cwnd++; + tp->snd_cwnd_cnt = 0; + } else { + tp->snd_cwnd_cnt++; + } +} +EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai); + /* * TCP Reno congestion control * This is special case used for fallback as well. @@ -365,13 +378,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) tp->snd_cwnd++; } } else { - /* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd */ - if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - tp->snd_cwnd_cnt = 0; - } else - tp->snd_cwnd_cnt++; + tcp_cong_avoid_ai(tp, tp->snd_cwnd); } } EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index ee467ec40c4f..71d5f2f29fa6 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -294,16 +294,7 @@ static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) tcp_slow_start(tp); } else { bictcp_update(ca, tp->snd_cwnd); - - /* In dangerous area, increase slowly. - * In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd - */ - if (tp->snd_cwnd_cnt >= ca->cnt) { - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - tp->snd_cwnd_cnt = 0; - } else - tp->snd_cwnd_cnt++; + tcp_cong_avoid_ai(tp, ca->cnt); } } diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index 4660b088a8ce..a76513779e2b 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -24,14 +24,8 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) if (tp->snd_cwnd <= tp->snd_ssthresh) tcp_slow_start(tp); - else { - tp->snd_cwnd_cnt++; - if (tp->snd_cwnd_cnt > min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)){ - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - tp->snd_cwnd_cnt = 0; - } - } + else + tcp_cong_avoid_ai(tp, min(tp->snd_cwnd, TCP_SCALABLE_AI_CNT)); } static u32 tcp_scalable_ssthresh(struct sock *sk) diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index d08b2e855c22..e9bbff746488 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -159,12 +159,7 @@ static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) /* In the "non-congestive state", increase cwnd * every rtt. */ - if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { - if (tp->snd_cwnd < tp->snd_cwnd_clamp) - tp->snd_cwnd++; - tp->snd_cwnd_cnt = 0; - } else - tp->snd_cwnd_cnt++; + tcp_cong_avoid_ai(tp, tp->snd_cwnd); } else { /* In the "congestive state", increase cwnd * every other rtt. diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 9ec843a9bbb2..66b6821b984e 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -94,14 +94,7 @@ static void tcp_yeah_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) } else { /* Reno */ - - if (tp->snd_cwnd_cnt < tp->snd_cwnd) - tp->snd_cwnd_cnt++; - - if (tp->snd_cwnd_cnt >= tp->snd_cwnd) { - tp->snd_cwnd++; - tp->snd_cwnd_cnt = 0; - } + tcp_cong_avoid_ai(tp, tp->snd_cwnd); } /* The key players are v_vegas.beg_snd_una and v_beg_snd_nxt. -- cgit v1.2.3 From cabeccbd172cc305f4383f5a4808ae254745275f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 28 Feb 2009 04:44:38 +0000 Subject: tcp: kill eff_sacks "cache", the sole user can calculate itself MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Also fixes insignificant bug that would cause sending of stale SACK block (would occur in some corner cases). Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 15 ++------------- net/ipv4/tcp_minisocks.c | 3 +-- net/ipv4/tcp_output.c | 12 ++++++------ 3 files changed, 9 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 03f5ede87224..e4442a293eb0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4099,7 +4099,6 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) tp->rx_opt.dsack = 1; tp->duplicate_sack[0].start_seq = seq; tp->duplicate_sack[0].end_seq = end_seq; - tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + 1; } } @@ -4154,8 +4153,6 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp) * Decrease num_sacks. */ tp->rx_opt.num_sacks--; - tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + - tp->rx_opt.dsack; for (i = this_sack; i < tp->rx_opt.num_sacks; i++) sp[i] = sp[i + 1]; continue; @@ -4218,7 +4215,6 @@ new_sack: sp->start_seq = seq; sp->end_seq = end_seq; tp->rx_opt.num_sacks++; - tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack; } /* RCV.NXT advances, some SACKs should be eaten. */ @@ -4232,7 +4228,6 @@ static void tcp_sack_remove(struct tcp_sock *tp) /* Empty ofo queue, hence, all the SACKs are eaten. Clear. */ if (skb_queue_empty(&tp->out_of_order_queue)) { tp->rx_opt.num_sacks = 0; - tp->rx_opt.eff_sacks = tp->rx_opt.dsack; return; } @@ -4253,11 +4248,8 @@ static void tcp_sack_remove(struct tcp_sock *tp) this_sack++; sp++; } - if (num_sacks != tp->rx_opt.num_sacks) { + if (num_sacks != tp->rx_opt.num_sacks) tp->rx_opt.num_sacks = num_sacks; - tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks + - tp->rx_opt.dsack; - } } /* This one checks to see if we can put data from the @@ -4333,10 +4325,8 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) TCP_ECN_accept_cwr(tp, skb); - if (tp->rx_opt.dsack) { + if (tp->rx_opt.dsack) tp->rx_opt.dsack = 0; - tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks; - } /* Queue data for delivery to the user. * Packets in sequence go to the receive queue. @@ -4456,7 +4446,6 @@ drop: if (tcp_is_sack(tp)) { tp->rx_opt.num_sacks = 1; tp->rx_opt.dsack = 0; - tp->rx_opt.eff_sacks = 1; tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq; tp->selective_acks[0].end_seq = TCP_SKB_CB(skb)->end_seq; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f67effbb102b..bb3d8b35f19a 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -434,9 +434,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->rx_opt.saw_tstamp = 0; newtp->rx_opt.dsack = 0; - newtp->rx_opt.eff_sacks = 0; - newtp->rx_opt.num_sacks = 0; + newtp->urg_data = 0; if (sock_flag(newsk, SOCK_KEEPOPEN)) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 61445b57610c..1555bb73b638 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -441,10 +441,8 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, *ptr++ = htonl(sp[this_sack].end_seq); } - if (tp->rx_opt.dsack) { + if (tp->rx_opt.dsack) tp->rx_opt.dsack = 0; - tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks; - } } } @@ -550,6 +548,7 @@ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb, struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL; struct tcp_sock *tp = tcp_sk(sk); unsigned size = 0; + unsigned int eff_sacks; #ifdef CONFIG_TCP_MD5SIG *md5 = tp->af_specific->md5_lookup(sk, sk); @@ -568,10 +567,11 @@ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb, size += TCPOLEN_TSTAMP_ALIGNED; } - if (unlikely(tp->rx_opt.eff_sacks)) { + eff_sacks = tp->rx_opt.num_sacks + tp->rx_opt.dsack; + if (unlikely(eff_sacks)) { const unsigned remaining = MAX_TCP_OPTION_SPACE - size; opts->num_sack_blocks = - min_t(unsigned, tp->rx_opt.eff_sacks, + min_t(unsigned, eff_sacks, (remaining - TCPOLEN_SACK_BASE_ALIGNED) / TCPOLEN_SACK_PERBLOCK); size += TCPOLEN_SACK_BASE_ALIGNED + @@ -1418,7 +1418,7 @@ static int tcp_mtu_probe(struct sock *sk) icsk->icsk_mtup.probe_size || inet_csk(sk)->icsk_ca_state != TCP_CA_Open || tp->snd_cwnd < 11 || - tp->rx_opt.eff_sacks) + tp->rx_opt.num_sacks || tp->rx_opt.dsack) return -1; /* Very simple search strategy: just double the MSS. */ -- cgit v1.2.3 From 0d6a775e27d975e5f9ea8e2911216d84face50ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 28 Feb 2009 04:44:41 +0000 Subject: tcp: in sendmsg/pages open code the real goto target MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit copied was assigned zero right before the goto, so if (copied) cannot ever be true. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 90b2f3c192ff..d3f9beee74c0 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -683,7 +683,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse err = -EPIPE; if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) - goto do_error; + goto out_err; while (psize > 0) { struct sk_buff *skb = tcp_write_queue_tail(sk); @@ -854,7 +854,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, err = -EPIPE; if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) - goto do_error; + goto out_err; while (--iovlen >= 0) { int seglen = iov->iov_len; -- cgit v1.2.3 From 9ce01461028d595a6f1cd724fbd7a0dd70464fe4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 28 Feb 2009 04:44:42 +0000 Subject: tcp: get rid of two unnecessary u16s in TCP skb flags copying MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I guess these fields were one day 16-bit in the struct but nowadays they're just using 8 bits anyway. This is just a precaution, didn't result any change in my case but who knows what all those varying gcc versions & options do. I've been told that 16-bit is not so nice with some cpus. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 1555bb73b638..920c57b90ded 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -767,7 +767,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, struct sk_buff *buff; int nsize, old_factor; int nlen; - u16 flags; + u8 flags; BUG_ON(len > skb->len); @@ -1282,7 +1282,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, { struct sk_buff *buff; int nlen = skb->len - len; - u16 flags; + u8 flags; /* All of a TSO frame must be composed of paged data. */ if (skb->len != skb->data_len) -- cgit v1.2.3 From 361a5c1dd0bd7bb2b90e7fe9127b366d3566522e Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Fri, 27 Feb 2009 22:38:28 +0000 Subject: dccp: Minimise header option overhead in setting the MPS This patch resolves a long-standing FIXME to dynamically update the Maximum Packet Size depending on actual options usage. It uses the flags set by the feature-negotiation infrastructure to compute the required header option size. Most options are fixed-size, a notable exception are Ack Vectors (required currently only by CCID-2). These can have any length between 3 and 1020 bytes. As a result of testing, 16 bytes (2 bytes for type/length plus 14 Ack Vector cells) have been found to be sufficient for loss-free situations. There are currently no CCID-specific header options which may appear on data packets, thus it is not necessary to define a corresponding CCID field as suggested in the old comment. Further changes: ---------------- Adjusted the type of 'cur_mps' to match the unsigned return type of the function. Signed-off-by: Gerrit Renker Acked-by: Ian McDonald Signed-off-by: David S. Miller --- net/dccp/ackvec.h | 3 +++ net/dccp/output.c | 22 ++++++++++++++-------- 2 files changed, 17 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/dccp/ackvec.h b/net/dccp/ackvec.h index 45f95e55f873..7ea557b7c6b1 100644 --- a/net/dccp/ackvec.h +++ b/net/dccp/ackvec.h @@ -20,6 +20,9 @@ /* We can spread an ack vector across multiple options */ #define DCCP_MAX_ACKVEC_LEN (DCCP_SINGLE_OPT_MAXLEN * 2) +/* Estimated minimum average Ack Vector length - used for updating MPS */ +#define DCCPAV_MIN_OPTLEN 16 + #define DCCP_ACKVEC_STATE_RECEIVED 0 #define DCCP_ACKVEC_STATE_ECN_MARKED (1 << 6) #define DCCP_ACKVEC_STATE_NOT_RECEIVED (3 << 6) diff --git a/net/dccp/output.c b/net/dccp/output.c index 22a618af4893..27c79bcc6a1e 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -161,21 +161,27 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu) struct inet_connection_sock *icsk = inet_csk(sk); struct dccp_sock *dp = dccp_sk(sk); u32 ccmps = dccp_determine_ccmps(dp); - int cur_mps = ccmps ? min(pmtu, ccmps) : pmtu; + u32 cur_mps = ccmps ? min(pmtu, ccmps) : pmtu; /* Account for header lengths and IPv4/v6 option overhead */ cur_mps -= (icsk->icsk_af_ops->net_header_len + icsk->icsk_ext_hdr_len + sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext)); /* - * FIXME: this should come from the CCID infrastructure, where, say, - * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets - * put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED - * TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to - * make it a multiple of 4 + * Leave enough headroom for common DCCP header options. + * This only considers options which may appear on DCCP-Data packets, as + * per table 3 in RFC 4340, 5.8. When running out of space for other + * options (eg. Ack Vector which can take up to 255 bytes), it is better + * to schedule a separate Ack. Thus we leave headroom for the following: + * - 1 byte for Slow Receiver (11.6) + * - 6 bytes for Timestamp (13.1) + * - 10 bytes for Timestamp Echo (13.3) + * - 8 bytes for NDP count (7.7, when activated) + * - 6 bytes for Data Checksum (9.3) + * - %DCCPAV_MIN_OPTLEN bytes for Ack Vector size (11.4, when enabled) */ - - cur_mps -= roundup(5 + 6 + 10 + 6 + 6 + 6, 4); + cur_mps -= roundup(1 + 6 + 10 + dp->dccps_send_ndp_count * 8 + 6 + + (dp->dccps_hc_rx_ackvec ? DCCPAV_MIN_OPTLEN : 0), 4); /* And store cached results */ icsk->icsk_pmtu_cookie = pmtu; -- cgit v1.2.3 From 86739fb96e8c8269fc5b3d300c959bede272a6f6 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Fri, 27 Feb 2009 22:38:29 +0000 Subject: dccp: Do not let initial option overhead shrink the MPS This fixes a problem caused by the overlap of the connection-setup and established-state phases of DCCP connections. During connection setup, the client retransmits Confirm Feature-Negotiation options until a response from the server signals that it can move from the half-established PARTOPEN into the OPEN state, whereupon the connection is fully established on both ends (RFC 4340, 8.1.5). However, since the client may already send data while it is in the PARTOPEN state, consequences arise for the Maximum Packet Size: the problem is that the initial option overhead is much higher than for the subsequent established phase, as it involves potentially many variable-length list-type options (server-priority options, RFC 4340, 6.4). Applying the standard MPS is insufficient here: especially with larger payloads this can lead to annoying, counter-intuitive EMSGSIZE errors. On the other hand, reducing the MPS available for the established phase by the added initial overhead is highly wasteful and inefficient. The solution chosen therefore is a two-phase strategy: If the payload length of the DataAck in PARTOPEN is too large, an Ack is sent to carry the options, and the feature-negotiation list is then flushed. This means that the server gets two Acks for one Response. If both Acks get lost, it is probably better to restart the connection anyway and devising yet another special-case does not seem worth the extra complexity. The result is a higher utilisation of the available packet space for the data transmission phase (established state) of a connection. The patch (over-)estimates the initial overhead to be 32*4 bytes -- commonly seen values were around 90 bytes for initial feature-negotiation options. It uses sizeof(u32) to mean "aligned units of 4 bytes". For consistency, another use of 4-byte alignment is adapted. Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- net/dccp/dccp.h | 5 ++++- net/dccp/output.c | 15 ++++++++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h index 08a569ff02d1..d6bc47363b1c 100644 --- a/net/dccp/dccp.h +++ b/net/dccp/dccp.h @@ -63,11 +63,14 @@ extern void dccp_time_wait(struct sock *sk, int state, int timeo); * - DCCP-Reset with ACK Subheader and 4 bytes of Reset Code fields * Hence a safe upper bound for the maximum option length is 1020-28 = 992 */ -#define MAX_DCCP_SPECIFIC_HEADER (255 * sizeof(int)) +#define MAX_DCCP_SPECIFIC_HEADER (255 * sizeof(uint32_t)) #define DCCP_MAX_PACKET_HDR 28 #define DCCP_MAX_OPT_LEN (MAX_DCCP_SPECIFIC_HEADER - DCCP_MAX_PACKET_HDR) #define MAX_DCCP_HEADER (MAX_DCCP_SPECIFIC_HEADER + MAX_HEADER) +/* Upper bound for initial feature-negotiation overhead (padded to 32 bits) */ +#define DCCP_FEATNEG_OVERHEAD (32 * sizeof(uint32_t)) + #define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT * state, about 60 seconds */ diff --git a/net/dccp/output.c b/net/dccp/output.c index 27c79bcc6a1e..36bcc00654d3 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -276,7 +276,20 @@ void dccp_write_xmit(struct sock *sk, int block) const int len = skb->len; if (sk->sk_state == DCCP_PARTOPEN) { - /* See 8.1.5. Handshake Completion */ + const u32 cur_mps = dp->dccps_mss_cache - DCCP_FEATNEG_OVERHEAD; + /* + * See 8.1.5 - Handshake Completion. + * + * For robustness we resend Confirm options until the client has + * entered OPEN. During the initial feature negotiation, the MPS + * is smaller than usual, reduced by the Change/Confirm options. + */ + if (!list_empty(&dp->dccps_featneg) && len > cur_mps) { + DCCP_WARN("Payload too large (%d) for featneg.\n", len); + dccp_send_ack(sk); + dccp_feat_list_purge(&dp->dccps_featneg); + } + inet_csk_schedule_ack(sk); inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, inet_csk(sk)->icsk_rto, -- cgit v1.2.3 From d1dd524785e30cf3d64d395d829b207376acb0aa Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 2 Mar 2009 06:46:50 +0000 Subject: sctp: fix crash during module unload An extra list_del() during the module load failure and unload resulted in a crash with a list corruption. Now sctp can be unloaded again. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/protocol.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index b78e3be69013..4e6638449639 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1322,9 +1322,8 @@ SCTP_STATIC __init int sctp_init(void) out: return status; err_v6_add_protocol: - sctp_v6_del_protocol(); -err_add_protocol: sctp_v4_del_protocol(); +err_add_protocol: inet_ctl_sock_destroy(sctp_ctl_sock); err_ctl_sock_init: sctp_v6_protosw_exit(); @@ -1335,7 +1334,6 @@ err_protosw_init: sctp_v4_pf_exit(); sctp_v6_pf_exit(); sctp_sysctl_unregister(); - list_del(&sctp_af_inet.list); free_pages((unsigned long)sctp_port_hashtable, get_order(sctp_port_hashsize * sizeof(struct sctp_bind_hashbucket))); @@ -1383,7 +1381,6 @@ SCTP_STATIC __exit void sctp_exit(void) sctp_v4_pf_exit(); sctp_sysctl_unregister(); - list_del(&sctp_af_inet.list); free_pages((unsigned long)sctp_assoc_hashtable, get_order(sctp_assoc_hashsize * -- cgit v1.2.3 From 3df2678737974accf437dad11e584c1871a3ede3 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 2 Mar 2009 06:46:51 +0000 Subject: sctp: fix kernel panic with ERROR chunk containing too many error causes If ERROR chunk is received with too many error causes in ESTABLISHED state, the kernel get panic. This is because sctp limit the max length of cmds to 14, but while ERROR chunk is received, one error cause will add around 2 cmds by sctp_add_cmd_sf(). So many error causes will fill the limit of cmds and panic. This patch fixed the problem. This bug can be test by SCTP Conformance Test Suite . Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sm_sideeffect.c | 54 +++++++++++++++++++++++++++++------------------- net/sctp/sm_statefuns.c | 16 ++------------ 2 files changed, 35 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index e1d6076b4f59..b5495aecab60 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -787,36 +787,48 @@ static void sctp_cmd_process_operr(sctp_cmd_seq_t *cmds, struct sctp_association *asoc, struct sctp_chunk *chunk) { - struct sctp_operr_chunk *operr_chunk; struct sctp_errhdr *err_hdr; + struct sctp_ulpevent *ev; - operr_chunk = (struct sctp_operr_chunk *)chunk->chunk_hdr; - err_hdr = &operr_chunk->err_hdr; + while (chunk->chunk_end > chunk->skb->data) { + err_hdr = (struct sctp_errhdr *)(chunk->skb->data); - switch (err_hdr->cause) { - case SCTP_ERROR_UNKNOWN_CHUNK: - { - struct sctp_chunkhdr *unk_chunk_hdr; + ev = sctp_ulpevent_make_remote_error(asoc, chunk, 0, + GFP_ATOMIC); + if (!ev) + return; - unk_chunk_hdr = (struct sctp_chunkhdr *)err_hdr->variable; - switch (unk_chunk_hdr->type) { - /* ADDIP 4.1 A9) If the peer responds to an ASCONF with an - * ERROR chunk reporting that it did not recognized the ASCONF - * chunk type, the sender of the ASCONF MUST NOT send any - * further ASCONF chunks and MUST stop its T-4 timer. - */ - case SCTP_CID_ASCONF: - asoc->peer.asconf_capable = 0; - sctp_add_cmd_sf(cmds, SCTP_CMD_TIMER_STOP, + sctp_ulpq_tail_event(&asoc->ulpq, ev); + + switch (err_hdr->cause) { + case SCTP_ERROR_UNKNOWN_CHUNK: + { + sctp_chunkhdr_t *unk_chunk_hdr; + + unk_chunk_hdr = (sctp_chunkhdr_t *)err_hdr->variable; + switch (unk_chunk_hdr->type) { + /* ADDIP 4.1 A9) If the peer responds to an ASCONF with + * an ERROR chunk reporting that it did not recognized + * the ASCONF chunk type, the sender of the ASCONF MUST + * NOT send any further ASCONF chunks and MUST stop its + * T-4 timer. + */ + case SCTP_CID_ASCONF: + if (asoc->peer.asconf_capable == 0) + break; + + asoc->peer.asconf_capable = 0; + sctp_add_cmd_sf(cmds, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO)); + break; + default: + break; + } break; + } default: break; } - break; - } - default: - break; } } diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 3a0cd075914f..f88dfded0e3a 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3163,7 +3163,6 @@ sctp_disposition_t sctp_sf_operr_notify(const struct sctp_endpoint *ep, sctp_cmd_seq_t *commands) { struct sctp_chunk *chunk = arg; - struct sctp_ulpevent *ev; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(ep, asoc, type, arg, commands); @@ -3173,21 +3172,10 @@ sctp_disposition_t sctp_sf_operr_notify(const struct sctp_endpoint *ep, return sctp_sf_violation_chunklen(ep, asoc, type, arg, commands); - while (chunk->chunk_end > chunk->skb->data) { - ev = sctp_ulpevent_make_remote_error(asoc, chunk, 0, - GFP_ATOMIC); - if (!ev) - goto nomem; + sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_OPERR, + SCTP_CHUNK(chunk)); - sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, - SCTP_ULPEVENT(ev)); - sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_OPERR, - SCTP_CHUNK(chunk)); - } return SCTP_DISPOSITION_CONSUME; - -nomem: - return SCTP_DISPOSITION_NOMEM; } /* -- cgit v1.2.3 From ee7537b63a28b42b22e48842dfeedc66d96b71f1 Mon Sep 17 00:00:00 2001 From: Hantzis Fotis Date: Mon, 2 Mar 2009 22:42:02 -0800 Subject: tcp: tcp_init_wl / tcp_update_wl argument cleanup The above functions from include/net/tcp.h have been defined with an argument that they never use. The argument is 'u32 ack' which is never used inside the function body, and thus it can be removed. The rest of the patch involves the necessary changes to the function callers of the above two functions. Signed-off-by: Hantzis Fotis Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 9 ++++----- net/ipv4/tcp_minisocks.c | 2 +- net/ipv4/tcp_output.c | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e4442a293eb0..5ecd7aa25979 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3416,7 +3416,7 @@ static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack, if (tcp_may_update_window(tp, ack, ack_seq, nwin)) { flag |= FLAG_WIN_UPDATE; - tcp_update_wl(tp, ack, ack_seq); + tcp_update_wl(tp, ack_seq); if (tp->snd_wnd != nwin) { tp->snd_wnd = nwin; @@ -3621,7 +3621,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) * No more checks are required. * Note, we use the fact that SND.UNA>=SND.WL2. */ - tcp_update_wl(tp, ack, ack_seq); + tcp_update_wl(tp, ack_seq); tp->snd_una = ack; flag |= FLAG_WIN_UPDATE; @@ -5418,7 +5418,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * never scaled. */ tp->snd_wnd = ntohs(th->window); - tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(skb)->seq); + tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); if (!tp->rx_opt.wscale_ok) { tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0; @@ -5679,8 +5679,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, tp->snd_una = TCP_SKB_CB(skb)->ack_seq; tp->snd_wnd = ntohs(th->window) << tp->rx_opt.snd_wscale; - tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, - TCP_SKB_CB(skb)->seq); + tcp_init_wl(tp, TCP_SKB_CB(skb)->seq); /* tcp_ack considers this ACK as duplicate * and does not calculate rtt. diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index bb3d8b35f19a..4b0df3e6b609 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -399,7 +399,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, tcp_prequeue_init(newtp); - tcp_init_wl(newtp, treq->snt_isn, treq->rcv_isn); + tcp_init_wl(newtp, treq->rcv_isn); newtp->srtt = 0; newtp->mdev = TCP_TIMEOUT_INIT; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 920c57b90ded..eb285befdf3b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2337,7 +2337,7 @@ static void tcp_connect_init(struct sock *sk) sk->sk_err = 0; sock_reset_flag(sk, SOCK_DONE); tp->snd_wnd = 0; - tcp_init_wl(tp, tp->write_seq, 0); + tcp_init_wl(tp, 0); tp->snd_una = tp->write_seq; tp->snd_sml = tp->write_seq; tp->snd_up = tp->write_seq; -- cgit v1.2.3 From 906f8257eedf64c9f4da0adfacca76d06d8e8cb0 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 2 Mar 2009 09:46:10 +0000 Subject: sctp: Add some missing types for debug message This patch add the type name "AUTH" and primitive type name "PRIMITIVE_ASCONF" for debug message. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/debug.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/sctp/debug.c b/net/sctp/debug.c index 67715f4eb849..7ff548a30cfb 100644 --- a/net/sctp/debug.c +++ b/net/sctp/debug.c @@ -86,6 +86,9 @@ const char *sctp_cname(const sctp_subtype_t cid) case SCTP_CID_FWD_TSN: return "FWD_TSN"; + case SCTP_CID_AUTH: + return "AUTH"; + default: break; } @@ -135,6 +138,7 @@ static const char *sctp_primitive_tbl[SCTP_NUM_PRIMITIVE_TYPES] = { "PRIMITIVE_ABORT", "PRIMITIVE_SEND", "PRIMITIVE_REQUESTHEARTBEAT", + "PRIMITIVE_ASCONF", }; /* Lookup primitive debug name. */ -- cgit v1.2.3 From d212318c9d1b11ff44b57f90b4f9d9c9b31a6ced Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 2 Mar 2009 09:46:11 +0000 Subject: sctp: remove dup code in net/sctp/socket.c Remove dup check of "if (optlen < sizeof(int))". Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/socket.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index dea864f5de54..4bc558c19fcf 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3069,9 +3069,6 @@ static int sctp_setsockopt_maxburst(struct sock *sk, int val; int assoc_id = 0; - if (optlen < sizeof(int)) - return -EINVAL; - if (optlen == sizeof(int)) { printk(KERN_WARNING "SCTP: Use of int in max_burst socket option deprecated\n"); @@ -5283,9 +5280,6 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len, struct sctp_sock *sp; struct sctp_association *asoc; - if (len < sizeof(int)) - return -EINVAL; - if (len == sizeof(int)) { printk(KERN_WARNING "SCTP: Use of int in max_burst socket option deprecated\n"); -- cgit v1.2.3 From c6db93a58f1745cfe1acc2e1a1d68afc3245eced Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 2 Mar 2009 09:46:12 +0000 Subject: sctp: fix the length check in sctp_getsockopt_maxburst() The code in sctp_getsockopt_maxburst() doesn't allow len to be larger then struct sctp_assoc_value, which is a common case where app writers just pass down the sizeof(buf) or something similar. This patch fix the problem. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 4bc558c19fcf..bbd3cd238d7f 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5286,7 +5286,8 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len, printk(KERN_WARNING "SCTP: Use struct sctp_assoc_value instead\n"); params.assoc_id = 0; - } else if (len == sizeof (struct sctp_assoc_value)) { + } else if (len >= sizeof(struct sctp_assoc_value)) { + len = sizeof(struct sctp_assoc_value); if (copy_from_user(¶ms, optval, len)) return -EFAULT; } else -- cgit v1.2.3 From f61f6f82c90cbaa85270f26b89e3309a8c6e2e88 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 2 Mar 2009 09:46:13 +0000 Subject: sctp: use time_before or time_after for comparing jiffies The functions time_before or time_after are more robust for comparing jiffies against other values. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/outqueue.c | 3 ++- net/sctp/transport.c | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index bc411c896216..a367d15a21aa 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -428,7 +428,8 @@ void sctp_retransmit_mark(struct sctp_outq *q, * retransmitting due to T3 timeout. */ if (reason == SCTP_RTXR_T3_RTX && - (jiffies - chunk->sent_at) < transport->last_rto) + time_before(jiffies, chunk->sent_at + + transport->last_rto)) continue; /* RFC 2960 6.2.1 Processing a Received SACK diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 5c29b14ee9af..e5dde45c79d3 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -543,8 +543,8 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, * congestion indications more than once every window of * data (or more loosely more than once every round-trip time). */ - if ((jiffies - transport->last_time_ecne_reduced) > - transport->rtt) { + if (time_after(jiffies, transport->last_time_ecne_reduced + + transport->rtt)) { transport->ssthresh = max(transport->cwnd/2, 4*transport->asoc->pathmtu); transport->cwnd = transport->ssthresh; @@ -561,7 +561,8 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, * to be done every RTO interval, we do it every hearbeat * interval. */ - if ((jiffies - transport->last_time_used) > transport->rto) + if (time_after(jiffies, transport->last_time_used + + transport->rto)) transport->cwnd = max(transport->cwnd/2, 4*transport->asoc->pathmtu); break; -- cgit v1.2.3 From 7e99013a5043cacd375375c3efad35b57c3afdba Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 2 Mar 2009 09:46:14 +0000 Subject: sctp: Fix broken RTO-doubling for data retransmits Commit faee47cdbfe8d74a1573c2f81ea6dbb08d735be6 (sctp: Fix the RTO-doubling on idle-link heartbeats) broke the RTO doubling for data retransmits. If the heartbeat was sent before the data T3-rtx time, the the RTO will not double upon the T3-rtx expiration. Distingish between the operations by passing an argument to the function. Additionally, Wei Youngjun pointed out that our treatment of requested HEARTBEATS and timer HEARTBEATS is the same wrt resetting congestion window. That needs to be separated, since user requested HEARTBEATS should not treat the link as idle. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sm_sideeffect.c | 32 +++++++++++++------------------- net/sctp/sm_statefuns.c | 6 ++++-- 2 files changed, 17 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 0146cfb1f182..5385150df296 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -434,7 +434,8 @@ sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES] = { * */ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, - struct sctp_transport *transport) + struct sctp_transport *transport, + int is_hb) { /* The check for association's overall error counter exceeding the * threshold is done in the state function. @@ -466,7 +467,7 @@ static void sctp_do_8_2_transport_strike(struct sctp_association *asoc, * The first unacknowleged HB triggers it. We do this with a flag * that indicates that we have an outstanding HB. */ - if (transport->hb_sent) { + if (!is_hb || transport->hb_sent) { transport->last_rto = transport->rto; transport->rto = min((transport->rto * 2), transport->asoc->rto_max); } @@ -657,20 +658,6 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, sctp_transport_hold(t); } -/* Helper function to do a transport reset at the expiry of the hearbeat - * timer. - */ -static void sctp_cmd_transport_reset(sctp_cmd_seq_t *cmds, - struct sctp_association *asoc, - struct sctp_transport *t) -{ - sctp_transport_lower_cwnd(t, SCTP_LOWER_CWND_INACTIVE); - - /* Mark one strike against a transport. */ - sctp_do_8_2_transport_strike(asoc, t); - - t->hb_sent = 1; -} /* Helper function to process the process SACK command. */ static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds, @@ -1459,12 +1446,19 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_STRIKE: /* Mark one strike against a transport. */ - sctp_do_8_2_transport_strike(asoc, cmd->obj.transport); + sctp_do_8_2_transport_strike(asoc, cmd->obj.transport, + 0); + break; + + case SCTP_CMD_TRANSPORT_IDLE: + t = cmd->obj.transport; + sctp_transport_lower_cwnd(t, SCTP_LOWER_CWND_INACTIVE); break; - case SCTP_CMD_TRANSPORT_RESET: + case SCTP_CMD_TRANSPORT_HB_SENT: t = cmd->obj.transport; - sctp_cmd_transport_reset(commands, asoc, t); + sctp_do_8_2_transport_strike(asoc, t, 1); + t->hb_sent = 1; break; case SCTP_CMD_TRANSPORT_ON: diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 3a0cd075914f..a907bab0963d 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -988,7 +988,9 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(const struct sctp_endpoint *ep, /* Set transport error counter and association error counter * when sending heartbeat. */ - sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_RESET, + sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_IDLE, + SCTP_TRANSPORT(transport)); + sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_HB_SENT, SCTP_TRANSPORT(transport)); } sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMER_UPDATE, @@ -4967,7 +4969,7 @@ sctp_disposition_t sctp_sf_do_prm_requestheartbeat( * to that address and not acknowledged within one RTO. * */ - sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_RESET, + sctp_add_cmd_sf(commands, SCTP_CMD_TRANSPORT_HB_SENT, SCTP_TRANSPORT(arg)); return SCTP_DISPOSITION_CONSUME; } -- cgit v1.2.3 From 5a5990d3090b03745a9548a6f5edef02095675cf Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 26 Feb 2009 06:49:24 +0000 Subject: net: Avoid race between network down and sysfs Signed-off-by: Stephen Hemminger Acked-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 6ac29a46e23e..484f58750eba 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -77,7 +77,9 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, if (endp == buf) goto err; - rtnl_lock(); + if (!rtnl_trylock()) + return -ERESTARTSYS; + if (dev_isalive(net)) { if ((ret = (*set)(net, new)) == 0) ret = len; -- cgit v1.2.3 From b325fddb7f869e6c95a88dc6573220f162e5b89f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 26 Feb 2009 06:55:31 +0000 Subject: ipv6: Fix sysctl unregistration deadlock Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f9afb452249c..40fabdee42ae 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -493,15 +493,17 @@ static void addrconf_forward_change(struct net *net, __s32 newf) read_unlock(&dev_base_lock); } -static void addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) +static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) { struct net *net; net = (struct net *)table->extra2; if (p == &net->ipv6.devconf_dflt->forwarding) - return; + return 0; + + if (!rtnl_trylock()) + return -ERESTARTSYS; - rtnl_lock(); if (p == &net->ipv6.devconf_all->forwarding) { __s32 newf = net->ipv6.devconf_all->forwarding; net->ipv6.devconf_dflt->forwarding = newf; @@ -512,6 +514,7 @@ static void addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old) if (*p) rt6_purge_dflt_routers(net); + return 1; } #endif @@ -3983,7 +3986,7 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp, ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); if (write) - addrconf_fixup_forwarding(ctl, valp, val); + ret = addrconf_fixup_forwarding(ctl, valp, val); return ret; } @@ -4019,8 +4022,7 @@ static int addrconf_sysctl_forward_strategy(ctl_table *table, } *valp = new; - addrconf_fixup_forwarding(table, valp, val); - return 1; + return addrconf_fixup_forwarding(table, valp, val); } static struct addrconf_sysctl_table -- cgit v1.2.3 From 176c39af29bc4edaf37f663553eeaacd47b5bc9c Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 3 Mar 2009 01:06:45 -0800 Subject: netns: fix addrconf_ifdown kernel panic When a network namespace is destroyed the network interfaces are all unregistered, making addrconf_ifdown called by the netdevice notifier. In the other hand, the addrconf exit method does a loop on the network devices and does addrconf_ifdown on each of them. But the ordering of the netns subsystem is not right because it uses the register_pernet_device instead of register_pernet_subsys. If we handle the loopback as any network device, we can safely use register_pernet_subsys. But if we use register_pernet_subsys, the addrconf exit method will do exactly what was already done with the unregistering of the network devices. So in definitive, this code is pointless. I removed the netns addrconf exit method and moved the code to the addrconf cleanup function. Signed-off-by: Daniel Lezcano Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 39 +++++++++------------------------------ 1 file changed, 9 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 40fabdee42ae..1220e2c7831e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2611,9 +2611,6 @@ static int addrconf_ifdown(struct net_device *dev, int how) ASSERT_RTNL(); - if ((dev->flags & IFF_LOOPBACK) && how == 1) - how = 0; - rt6_ifdown(net, dev); neigh_ifdown(&nd_tbl, dev); @@ -4448,25 +4445,6 @@ int unregister_inet6addr_notifier(struct notifier_block *nb) EXPORT_SYMBOL(unregister_inet6addr_notifier); -static void addrconf_net_exit(struct net *net) -{ - struct net_device *dev; - - rtnl_lock(); - /* clean dev list */ - for_each_netdev(net, dev) { - if (__in6_dev_get(dev) == NULL) - continue; - addrconf_ifdown(dev, 1); - } - addrconf_ifdown(net->loopback_dev, 2); - rtnl_unlock(); -} - -static struct pernet_operations addrconf_net_ops = { - .exit = addrconf_net_exit, -}; - /* * Init / cleanup code */ @@ -4508,10 +4486,6 @@ int __init addrconf_init(void) if (err) goto errlo; - err = register_pernet_device(&addrconf_net_ops); - if (err) - return err; - register_netdevice_notifier(&ipv6_dev_notf); addrconf_verify(0); @@ -4541,15 +4515,22 @@ errlo: void addrconf_cleanup(void) { struct inet6_ifaddr *ifa; + struct net_device *dev; int i; unregister_netdevice_notifier(&ipv6_dev_notf); - unregister_pernet_device(&addrconf_net_ops); - unregister_pernet_subsys(&addrconf_ops); rtnl_lock(); + /* clean dev list */ + for_each_netdev(&init_net, dev) { + if (__in6_dev_get(dev) == NULL) + continue; + addrconf_ifdown(dev, 1); + } + addrconf_ifdown(init_net.loopback_dev, 2); + /* * Check hash table. */ @@ -4570,6 +4551,4 @@ void addrconf_cleanup(void) del_timer(&addr_chk_timer); rtnl_unlock(); - - unregister_pernet_subsys(&addrconf_net_ops); } -- cgit v1.2.3 From 6eb0777228f31932fc941eafe8b08848466630a1 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 22 Feb 2009 00:09:14 -0800 Subject: netns: Fix icmp shutdown. Recently I had a kernel panic in icmp_send during a network namespace cleanup. There were packets in the arp queue that failed to be sent and we attempted to generate an ICMP host unreachable message, but failed because icmp_sk_exit had already been called. The network devices are removed from a network namespace and their arp queues are flushed before we do attempt to shutdown subsystems so this error should have been impossible. It turns out icmp_init is using register_pernet_device instead of register_pernet_subsys. Which resulted in icmp being shut down while we still had the possibility of packets in flight, making a nasty NULL pointer deference in interrupt context possible. Changing this to register_pernet_subsys fixes the problem in my testing. Signed-off-by: Eric W. Biederman Acked-by: Denis V. Lunev Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 705b33b184a3..fc562d29cc46 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1205,7 +1205,7 @@ static struct pernet_operations __net_initdata icmp_sk_ops = { int __init icmp_init(void) { - return register_pernet_device(&icmp_sk_ops); + return register_pernet_subsys(&icmp_sk_ops); } EXPORT_SYMBOL(icmp_err_convert); -- cgit v1.2.3 From 2f20d2e667ab1ca44cde5fb361386dff5bb6081d Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 22 Feb 2009 00:10:18 -0800 Subject: tcp: Like icmp use register_pernet_subsys To remove the possibility of packets flying around when network devices are being cleaned up use reisger_pernet_subsys instead of register_pernet_device. Signed-off-by: Eric W. Biederman Acked-by: Denis V. Lunev Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 19d7b429a262..cf74c416831a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2443,7 +2443,7 @@ static struct pernet_operations __net_initdata tcp_sk_ops = { void __init tcp_v4_init(void) { inet_hashinfo_init(&tcp_hashinfo); - if (register_pernet_device(&tcp_sk_ops)) + if (register_pernet_subsys(&tcp_sk_ops)) panic("Failed to create the TCP control socket.\n"); } -- cgit v1.2.3 From 17edde520927070a6bf14a6a75027c0b843443e5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 22 Feb 2009 00:11:09 -0800 Subject: netns: Remove net_alive It turns out that net_alive is unnecessary, and the original problem that led to it being added was simply that the icmp code thought it was a network device and wound up being unable to handle packets while there were still packets in the network namespace. Now that icmp and tcp have been fixed to properly register themselves this problem is no longer present and we have a stronger guarantee that packets will not arrive in a network namespace then that provided by net_alive in netif_receive_skb. So remove net_alive allowing packet reception run a little faster. Additionally document the strong reason why network namespace cleanup is safe so that if something happens again someone else will have a chance of figuring it out. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/core/dev.c | 6 ------ net/core/net_namespace.c | 3 --- 2 files changed, 9 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 72b0d26fd46d..9e4afe650e7a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2267,12 +2267,6 @@ int netif_receive_skb(struct sk_buff *skb) rcu_read_lock(); - /* Don't receive packets in an exiting network namespace */ - if (!net_alive(dev_net(skb->dev))) { - kfree_skb(skb); - goto out; - } - #ifdef CONFIG_NET_CLS_ACT if (skb->tc_verd & TC_NCLS) { skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 2adb1a7d361f..e3bebd36f053 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -157,9 +157,6 @@ static void cleanup_net(struct work_struct *work) struct pernet_operations *ops; struct net *net; - /* Be very certain incoming network packets will not find us */ - rcu_barrier(); - net = container_of(work, struct net, work); mutex_lock(&net_mutex); -- cgit v1.2.3 From abb79972b4d1dff00f79cb0d123173abac48a6ae Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 3 Mar 2009 21:39:40 -0800 Subject: rds: fix iband RDMA dependencies Fix RDS Infiniband dependencies for RDMA so that these build errors won't happen: ERROR: "rdma_accept" [net/rds/rds.ko] undefined! ERROR: "rdma_destroy_id" [net/rds/rds.ko] undefined! ERROR: "rdma_connect" [net/rds/rds.ko] undefined! ERROR: "rdma_destroy_qp" [net/rds/rds.ko] undefined! ERROR: "rdma_listen" [net/rds/rds.ko] undefined! ERROR: "rdma_notify" [net/rds/rds.ko] undefined! ERROR: "rdma_create_id" [net/rds/rds.ko] undefined! ERROR: "rdma_create_qp" [net/rds/rds.ko] undefined! ERROR: "rdma_bind_addr" [net/rds/rds.ko] undefined! ERROR: "rdma_resolve_route" [net/rds/rds.ko] undefined! ERROR: "rdma_disconnect" [net/rds/rds.ko] undefined! ERROR: "rdma_reject" [net/rds/rds.ko] undefined! ERROR: "rdma_resolve_addr" [net/rds/rds.ko] undefined! Signed-off-by: Randy Dunlap Acked-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/rds/Kconfig b/net/rds/Kconfig index 63bd370ab6ee..796773b5df9b 100644 --- a/net/rds/Kconfig +++ b/net/rds/Kconfig @@ -2,6 +2,7 @@ config RDS tristate "Reliable Datagram Sockets (RDS) (EXPERIMENTAL)" depends on INET && INFINIBAND_IPOIB && EXPERIMENTAL + depends on INFINIBAND && INFINIBAND_ADDR_TRANS ---help--- RDS provides reliable, sequenced delivery of datagrams over Infiniband. -- cgit v1.2.3 From e9cc8bddaea3944fabfebb968bc88d603239beed Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 4 Mar 2009 14:53:30 +0800 Subject: netlink: Move netlink attribute parsing support to lib Netlink attribute parsing may be used even if CONFIG_NET is not set. Move it from net/netlink to lib and control its inclusion based on the new config symbol CONFIG_NLATTR, which is selected by CONFIG_NET. Signed-off-by: Geert Uytterhoeven Acked-by: David S. Miller Signed-off-by: Herbert Xu --- net/Kconfig | 1 + net/netlink/Makefile | 2 +- net/netlink/attr.c | 473 --------------------------------------------------- 3 files changed, 2 insertions(+), 474 deletions(-) delete mode 100644 net/netlink/attr.c (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index cdb8fdef6c4a..eab40a481356 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -4,6 +4,7 @@ menuconfig NET bool "Networking support" + select NLATTR ---help--- Unless you really know what you are doing, you should say Y here. The reason is that some programs need kernel networking support even diff --git a/net/netlink/Makefile b/net/netlink/Makefile index e3589c2de49e..bdd6ddf4e95b 100644 --- a/net/netlink/Makefile +++ b/net/netlink/Makefile @@ -2,4 +2,4 @@ # Makefile for the netlink driver. # -obj-y := af_netlink.o attr.o genetlink.o +obj-y := af_netlink.o genetlink.o diff --git a/net/netlink/attr.c b/net/netlink/attr.c deleted file mode 100644 index 56c3ce7fe29a..000000000000 --- a/net/netlink/attr.c +++ /dev/null @@ -1,473 +0,0 @@ -/* - * NETLINK Netlink attributes - * - * Authors: Thomas Graf - * Alexey Kuznetsov - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = { - [NLA_U8] = sizeof(u8), - [NLA_U16] = sizeof(u16), - [NLA_U32] = sizeof(u32), - [NLA_U64] = sizeof(u64), - [NLA_NESTED] = NLA_HDRLEN, -}; - -static int validate_nla(struct nlattr *nla, int maxtype, - const struct nla_policy *policy) -{ - const struct nla_policy *pt; - int minlen = 0, attrlen = nla_len(nla), type = nla_type(nla); - - if (type <= 0 || type > maxtype) - return 0; - - pt = &policy[type]; - - BUG_ON(pt->type > NLA_TYPE_MAX); - - switch (pt->type) { - case NLA_FLAG: - if (attrlen > 0) - return -ERANGE; - break; - - case NLA_NUL_STRING: - if (pt->len) - minlen = min_t(int, attrlen, pt->len + 1); - else - minlen = attrlen; - - if (!minlen || memchr(nla_data(nla), '\0', minlen) == NULL) - return -EINVAL; - /* fall through */ - - case NLA_STRING: - if (attrlen < 1) - return -ERANGE; - - if (pt->len) { - char *buf = nla_data(nla); - - if (buf[attrlen - 1] == '\0') - attrlen--; - - if (attrlen > pt->len) - return -ERANGE; - } - break; - - case NLA_BINARY: - if (pt->len && attrlen > pt->len) - return -ERANGE; - break; - - case NLA_NESTED_COMPAT: - if (attrlen < pt->len) - return -ERANGE; - if (attrlen < NLA_ALIGN(pt->len)) - break; - if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN) - return -ERANGE; - nla = nla_data(nla) + NLA_ALIGN(pt->len); - if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN + nla_len(nla)) - return -ERANGE; - break; - case NLA_NESTED: - /* a nested attributes is allowed to be empty; if its not, - * it must have a size of at least NLA_HDRLEN. - */ - if (attrlen == 0) - break; - default: - if (pt->len) - minlen = pt->len; - else if (pt->type != NLA_UNSPEC) - minlen = nla_attr_minlen[pt->type]; - - if (attrlen < minlen) - return -ERANGE; - } - - return 0; -} - -/** - * nla_validate - Validate a stream of attributes - * @head: head of attribute stream - * @len: length of attribute stream - * @maxtype: maximum attribute type to be expected - * @policy: validation policy - * - * Validates all attributes in the specified attribute stream against the - * specified policy. Attributes with a type exceeding maxtype will be - * ignored. See documenation of struct nla_policy for more details. - * - * Returns 0 on success or a negative error code. - */ -int nla_validate(struct nlattr *head, int len, int maxtype, - const struct nla_policy *policy) -{ - struct nlattr *nla; - int rem, err; - - nla_for_each_attr(nla, head, len, rem) { - err = validate_nla(nla, maxtype, policy); - if (err < 0) - goto errout; - } - - err = 0; -errout: - return err; -} - -/** - * nla_parse - Parse a stream of attributes into a tb buffer - * @tb: destination array with maxtype+1 elements - * @maxtype: maximum attribute type to be expected - * @head: head of attribute stream - * @len: length of attribute stream - * @policy: validation policy - * - * Parses a stream of attributes and stores a pointer to each attribute in - * the tb array accessable via the attribute type. Attributes with a type - * exceeding maxtype will be silently ignored for backwards compatibility - * reasons. policy may be set to NULL if no validation is required. - * - * Returns 0 on success or a negative error code. - */ -int nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head, int len, - const struct nla_policy *policy) -{ - struct nlattr *nla; - int rem, err; - - memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); - - nla_for_each_attr(nla, head, len, rem) { - u16 type = nla_type(nla); - - if (type > 0 && type <= maxtype) { - if (policy) { - err = validate_nla(nla, maxtype, policy); - if (err < 0) - goto errout; - } - - tb[type] = nla; - } - } - - if (unlikely(rem > 0)) - printk(KERN_WARNING "netlink: %d bytes leftover after parsing " - "attributes.\n", rem); - - err = 0; -errout: - return err; -} - -/** - * nla_find - Find a specific attribute in a stream of attributes - * @head: head of attribute stream - * @len: length of attribute stream - * @attrtype: type of attribute to look for - * - * Returns the first attribute in the stream matching the specified type. - */ -struct nlattr *nla_find(struct nlattr *head, int len, int attrtype) -{ - struct nlattr *nla; - int rem; - - nla_for_each_attr(nla, head, len, rem) - if (nla_type(nla) == attrtype) - return nla; - - return NULL; -} - -/** - * nla_strlcpy - Copy string attribute payload into a sized buffer - * @dst: where to copy the string to - * @nla: attribute to copy the string from - * @dstsize: size of destination buffer - * - * Copies at most dstsize - 1 bytes into the destination buffer. - * The result is always a valid NUL-terminated string. Unlike - * strlcpy the destination buffer is always padded out. - * - * Returns the length of the source buffer. - */ -size_t nla_strlcpy(char *dst, const struct nlattr *nla, size_t dstsize) -{ - size_t srclen = nla_len(nla); - char *src = nla_data(nla); - - if (srclen > 0 && src[srclen - 1] == '\0') - srclen--; - - if (dstsize > 0) { - size_t len = (srclen >= dstsize) ? dstsize - 1 : srclen; - - memset(dst, 0, dstsize); - memcpy(dst, src, len); - } - - return srclen; -} - -/** - * nla_memcpy - Copy a netlink attribute into another memory area - * @dest: where to copy to memcpy - * @src: netlink attribute to copy from - * @count: size of the destination area - * - * Note: The number of bytes copied is limited by the length of - * attribute's payload. memcpy - * - * Returns the number of bytes copied. - */ -int nla_memcpy(void *dest, const struct nlattr *src, int count) -{ - int minlen = min_t(int, count, nla_len(src)); - - memcpy(dest, nla_data(src), minlen); - - return minlen; -} - -/** - * nla_memcmp - Compare an attribute with sized memory area - * @nla: netlink attribute - * @data: memory area - * @size: size of memory area - */ -int nla_memcmp(const struct nlattr *nla, const void *data, - size_t size) -{ - int d = nla_len(nla) - size; - - if (d == 0) - d = memcmp(nla_data(nla), data, size); - - return d; -} - -/** - * nla_strcmp - Compare a string attribute against a string - * @nla: netlink string attribute - * @str: another string - */ -int nla_strcmp(const struct nlattr *nla, const char *str) -{ - int len = strlen(str) + 1; - int d = nla_len(nla) - len; - - if (d == 0) - d = memcmp(nla_data(nla), str, len); - - return d; -} - -/** - * __nla_reserve - reserve room for attribute on the skb - * @skb: socket buffer to reserve room on - * @attrtype: attribute type - * @attrlen: length of attribute payload - * - * Adds a netlink attribute header to a socket buffer and reserves - * room for the payload but does not copy it. - * - * The caller is responsible to ensure that the skb provides enough - * tailroom for the attribute header and payload. - */ -struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) -{ - struct nlattr *nla; - - nla = (struct nlattr *) skb_put(skb, nla_total_size(attrlen)); - nla->nla_type = attrtype; - nla->nla_len = nla_attr_size(attrlen); - - memset((unsigned char *) nla + nla->nla_len, 0, nla_padlen(attrlen)); - - return nla; -} - -/** - * __nla_reserve_nohdr - reserve room for attribute without header - * @skb: socket buffer to reserve room on - * @attrlen: length of attribute payload - * - * Reserves room for attribute payload without a header. - * - * The caller is responsible to ensure that the skb provides enough - * tailroom for the payload. - */ -void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen) -{ - void *start; - - start = skb_put(skb, NLA_ALIGN(attrlen)); - memset(start, 0, NLA_ALIGN(attrlen)); - - return start; -} - -/** - * nla_reserve - reserve room for attribute on the skb - * @skb: socket buffer to reserve room on - * @attrtype: attribute type - * @attrlen: length of attribute payload - * - * Adds a netlink attribute header to a socket buffer and reserves - * room for the payload but does not copy it. - * - * Returns NULL if the tailroom of the skb is insufficient to store - * the attribute header and payload. - */ -struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) -{ - if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) - return NULL; - - return __nla_reserve(skb, attrtype, attrlen); -} - -/** - * nla_reserve_nohdr - reserve room for attribute without header - * @skb: socket buffer to reserve room on - * @attrlen: length of attribute payload - * - * Reserves room for attribute payload without a header. - * - * Returns NULL if the tailroom of the skb is insufficient to store - * the attribute payload. - */ -void *nla_reserve_nohdr(struct sk_buff *skb, int attrlen) -{ - if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) - return NULL; - - return __nla_reserve_nohdr(skb, attrlen); -} - -/** - * __nla_put - Add a netlink attribute to a socket buffer - * @skb: socket buffer to add attribute to - * @attrtype: attribute type - * @attrlen: length of attribute payload - * @data: head of attribute payload - * - * The caller is responsible to ensure that the skb provides enough - * tailroom for the attribute header and payload. - */ -void __nla_put(struct sk_buff *skb, int attrtype, int attrlen, - const void *data) -{ - struct nlattr *nla; - - nla = __nla_reserve(skb, attrtype, attrlen); - memcpy(nla_data(nla), data, attrlen); -} - -/** - * __nla_put_nohdr - Add a netlink attribute without header - * @skb: socket buffer to add attribute to - * @attrlen: length of attribute payload - * @data: head of attribute payload - * - * The caller is responsible to ensure that the skb provides enough - * tailroom for the attribute payload. - */ -void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) -{ - void *start; - - start = __nla_reserve_nohdr(skb, attrlen); - memcpy(start, data, attrlen); -} - -/** - * nla_put - Add a netlink attribute to a socket buffer - * @skb: socket buffer to add attribute to - * @attrtype: attribute type - * @attrlen: length of attribute payload - * @data: head of attribute payload - * - * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store - * the attribute header and payload. - */ -int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) -{ - if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) - return -EMSGSIZE; - - __nla_put(skb, attrtype, attrlen, data); - return 0; -} - -/** - * nla_put_nohdr - Add a netlink attribute without header - * @skb: socket buffer to add attribute to - * @attrlen: length of attribute payload - * @data: head of attribute payload - * - * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store - * the attribute payload. - */ -int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) -{ - if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) - return -EMSGSIZE; - - __nla_put_nohdr(skb, attrlen, data); - return 0; -} - -/** - * nla_append - Add a netlink attribute without header or padding - * @skb: socket buffer to add attribute to - * @attrlen: length of attribute payload - * @data: head of attribute payload - * - * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store - * the attribute payload. - */ -int nla_append(struct sk_buff *skb, int attrlen, const void *data) -{ - if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) - return -EMSGSIZE; - - memcpy(skb_put(skb, attrlen), data, attrlen); - return 0; -} - -EXPORT_SYMBOL(nla_validate); -EXPORT_SYMBOL(nla_parse); -EXPORT_SYMBOL(nla_find); -EXPORT_SYMBOL(nla_strlcpy); -EXPORT_SYMBOL(__nla_reserve); -EXPORT_SYMBOL(__nla_reserve_nohdr); -EXPORT_SYMBOL(nla_reserve); -EXPORT_SYMBOL(nla_reserve_nohdr); -EXPORT_SYMBOL(__nla_put); -EXPORT_SYMBOL(__nla_put_nohdr); -EXPORT_SYMBOL(nla_put); -EXPORT_SYMBOL(nla_put_nohdr); -EXPORT_SYMBOL(nla_memcpy); -EXPORT_SYMBOL(nla_memcmp); -EXPORT_SYMBOL(nla_strcmp); -EXPORT_SYMBOL(nla_append); -- cgit v1.2.3 From 4843b93c96ae5043c6279c4ec6fcd8ee3866ff5b Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 3 Mar 2009 23:37:30 -0800 Subject: netlink: invert error code in netlink_set_err() The callers of netlink_set_err() currently pass a negative value as parameter for the error code. However, sk->sk_err wants a positive error value. Without this patch, skb_recv_datagram() called by netlink_recvmsg() may return a positive value to report an error. Another choice to fix this is to change callers to pass a positive error value, but this seems a bit inconsistent and error prone to me. Indeed, the callers of netlink_set_err() assumed that the (usual) negative value for error codes was fine before this patch :). This patch also includes some documentation in docbook format for netlink_set_err() to avoid this sort of confusion. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 9eb895c7a2a9..3ae3cb816563 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1084,6 +1084,13 @@ out: return 0; } +/** + * netlink_set_err - report error to broadcast listeners + * @ssk: the kernel netlink socket, as returned by netlink_kernel_create() + * @pid: the PID of a process that we want to skip (if any) + * @groups: the broadcast group that will notice the error + * @code: error code, must be negative (as usual in kernelspace) + */ void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) { struct netlink_set_err_data info; @@ -1093,7 +1100,8 @@ void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) info.exclude_sk = ssk; info.pid = pid; info.group = group; - info.code = code; + /* sk->sk_err wants a positive error value */ + info.code = -code; read_lock(&nl_table_lock); -- cgit v1.2.3 From 4222474519ff5b31a526dfa1da7aa4b0e38bef5c Mon Sep 17 00:00:00 2001 From: Meelis Roos Date: Tue, 3 Mar 2009 23:48:50 -0800 Subject: net: fix tokenring license Currently, modular tokenring ("tr") lacks a license and fails to load: tr: module license 'unspecified' taints kernel. tr: Unknown symbol proc_net_fops_create Beacuse of this, no tokenring driver can load if it depends on modular tr. Fix this by adding GPL module license as it is in the kernel. With this fix, tr module loads fine and tms380 driver also loads. Well, it does'nt work but that's a different bug. Signed-off-by: Meelis Roos Signed-off-by: David S. Miller --- net/802/tr.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/802/tr.c b/net/802/tr.c index 158150fee462..f47ae289d83b 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -668,3 +668,5 @@ module_init(rif_init); EXPORT_SYMBOL(tr_type_trans); EXPORT_SYMBOL(alloc_trdev); + +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 0c5c2d3089068d4aa378f7a40d2b5ad9d4f52ce8 Mon Sep 17 00:00:00 2001 From: Eric Biederman Date: Wed, 4 Mar 2009 00:03:08 -0800 Subject: neigh: Allow for user space users of the neighbour table Currently it is possible to do just about everything with the arp table from user space except treat an entry like you are using it. To that end implement and a flag NTF_USE that when set in a netwlink update request treats the neighbour table entry like the kernel does on the output path. This allows user space applications to share the kernel's arp cache. Signed-off-by: Eric Biederman Signed-off-by: David S. Miller --- net/core/neighbour.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 417b6d739fb7..a1cbce7fdae5 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1654,7 +1654,11 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) flags &= ~NEIGH_UPDATE_F_OVERRIDE; } - err = neigh_update(neigh, lladdr, ndm->ndm_state, flags); + if (ndm->ndm_flags & NTF_USE) { + neigh_event_send(neigh, NULL); + err = 0; + } else + err = neigh_update(neigh, lladdr, ndm->ndm_state, flags); neigh_release(neigh); goto out_dev_put; } -- cgit v1.2.3 From fe7ca2e1e847b65c12d245cbf402af89da96888a Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Wed, 4 Mar 2009 03:18:11 -0800 Subject: IPv6: add "disable" module parameter support to ipv6.ko Add "disable" module parameter support to ipv6.ko by specifying "disable=1" on module load. We just do the minimum of initializing inetsw6[] so calls from other modules to inet6_register_protosw() won't OOPs, then bail out. No IPv6 addresses or sockets can be created as a result, and a reboot is required to enable IPv6. Signed-off-by: Brian Haley Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index c802bc1658a8..da944eca2ca6 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -72,6 +72,10 @@ MODULE_LICENSE("GPL"); static struct list_head inetsw6[SOCK_MAX]; static DEFINE_SPINLOCK(inetsw6_lock); +static int disable_ipv6 = 0; +module_param_named(disable, disable_ipv6, int, 0); +MODULE_PARM_DESC(disable, "Disable IPv6 such that it is non-functional"); + static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) { const int offset = sk->sk_prot->obj_size - sizeof(struct ipv6_pinfo); @@ -991,10 +995,21 @@ static int __init inet6_init(void) { struct sk_buff *dummy_skb; struct list_head *r; - int err; + int err = 0; BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb)); + /* Register the socket-side information for inet6_create. */ + for(r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r) + INIT_LIST_HEAD(r); + + if (disable_ipv6) { + printk(KERN_INFO + "IPv6: Loaded, but administratively disabled, " + "reboot required to enable\n"); + goto out; + } + err = proto_register(&tcpv6_prot, 1); if (err) goto out; @@ -1012,10 +1027,6 @@ static int __init inet6_init(void) goto out_unregister_udplite_proto; - /* Register the socket-side information for inet6_create. */ - for(r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r) - INIT_LIST_HEAD(r); - /* We MUST register RAW sockets before we create the ICMP6, * IGMP6, or NDISC control sockets. */ -- cgit v1.2.3 From fb13d9f9e450bceafd88ac8a98f7a98e8096a5fe Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Wed, 4 Mar 2009 03:20:26 -0800 Subject: SCTP: change sctp_ctl_sock_init() to try IPv4 if IPv6 fails Change sctp_ctl_sock_init() to try IPv4 if IPv6 socket registration fails. Required if the IPv6 module is loaded with "disable=1", else SCTP will fail to load. Signed-off-by: Brian Haley Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/protocol.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 4e6638449639..c4986d0f7419 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -717,15 +717,20 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, static int sctp_ctl_sock_init(void) { int err; - sa_family_t family; + sa_family_t family = PF_INET; if (sctp_get_pf_specific(PF_INET6)) family = PF_INET6; - else - family = PF_INET; err = inet_ctl_sock_create(&sctp_ctl_sock, family, SOCK_SEQPACKET, IPPROTO_SCTP, &init_net); + + /* If IPv6 socket could not be created, try the IPv4 socket */ + if (err < 0 && family == PF_INET6) + err = inet_ctl_sock_create(&sctp_ctl_sock, AF_INET, + SOCK_SEQPACKET, IPPROTO_SCTP, + &init_net); + if (err < 0) { printk(KERN_ERR "SCTP: Failed to create the SCTP control socket.\n"); -- cgit v1.2.3 From a883bf564ea555447a76682bb2d8d4bc92e23e0e Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Wed, 4 Mar 2009 17:38:10 -0800 Subject: pkt_sched: act_police: Fix a rate estimator test. A commit c1b56878fb68e9c14070939ea4537ad4db79ffae "tc: policing requires a rate estimator" introduced a test which invalidates previously working configs, based on examples from iproute2: doc/actions/actions-general. This is too rigorous: a rate estimator is needed only when police's "avrate" option is used. Reported-by: Joao Correia Diagnosed-by: John Dykstra Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/act_police.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 5c72a116b1a4..f8f047b61245 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -183,13 +183,6 @@ override: if (R_tab == NULL) goto failure; - if (!est && (ret == ACT_P_CREATED || - !gen_estimator_active(&police->tcf_bstats, - &police->tcf_rate_est))) { - err = -EINVAL; - goto failure; - } - if (parm->peakrate.rate) { P_tab = qdisc_get_rtab(&parm->peakrate, tb[TCA_POLICE_PEAKRATE]); @@ -205,6 +198,12 @@ override: &police->tcf_lock, est); if (err) goto failure_unlock; + } else if (tb[TCA_POLICE_AVRATE] && + (ret == ACT_P_CREATED || + !gen_estimator_active(&police->tcf_bstats, + &police->tcf_rate_est))) { + err = -EINVAL; + goto failure_unlock; } /* No failure allowed after this point */ -- cgit v1.2.3 From 54acd0efab072cb70e87206329d561b297f93bbb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 4 Mar 2009 23:01:02 -0800 Subject: net: Fix missing dev->neigh_setup in register_netdevice(). Signed-off-by: David S. Miller --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 9e4afe650e7a..2dd484ed3dbb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4339,6 +4339,7 @@ int register_netdevice(struct net_device *dev) dev->do_ioctl = ops->ndo_do_ioctl; dev->set_config = ops->ndo_set_config; dev->change_mtu = ops->ndo_change_mtu; + dev->neigh_setup = ops->ndo_neigh_setup; dev->tx_timeout = ops->ndo_tx_timeout; dev->get_stats = ops->ndo_get_stats; dev->vlan_rx_register = ops->ndo_vlan_rx_register; -- cgit v1.2.3 From 9d40bbda599def1e1d155d7f7dca14fe8744bd2b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 4 Mar 2009 23:46:25 -0800 Subject: vlan: Fix vlan-in-vlan crashes. As analyzed by Patrick McHardy, vlan needs to reset it's netdev_ops pointer in it's ->init() function but this leaves the compat method pointers stale. Add a netdev_resync_ops() and call it from the vlan code. Any other driver which changes ->netdev_ops after register_netdevice() will need to call this new function after doing so too. With help from Patrick McHardy. Tested-by: Patrick McHardy Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 3 ++- net/core/dev.c | 56 +++++++++++++++++++++++++++++++--------------------- 2 files changed, 36 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 4a19acd3a32b..1b34135cf990 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -553,7 +553,7 @@ static int vlan_dev_neigh_setup(struct net_device *dev, struct neigh_parms *pa) int err = 0; if (netif_device_present(real_dev) && ops->ndo_neigh_setup) - err = ops->ndo_neigh_setup(dev, pa); + err = ops->ndo_neigh_setup(real_dev, pa); return err; } @@ -639,6 +639,7 @@ static int vlan_dev_init(struct net_device *dev) dev->hard_header_len = real_dev->hard_header_len + VLAN_HLEN; dev->netdev_ops = &vlan_netdev_ops; } + netdev_resync_ops(dev); if (is_vlan_dev(real_dev)) subclass = 1; diff --git a/net/core/dev.c b/net/core/dev.c index 2dd484ed3dbb..f1129706ce7b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4282,6 +4282,39 @@ unsigned long netdev_fix_features(unsigned long features, const char *name) } EXPORT_SYMBOL(netdev_fix_features); +/* Some devices need to (re-)set their netdev_ops inside + * ->init() or similar. If that happens, we have to setup + * the compat pointers again. + */ +void netdev_resync_ops(struct net_device *dev) +{ +#ifdef CONFIG_COMPAT_NET_DEV_OPS + const struct net_device_ops *ops = dev->netdev_ops; + + dev->init = ops->ndo_init; + dev->uninit = ops->ndo_uninit; + dev->open = ops->ndo_open; + dev->change_rx_flags = ops->ndo_change_rx_flags; + dev->set_rx_mode = ops->ndo_set_rx_mode; + dev->set_multicast_list = ops->ndo_set_multicast_list; + dev->set_mac_address = ops->ndo_set_mac_address; + dev->validate_addr = ops->ndo_validate_addr; + dev->do_ioctl = ops->ndo_do_ioctl; + dev->set_config = ops->ndo_set_config; + dev->change_mtu = ops->ndo_change_mtu; + dev->neigh_setup = ops->ndo_neigh_setup; + dev->tx_timeout = ops->ndo_tx_timeout; + dev->get_stats = ops->ndo_get_stats; + dev->vlan_rx_register = ops->ndo_vlan_rx_register; + dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid; + dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid; +#ifdef CONFIG_NET_POLL_CONTROLLER + dev->poll_controller = ops->ndo_poll_controller; +#endif +#endif +} +EXPORT_SYMBOL(netdev_resync_ops); + /** * register_netdevice - register a network device * @dev: device to register @@ -4326,28 +4359,7 @@ int register_netdevice(struct net_device *dev) * This is temporary until all network devices are converted. */ if (dev->netdev_ops) { - const struct net_device_ops *ops = dev->netdev_ops; - - dev->init = ops->ndo_init; - dev->uninit = ops->ndo_uninit; - dev->open = ops->ndo_open; - dev->change_rx_flags = ops->ndo_change_rx_flags; - dev->set_rx_mode = ops->ndo_set_rx_mode; - dev->set_multicast_list = ops->ndo_set_multicast_list; - dev->set_mac_address = ops->ndo_set_mac_address; - dev->validate_addr = ops->ndo_validate_addr; - dev->do_ioctl = ops->ndo_do_ioctl; - dev->set_config = ops->ndo_set_config; - dev->change_mtu = ops->ndo_change_mtu; - dev->neigh_setup = ops->ndo_neigh_setup; - dev->tx_timeout = ops->ndo_tx_timeout; - dev->get_stats = ops->ndo_get_stats; - dev->vlan_rx_register = ops->ndo_vlan_rx_register; - dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid; - dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid; -#ifdef CONFIG_NET_POLL_CONTROLLER - dev->poll_controller = ops->ndo_poll_controller; -#endif + netdev_resync_ops(dev); } else { char drivername[64]; pr_info("%s (%s): not using net_device_ops yet\n", -- cgit v1.2.3 From e31ae0508315ebf5d8b1b8a1fca8550737fb3996 Mon Sep 17 00:00:00 2001 From: Sujith Date: Fri, 27 Feb 2009 09:44:00 +0530 Subject: mac80211: Notify the driver only when the beacon interval changes Currently, the driver is unconditionally notified of beacon interval. This is a problem in AP mode, because the driver has to know that the beacon interval has actualy changed to recalculate TBTT and reset the HW TSF. Fix this to make mac80211 notify the driver only when the beacon interval has been reconfigured to a new value. Signed-off-by: Sujith Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index c43129efc3bf..58693e52d458 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -451,7 +451,8 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata, * This is a kludge. beacon interval should really be part * of the beacon information. */ - if (params->interval) { + if (params->interval && (sdata->local->hw.conf.beacon_int != + params->interval)) { sdata->local->hw.conf.beacon_int = params->interval; err = ieee80211_hw_config(sdata->local, IEEE80211_CONF_CHANGE_BEACON_INTERVAL); -- cgit v1.2.3 From 24776cfd5559d3171054d3a7ea76d5febc54b03d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 27 Feb 2009 16:33:55 -0600 Subject: mac80211: Fix quality reporting for wireless stats Since "mac80211/cfg80211: move iwrange handler to cfg80211", the results for link quality from "iwlist scan" and "iwconfig" commands have been very different. The results are now consistent. Signed-off-by: Johannes Berg Reported- and tested-by: Larry Finger Signed-off-by: John W. Linville --- net/mac80211/wext.c | 58 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 39 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index f6924fc065d3..935c63ed3dfa 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -886,21 +886,6 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev, return ret; } -static u8 ieee80211_get_wstats_flags(struct ieee80211_local *local) -{ - u8 wstats_flags = 0; - - wstats_flags |= local->hw.flags & (IEEE80211_HW_SIGNAL_UNSPEC | - IEEE80211_HW_SIGNAL_DBM) ? - IW_QUAL_QUAL_UPDATED : IW_QUAL_QUAL_INVALID; - wstats_flags |= local->hw.flags & IEEE80211_HW_NOISE_DBM ? - IW_QUAL_NOISE_UPDATED : IW_QUAL_NOISE_INVALID; - if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) - wstats_flags |= IW_QUAL_DBM; - - return wstats_flags; -} - /* Get wireless statistics. Called by /proc/net/wireless and by SIOCGIWSTATS */ static struct iw_statistics *ieee80211_get_wireless_stats(struct net_device *dev) { @@ -922,10 +907,45 @@ static struct iw_statistics *ieee80211_get_wireless_stats(struct net_device *dev wstats->qual.noise = 0; wstats->qual.updated = IW_QUAL_ALL_INVALID; } else { - wstats->qual.level = sta->last_signal; - wstats->qual.qual = sta->last_qual; - wstats->qual.noise = sta->last_noise; - wstats->qual.updated = ieee80211_get_wstats_flags(local); + wstats->qual.updated = 0; + /* + * mirror what cfg80211 does for iwrange/scan results, + * otherwise userspace gets confused. + */ + if (local->hw.flags & (IEEE80211_HW_SIGNAL_UNSPEC | + IEEE80211_HW_SIGNAL_DBM)) { + wstats->qual.updated |= IW_QUAL_LEVEL_UPDATED; + wstats->qual.updated |= IW_QUAL_QUAL_UPDATED; + } else { + wstats->qual.updated |= IW_QUAL_LEVEL_INVALID; + wstats->qual.updated |= IW_QUAL_QUAL_INVALID; + } + + if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) { + wstats->qual.level = sta->last_signal; + wstats->qual.qual = sta->last_signal; + } else if (local->hw.flags & IEEE80211_HW_SIGNAL_DBM) { + int sig = sta->last_signal; + + wstats->qual.updated |= IW_QUAL_DBM; + wstats->qual.level = sig; + if (sig < -110) + sig = -110; + else if (sig > -40) + sig = -40; + wstats->qual.qual = sig + 110; + } + + if (local->hw.flags & IEEE80211_HW_NOISE_DBM) { + /* + * This assumes that if driver reports noise, it also + * reports signal in dBm. + */ + wstats->qual.noise = sta->last_noise; + wstats->qual.updated |= IW_QUAL_NOISE_UPDATED; + } else { + wstats->qual.updated |= IW_QUAL_NOISE_INVALID; + } } rcu_read_unlock(); -- cgit v1.2.3 From e65c22633c14eabe9593a71a727f81544378b892 Mon Sep 17 00:00:00 2001 From: Sujith Date: Mon, 2 Mar 2009 13:28:31 +0530 Subject: mac80211: Fix TKIP/WEP HT capability handling There is no need to parse the AP's HT capabilities if the STA uses TKIP/WEP cipher. This allows the rate control module to choose the correct(legacy) rate table. Signed-off-by: Sujith Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 7f238589b6ff..52d876e3eabe 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1307,7 +1307,8 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, else sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE; - if (elems.ht_cap_elem) + /* If TKIP/WEP is used, no need to parse AP's HT capabilities */ + if (elems.ht_cap_elem && !(ifmgd->flags & IEEE80211_STA_TKIP_WEP_USED)) ieee80211_ht_cap_ie_to_sta_ht_cap(sband, elems.ht_cap_elem, &sta->sta.ht_cap); -- cgit v1.2.3 From 25c9c8752849212a25bf7f38b40b64b3958d619b Mon Sep 17 00:00:00 2001 From: Vivek Natarajan Date: Mon, 2 Mar 2009 20:20:30 +0530 Subject: mac80211: Always send a null data frame if TIM bit is set. If the AP thinks we are in power save state eventhough we are not truly in that state, it sets the TIM bit and does not send a data frame unless we send a null data frame to correct the state in the AP. This might happen if the null data frame for wake up is lost in the air after we disable power save. Signed-off-by: Vivek Natarajan Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 52d876e3eabe..391445c6b892 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1458,8 +1458,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, ieee80211_sta_wmm_params(local, ifmgd, elems.wmm_param, elems.wmm_param_len); - if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK && - local->hw.conf.flags & IEEE80211_CONF_PS) { + if (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) { directed_tim = ieee80211_check_tim(&elems, ifmgd->aid); if (directed_tim) { -- cgit v1.2.3 From 707c1b4e68a2811ff2c9e75750a98a3310789a2d Mon Sep 17 00:00:00 2001 From: Sujith Date: Tue, 3 Mar 2009 10:15:10 +0530 Subject: mac80211: Update IBSS beacon timestamp properly In IBSS mode, the beacon timestamp has to be filled with the BSS's timestamp when joining, and set to zero when creating a new BSS. Signed-off-by: Sujith Signed-off-by: John W. Linville --- net/mac80211/ibss.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index a96ce9dfc6b5..f4becc12904e 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -64,7 +64,7 @@ static int __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, const int freq, const size_t supp_rates_len, const u8 *supp_rates, - const u16 capability) + const u16 capability, u64 tsf) { struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_local *local = sdata->local; @@ -127,6 +127,7 @@ static int __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, memcpy(mgmt->bssid, ifibss->bssid, ETH_ALEN); mgmt->u.beacon.beacon_int = cpu_to_le16(local->hw.conf.beacon_int); + mgmt->u.beacon.timestamp = cpu_to_le64(tsf); mgmt->u.beacon.capab_info = cpu_to_le16(capability); pos = skb_put(skb, 2 + ifibss->ssid_len); @@ -199,7 +200,8 @@ static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, bss->cbss.beacon_interval, bss->cbss.channel->center_freq, bss->supp_rates_len, bss->supp_rates, - bss->cbss.capability); + bss->cbss.capability, + bss->cbss.tsf); } static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, @@ -502,7 +504,7 @@ static int ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata) bssid, local->hw.conf.beacon_int, local->hw.conf.channel->center_freq, sband->n_bitrates, supp_rates, - capability); + capability, 0); } static int ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata) -- cgit v1.2.3 From bd05f28e1a15ae62994fe309a524695fe26dd834 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Tue, 3 Mar 2009 22:55:21 +0100 Subject: cfg80211: test before subtraction on unsigned freq_diff is unsigned, so test before subtraction Signed-off-by: Roel Kluin Signed-off-by: John W. Linville --- net/wireless/reg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 85c9034c59b2..bd0a16c3de5e 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -380,7 +380,8 @@ static bool is_valid_reg_rule(const struct ieee80211_reg_rule *rule) freq_diff = freq_range->end_freq_khz - freq_range->start_freq_khz; - if (freq_diff <= 0 || freq_range->max_bandwidth_khz > freq_diff) + if (freq_range->end_freq_khz <= freq_range->start_freq_khz || + freq_range->max_bandwidth_khz > freq_diff) return false; return true; -- cgit v1.2.3 From 7546dd97d27306d939c13e03318aae695badaa88 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 9 Mar 2009 08:18:29 +0000 Subject: net: convert usage of packet_type to read_mostly Protocols that use packet_type can be __read_mostly section for better locality. Elminate any unnecessary initializations of NULL. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/8021q/vlan.c | 2 +- net/appletalk/ddp.c | 4 ++-- net/ax25/af_ax25.c | 3 +-- net/decnet/af_decnet.c | 3 +-- net/dsa/tag_dsa.c | 2 +- net/dsa/tag_edsa.c | 2 +- net/dsa/tag_trailer.c | 2 +- net/econet/af_econet.c | 2 +- net/ipv4/af_inet.c | 2 +- net/ipv4/arp.c | 2 +- net/ipv6/af_inet6.c | 2 +- net/ipx/af_ipx.c | 4 ++-- net/irda/irmod.c | 2 +- net/llc/llc_core.c | 4 ++-- net/phonet/af_phonet.c | 3 +-- net/x25/af_x25.c | 2 +- 16 files changed, 19 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 4163ea65bf41..2b7390e377b3 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -51,7 +51,7 @@ const char vlan_version[] = DRV_VERSION; static const char vlan_copyright[] = "Ben Greear "; static const char vlan_buggyright[] = "David S. Miller "; -static struct packet_type vlan_packet_type = { +static struct packet_type vlan_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_8021Q), .func = vlan_skb_recv, /* VLAN receive method */ }; diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index cf05c43cba52..3e0671df3a3f 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1860,12 +1860,12 @@ static struct notifier_block ddp_notifier = { .notifier_call = ddp_device_event, }; -static struct packet_type ltalk_packet_type = { +static struct packet_type ltalk_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_LOCALTALK), .func = ltalk_rcv, }; -static struct packet_type ppptalk_packet_type = { +static struct packet_type ppptalk_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_PPPTALK), .func = atalk_rcv, }; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index d127fd3ba5c6..8f8f63ff6566 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1985,9 +1985,8 @@ static const struct proto_ops ax25_proto_ops = { /* * Called by socket.c on kernel start up */ -static struct packet_type ax25_packet_type = { +static struct packet_type ax25_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_AX25), - .dev = NULL, /* All devices */ .func = ax25_kiss_rcv, }; diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index ec233b64f853..9647d911f916 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -2112,9 +2112,8 @@ static struct notifier_block dn_dev_notifier = { extern int dn_route_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *); -static struct packet_type dn_dix_packet_type = { +static struct packet_type dn_dix_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_DNA_RT), - .dev = NULL, /* All devices */ .func = dn_route_rcv, }; diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index 63e532a69fdb..0b8a91ddff44 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -175,7 +175,7 @@ out: return 0; } -static struct packet_type dsa_packet_type = { +static struct packet_type dsa_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_DSA), .func = dsa_rcv, }; diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index 6197f9a7ef42..16fcb6d196d4 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c @@ -194,7 +194,7 @@ out: return 0; } -static struct packet_type edsa_packet_type = { +static struct packet_type edsa_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_EDSA), .func = edsa_rcv, }; diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index d7e7f424ff0c..a6d959da6784 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -111,7 +111,7 @@ out: return 0; } -static struct packet_type trailer_packet_type = { +static struct packet_type trailer_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_TRAILER), .func = trailer_rcv, }; diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index 7bf35582f656..6f479fa522c3 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -1102,7 +1102,7 @@ drop: return NET_RX_DROP; } -static struct packet_type econet_packet_type = { +static struct packet_type econet_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_ECONET), .func = econet_rcv, }; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 627be4dc7fb0..d5aaabbb7cb3 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1500,7 +1500,7 @@ static int ipv4_proc_init(void); * IP protocol layer initialiser */ -static struct packet_type ip_packet_type = { +static struct packet_type ip_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_IP), .func = ip_rcv, .gso_send_check = inet_gso_send_check, diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 3f6b7354699b..3d67d1ffed77 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1225,7 +1225,7 @@ void arp_ifdown(struct net_device *dev) * Called once on startup. */ -static struct packet_type arp_packet_type = { +static struct packet_type arp_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_ARP), .func = arp_rcv, }; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 57b07da1212a..3e2ddfaee81a 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -893,7 +893,7 @@ out_unlock: return err; } -static struct packet_type ipv6_packet_type = { +static struct packet_type ipv6_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_IPV6), .func = ipv6_rcv, .gso_send_check = ipv6_gso_send_check, diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 43d0ffc6d565..30bd322b7985 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1958,12 +1958,12 @@ static const struct proto_ops SOCKOPS_WRAPPED(ipx_dgram_ops) = { SOCKOPS_WRAP(ipx_dgram, PF_IPX); -static struct packet_type ipx_8023_packet_type = { +static struct packet_type ipx_8023_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_802_3), .func = ipx_rcv, }; -static struct packet_type ipx_dix_packet_type = { +static struct packet_type ipx_dix_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_IPX), .func = ipx_rcv, }; diff --git a/net/irda/irmod.c b/net/irda/irmod.c index 1bb607f2f5c7..303a68d92731 100644 --- a/net/irda/irmod.c +++ b/net/irda/irmod.c @@ -55,7 +55,7 @@ EXPORT_SYMBOL(irda_debug); /* Packet type handler. * Tell the kernel how IrDA packets should be handled. */ -static struct packet_type irda_packet_type = { +static struct packet_type irda_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_IRDA), .func = irlap_driver_rcv, /* Packet type handler irlap_frame.c */ }; diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c index a7fe1adc378d..ff4c0ab96a69 100644 --- a/net/llc/llc_core.c +++ b/net/llc/llc_core.c @@ -147,12 +147,12 @@ void llc_sap_close(struct llc_sap *sap) kfree(sap); } -static struct packet_type llc_packet_type = { +static struct packet_type llc_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_802_2), .func = llc_rcv, }; -static struct packet_type llc_tr_packet_type = { +static struct packet_type llc_tr_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_TR_802_2), .func = llc_rcv, }; diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index 81795ea87794..a662e62a99cf 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -382,9 +382,8 @@ out: return NET_RX_DROP; } -static struct packet_type phonet_packet_type = { +static struct packet_type phonet_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_PHONET), - .dev = NULL, .func = phonet_rcv, }; diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 8f76f4009c24..1000e9a26fdb 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1608,7 +1608,7 @@ static const struct proto_ops SOCKOPS_WRAPPED(x25_proto_ops) = { SOCKOPS_WRAP(x25_proto, AF_X25); -static struct packet_type x25_packet_type = { +static struct packet_type x25_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_X25), .func = x25_lapb_receive_frame, }; -- cgit v1.2.3 From a2205472c3017bfe97b6cb6f5acd6ca141a97eda Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 9 Mar 2009 13:51:55 +0000 Subject: net: fix warning about non-const string Since dev_set_name takes a printf style string, new gcc complains if arg is not const. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 484f58750eba..2da59a0ac4ac 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -498,7 +498,7 @@ int netdev_register_kobject(struct net_device *net) dev->groups = groups; BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ); - dev_set_name(dev, net->name); + dev_set_name(dev, "%s", net->name); #ifdef CONFIG_SYSFS *groups++ = &netstat_group; -- cgit v1.2.3 From eb9b55ab4d73280597fd183b367d50452f4d7846 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 10 Mar 2009 20:33:16 -0400 Subject: SUNRPC: Tighten up the task locking rules in __rpc_execute() We should probably not be testing any flags after we've cleared the RPC_TASK_RUNNING flag, since rpc_make_runnable() is then free to assign the rpc_task to another workqueue, which may then destroy it. We can fix any races with rpc_make_runnable() by ensuring that we only clear the RPC_TASK_RUNNING flag while holding the rpc_wait_queue->lock that the task is supposed to be sleeping on (and then checking whether or not the task really is sleeping). Signed-off-by: Trond Myklebust --- net/sunrpc/sched.c | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 385f427bedad..ff50a0546865 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -293,11 +293,6 @@ static void rpc_make_runnable(struct rpc_task *task) rpc_clear_queued(task); if (rpc_test_and_set_running(task)) return; - /* We might have raced */ - if (RPC_IS_QUEUED(task)) { - rpc_clear_running(task); - return; - } if (RPC_IS_ASYNC(task)) { int status; @@ -607,7 +602,9 @@ void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata) */ static void __rpc_execute(struct rpc_task *task) { - int status = 0; + struct rpc_wait_queue *queue; + int task_is_async = RPC_IS_ASYNC(task); + int status = 0; dprintk("RPC: %5u __rpc_execute flags=0x%x\n", task->tk_pid, task->tk_flags); @@ -647,15 +644,25 @@ static void __rpc_execute(struct rpc_task *task) */ if (!RPC_IS_QUEUED(task)) continue; - rpc_clear_running(task); - if (RPC_IS_ASYNC(task)) { - /* Careful! we may have raced... */ - if (RPC_IS_QUEUED(task)) - return; - if (rpc_test_and_set_running(task)) - return; + /* + * The queue->lock protects against races with + * rpc_make_runnable(). + * + * Note that once we clear RPC_TASK_RUNNING on an asynchronous + * rpc_task, rpc_make_runnable() can assign it to a + * different workqueue. We therefore cannot assume that the + * rpc_task pointer may still be dereferenced. + */ + queue = task->tk_waitqueue; + spin_lock_bh(&queue->lock); + if (!RPC_IS_QUEUED(task)) { + spin_unlock_bh(&queue->lock); continue; } + rpc_clear_running(task); + spin_unlock_bh(&queue->lock); + if (task_is_async) + return; /* sync task: sleep here */ dprintk("RPC: %5u sync task going to sleep\n", task->tk_pid); -- cgit v1.2.3 From ff8cf9a93800e8118ea097c1aba7203d59a0f3f1 Mon Sep 17 00:00:00 2001 From: John Dykstra Date: Wed, 11 Mar 2009 09:22:51 -0700 Subject: ipv6: Fix BUG when disabled ipv6 module is unloaded Do not try to "uninitialize" ipv6 if its initialization had been skipped because module parameter disable=1 had been specified. Reported-by: Thomas Backlund Signed-off-by: John Dykstra Acked-by: Brian Haley Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index da944eca2ca6..9c8309ed35cf 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1192,6 +1192,9 @@ module_init(inet6_init); static void __exit inet6_exit(void) { + if (disable_ipv6) + return; + /* First of all disallow new sockets creation. */ sock_unregister(PF_INET6); /* Disallow any further netlink messages */ -- cgit v1.2.3 From fc1ad92dfc4e363a055053746552cdb445ba5c57 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 Mar 2009 09:23:57 -0700 Subject: tcp: allow timestamps even if SYN packet has tsval=0 Some systems send SYN packets with apparently wrong RFC1323 timestamp option values [timestamp tsval=0 tsecr=0]. It might be for security reasons (http://www.secuobs.com/plugs/25220.shtml ) Linux TCP stack ignores this option and sends back a SYN+ACK packet without timestamp option, thus many TCP flows cannot use timestamps and lose some benefit of RFC1323. Other operating systems seem to not care about initial tsval value, and let tcp flows to negotiate timestamp option. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a7381205bbfc..d0a314879d81 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1226,15 +1226,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); - if (tmp_opt.saw_tstamp && !tmp_opt.rcv_tsval) { - /* Some OSes (unknown ones, but I see them on web server, which - * contains information interesting only for windows' - * users) do not send their stamp in SYN. It is easy case. - * We simply do not advertise TS support. - */ - tmp_opt.saw_tstamp = 0; - tmp_opt.tstamp_ok = 0; - } tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; tcp_openreq_init(req, &tmp_opt, skb); -- cgit v1.2.3 From fba91afbec2c004e2c8733ae9e0ca6998e962c64 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:06:41 -0400 Subject: SUNRPC: Fix an Oops due to socket not set up yet... We can Oops in both xs_udp_send_request() and xs_tcp_send_request() if the call to xs_sendpages() returns an error due to the socket not yet being set up. Deal with that situation by returning a new error: ENOTSOCK, so that we know to avoid dereferencing transport->sock. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 5cbb404c4cdf..a71fefd61910 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -467,7 +467,7 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, int err, sent = 0; if (unlikely(!sock)) - return -ENOTCONN; + return -ENOTSOCK; clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags); if (base != 0) { @@ -594,6 +594,10 @@ static int xs_udp_send_request(struct rpc_task *task) } switch (status) { + case -ENOTSOCK: + status = -ENOTCONN; + /* Should we call xs_close() here? */ + break; case -EAGAIN: xs_nospace(task); break; @@ -693,6 +697,10 @@ static int xs_tcp_send_request(struct rpc_task *task) } switch (status) { + case -ENOTSOCK: + status = -ENOTCONN; + /* Should we call xs_close() here? */ + break; case -EAGAIN: xs_nospace(task); break; -- cgit v1.2.3 From 01d37c428ae080563c0a3bb8bdfa88c65a6891d3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:09:39 -0400 Subject: SUNRPC: xprt_connect() don't abort the task if the transport isn't bound If the transport isn't bound, then we should just return ENOTCONN, letting call_connect_status() and/or call_status() deal with retrying. Currently, we appear to abort all pending tasks with an EIO error. Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 2 +- net/sunrpc/xprtsock.c | 13 +++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 29e401bb612e..62098d101a1f 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -663,7 +663,7 @@ void xprt_connect(struct rpc_task *task) xprt, (xprt_connected(xprt) ? "is" : "is not")); if (!xprt_bound(xprt)) { - task->tk_status = -EIO; + task->tk_status = -EAGAIN; return; } if (!xprt_lock_write(xprt, task)) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index a71fefd61910..29c71e645b27 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -577,6 +577,8 @@ static int xs_udp_send_request(struct rpc_task *task) req->rq_svec->iov_base, req->rq_svec->iov_len); + if (!xprt_bound(xprt)) + return -ENOTCONN; status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen, xdr, @@ -1531,7 +1533,7 @@ static void xs_udp_connect_worker4(struct work_struct *work) struct socket *sock = transport->sock; int err, status = -EIO; - if (xprt->shutdown || !xprt_bound(xprt)) + if (xprt->shutdown) goto out; /* Start by resetting any existing state */ @@ -1572,7 +1574,7 @@ static void xs_udp_connect_worker6(struct work_struct *work) struct socket *sock = transport->sock; int err, status = -EIO; - if (xprt->shutdown || !xprt_bound(xprt)) + if (xprt->shutdown) goto out; /* Start by resetting any existing state */ @@ -1656,6 +1658,9 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) write_unlock_bh(&sk->sk_callback_lock); } + if (!xprt_bound(xprt)) + return -ENOTCONN; + /* Tell the socket layer to start connecting... */ xprt->stat.connect_count++; xprt->stat.connect_start = jiffies; @@ -1676,7 +1681,7 @@ static void xs_tcp_connect_worker4(struct work_struct *work) struct socket *sock = transport->sock; int err, status = -EIO; - if (xprt->shutdown || !xprt_bound(xprt)) + if (xprt->shutdown) goto out; if (!sock) { @@ -1736,7 +1741,7 @@ static void xs_tcp_connect_worker6(struct work_struct *work) struct socket *sock = transport->sock; int err, status = -EIO; - if (xprt->shutdown || !xprt_bound(xprt)) + if (xprt->shutdown) goto out; if (!sock) { -- cgit v1.2.3 From fe315e76fc3a3f9f7e1581dc22fec7e7719f0896 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 11 Mar 2009 14:10:21 -0400 Subject: SUNRPC: Avoid spurious wake-up during UDP connect processing To clear out old state, the UDP connect workers unconditionally invoke xs_close() before proceeding with a new connect. Nowadays this causes a spurious wake-up of the task waiting for the connect to complete. This is a little racey, but usually harmless. The waiting task immediately retries the connect via a call_bind/call_connect sequence, which usually finds the transport already in the connected state because the connect worker has finished in the background. To avoid a spurious wake-up, factor the xs_close() logic that resets the underlying socket into a helper, and have the UDP connect workers call that helper instead of xs_close(). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 44 ++++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 29c71e645b27..1127eb934136 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -767,23 +767,13 @@ static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *s sk->sk_error_report = transport->old_error_report; } -/** - * xs_close - close a socket - * @xprt: transport - * - * This is used when all requests are complete; ie, no DRC state remains - * on the server we want to save. - */ -static void xs_close(struct rpc_xprt *xprt) +static void xs_reset_transport(struct sock_xprt *transport) { - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct socket *sock = transport->sock; struct sock *sk = transport->inet; - if (!sk) - goto clear_close_wait; - - dprintk("RPC: xs_close xprt %p\n", xprt); + if (sk == NULL) + return; write_lock_bh(&sk->sk_callback_lock); transport->inet = NULL; @@ -797,7 +787,23 @@ static void xs_close(struct rpc_xprt *xprt) sk->sk_no_check = 0; sock_release(sock); -clear_close_wait: +} + +/** + * xs_close - close a socket + * @xprt: transport + * + * This is used when all requests are complete; ie, no DRC state remains + * on the server we want to save. + */ +static void xs_close(struct rpc_xprt *xprt) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + + dprintk("RPC: xs_close xprt %p\n", xprt); + + xs_reset_transport(transport); + smp_mb__before_clear_bit(); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); clear_bit(XPRT_CLOSING, &xprt->state); @@ -1537,9 +1543,10 @@ static void xs_udp_connect_worker4(struct work_struct *work) goto out; /* Start by resetting any existing state */ - xs_close(xprt); + xs_reset_transport(transport); - if ((err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) { + err = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock); + if (err < 0) { dprintk("RPC: can't create UDP transport socket (%d).\n", -err); goto out; } @@ -1578,9 +1585,10 @@ static void xs_udp_connect_worker6(struct work_struct *work) goto out; /* Start by resetting any existing state */ - xs_close(xprt); + xs_reset_transport(transport); - if ((err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) { + err = sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock); + if (err < 0) { dprintk("RPC: can't create UDP transport socket (%d).\n", -err); goto out; } -- cgit v1.2.3 From b1e1e158779f1d99c2cc18e466f6bf9099fc0853 Mon Sep 17 00:00:00 2001 From: Tom Talpey Date: Wed, 11 Mar 2009 14:37:55 -0400 Subject: SVCRDMA: remove faulty assertions in rpc/rdma chunk validation. Certain client-provided RPCRDMA chunk alignments result in an additional scatter/gather entry, which triggered nfs/rdma server assertions incorrectly. OpenSolaris nfs/rdma client connectathon testing was blocked by these in the special/locking section. Signed-off-by: Tom Talpey Cc: Tom Tucker Signed-off-by: Trond Myklebust --- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index a3334e3b73cc..d0bea987d80e 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -191,7 +191,6 @@ static int map_xdr(struct svcxprt_rdma *xprt, struct xdr_buf *xdr, struct svc_rdma_req_map *vec) { - int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3; int sge_no; u32 sge_bytes; u32 page_bytes; @@ -235,7 +234,11 @@ static int map_xdr(struct svcxprt_rdma *xprt, sge_no++; } - BUG_ON(sge_no > sge_max); + dprintk("svcrdma: map_xdr: sge_no %d page_no %d " + "page_base %zd page_len %zd head_len %d tail_len %d\n", + sge_no, page_no, xdr->page_base, xdr->page_len, + xdr->head[0].iov_len, xdr->tail[0].iov_len); + vec->count = sge_no; return 0; } @@ -579,7 +582,6 @@ static int send_reply(struct svcxprt_rdma *rdma, ctxt->sge[page_no+1].length = 0; } BUG_ON(sge_no > rdma->sc_max_sge); - BUG_ON(sge_no > ctxt->count); memset(&send_wr, 0, sizeof send_wr); ctxt->wr_op = IB_WR_SEND; send_wr.wr_id = (unsigned long)ctxt; -- cgit v1.2.3 From b38ab40ad58c1fc43ea590d6342f6a6763ac8fb6 Mon Sep 17 00:00:00 2001 From: Tom Talpey Date: Wed, 11 Mar 2009 14:37:55 -0400 Subject: XPRTRDMA: correct an rpc/rdma inline send marshaling error Certain client rpc's which contain both lengthy page-contained metadata and a non-empty xdr_tail buffer require careful handling to avoid overlapped memory copying. Rearranging of existing rpcrdma marshaling code avoids it; this fixes an NFSv4 symlink creation error detected with connectathon basic/test8 to multiple servers. Signed-off-by: Tom Talpey Signed-off-by: Trond Myklebust --- net/sunrpc/xprtrdma/rpc_rdma.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 14106d26bb95..e5e28d1946a4 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -310,6 +310,19 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad) __func__, pad, destp, rqst->rq_slen, curlen); copy_len = rqst->rq_snd_buf.page_len; + + if (rqst->rq_snd_buf.tail[0].iov_len) { + curlen = rqst->rq_snd_buf.tail[0].iov_len; + if (destp + copy_len != rqst->rq_snd_buf.tail[0].iov_base) { + memmove(destp + copy_len, + rqst->rq_snd_buf.tail[0].iov_base, curlen); + r_xprt->rx_stats.pullup_copy_count += curlen; + } + dprintk("RPC: %s: tail destp 0x%p len %d\n", + __func__, destp + copy_len, curlen); + rqst->rq_svec[0].iov_len += curlen; + } + r_xprt->rx_stats.pullup_copy_count += copy_len; npages = PAGE_ALIGN(rqst->rq_snd_buf.page_base+copy_len) >> PAGE_SHIFT; for (i = 0; copy_len && i < npages; i++) { @@ -332,17 +345,6 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad) destp += curlen; copy_len -= curlen; } - if (rqst->rq_snd_buf.tail[0].iov_len) { - curlen = rqst->rq_snd_buf.tail[0].iov_len; - if (destp != rqst->rq_snd_buf.tail[0].iov_base) { - memcpy(destp, - rqst->rq_snd_buf.tail[0].iov_base, curlen); - r_xprt->rx_stats.pullup_copy_count += curlen; - } - dprintk("RPC: %s: tail destp 0x%p len %d curlen %d\n", - __func__, destp, copy_len, curlen); - rqst->rq_svec[0].iov_len += curlen; - } /* header now contains entire send message */ return pad; } @@ -656,7 +658,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) if (curlen > rqst->rq_rcv_buf.tail[0].iov_len) curlen = rqst->rq_rcv_buf.tail[0].iov_len; if (rqst->rq_rcv_buf.tail[0].iov_base != srcp) - memcpy(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen); + memmove(rqst->rq_rcv_buf.tail[0].iov_base, srcp, curlen); dprintk("RPC: %s: tail srcp 0x%p len %d curlen %d\n", __func__, srcp, copy_len, curlen); rqst->rq_rcv_buf.tail[0].iov_len = curlen; -- cgit v1.2.3 From 441e3e242903f9b190d5764bed73edb58f977413 Mon Sep 17 00:00:00 2001 From: Tom Talpey Date: Wed, 11 Mar 2009 14:37:56 -0400 Subject: SUNRPC: dynamically load RPC transport modules on-demand Provide an api to attempt to load any necessary kernel RPC client transport module automatically. By convention, the desired module name is "xprt"+"transport name". For example, when NFS mounting with "-o proto=rdma", attempt to load the "xprtrdma" module. Signed-off-by: Tom Talpey Cc: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 62098d101a1f..d1afec640394 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -151,6 +151,37 @@ out: } EXPORT_SYMBOL_GPL(xprt_unregister_transport); +/** + * xprt_load_transport - load a transport implementation + * @transport_name: transport to load + * + * Returns: + * 0: transport successfully loaded + * -ENOENT: transport module not available + */ +int xprt_load_transport(const char *transport_name) +{ + struct xprt_class *t; + char module_name[sizeof t->name + 5]; + int result; + + result = 0; + spin_lock(&xprt_list_lock); + list_for_each_entry(t, &xprt_list, list) { + if (strcmp(t->name, transport_name) == 0) { + spin_unlock(&xprt_list_lock); + goto out; + } + } + spin_unlock(&xprt_list_lock); + strcpy(module_name, "xprt"); + strncat(module_name, transport_name, sizeof t->name); + result = request_module(module_name); +out: + return result; +} +EXPORT_SYMBOL_GPL(xprt_load_transport); + /** * xprt_reserve_xprt - serialize write access to transports * @task: task that is requesting access to the transport -- cgit v1.2.3 From 15f081ca8ddfe150fb639c591b18944a539da0fc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:37:57 -0400 Subject: SUNRPC: Avoid an unnecessary task reschedule on ENOTCONN If the socket is unconnected, and xprt_transmit() returns ENOTCONN, we currently give up the lock on the transport channel. Doing so means that the lock automatically gets assigned to the next task in the xprt->sending queue, and so that task needs to be woken up to do the actual connect. The following patch aims to avoid that unnecessary task switch. Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 836f15c0c4a3..07e9b05321e6 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1105,14 +1105,24 @@ static void call_transmit_status(struct rpc_task *task) { task->tk_action = call_status; - /* - * Special case: if we've been waiting on the socket's write_space() - * callback, then don't call xprt_end_transmit(). - */ - if (task->tk_status == -EAGAIN) - return; - xprt_end_transmit(task); - rpc_task_force_reencode(task); + switch (task->tk_status) { + case -EAGAIN: + break; + default: + xprt_end_transmit(task); + /* + * Special cases: if we've been waiting on the + * socket's write_space() callback, or if the + * socket just returned a connection error, + * then hold onto the transport lock. + */ + case -ECONNREFUSED: + case -ENOTCONN: + case -EHOSTDOWN: + case -EHOSTUNREACH: + case -ENETUNREACH: + rpc_task_force_reencode(task); + } } /* -- cgit v1.2.3 From 670f94573104b4a25525d3fcdcd6496c678df172 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:37:58 -0400 Subject: SUNRPC: Ensure we set XPRT_CLOSING only after we've sent a tcp FIN... ...so that we can distinguish between when we need to shutdown and when we don't. Also remove the call to xs_tcp_shutdown() from xs_tcp_connect(), since xprt_connect() makes the same test. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 1127eb934136..cb4bd93b9211 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1180,7 +1180,6 @@ static void xs_tcp_state_change(struct sock *sk) break; case TCP_CLOSE_WAIT: /* The server initiated a shutdown of the socket */ - set_bit(XPRT_CLOSING, &xprt->state); xprt_force_disconnect(xprt); case TCP_SYN_SENT: xprt->connect_cookie++; @@ -1193,6 +1192,7 @@ static void xs_tcp_state_change(struct sock *sk) xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO; break; case TCP_LAST_ACK: + set_bit(XPRT_CLOSING, &xprt->state); smp_mb__before_clear_bit(); clear_bit(XPRT_CONNECTED, &xprt->state); smp_mb__after_clear_bit(); @@ -1836,9 +1836,6 @@ static void xs_tcp_connect(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; - /* Initiate graceful shutdown of the socket if not already done */ - if (test_bit(XPRT_CONNECTED, &xprt->state)) - xs_tcp_shutdown(xprt); /* Exit if we need to wait for socket shutdown to complete */ if (test_bit(XPRT_CLOSING, &xprt->state)) return; -- cgit v1.2.3 From 40d2549db5f515e415894def98b49db7d4c56714 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:37:58 -0400 Subject: SUNRPC: Don't disconnect if a connection is still in progress. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index cb4bd93b9211..9d1898f6ee87 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1613,10 +1613,9 @@ out: * We need to preserve the port number so the reply cache on the server can * find our cached RPC replies when we get around to reconnecting. */ -static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) +static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transport) { int result; - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct sockaddr any; dprintk("RPC: disconnecting xprt %p to reuse port\n", xprt); @@ -1633,6 +1632,17 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) result); } +static void xs_tcp_reuse_connection(struct rpc_xprt *xprt, struct sock_xprt *transport) +{ + unsigned int state = transport->inet->sk_state; + + if (state == TCP_CLOSE && transport->sock->state == SS_UNCONNECTED) + return; + if ((1 << state) & (TCPF_ESTABLISHED|TCPF_SYN_SENT)) + return; + xs_abort_connection(xprt, transport); +} + static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); @@ -1706,7 +1716,7 @@ static void xs_tcp_connect_worker4(struct work_struct *work) } } else /* "close" the socket, preserving the local port */ - xs_tcp_reuse_connection(xprt); + xs_tcp_reuse_connection(xprt, transport); dprintk("RPC: worker connecting xprt %p to address: %s\n", xprt, xprt->address_strings[RPC_DISPLAY_ALL]); @@ -1766,7 +1776,7 @@ static void xs_tcp_connect_worker6(struct work_struct *work) } } else /* "close" the socket, preserving the local port */ - xs_tcp_reuse_connection(xprt); + xs_tcp_reuse_connection(xprt, transport); dprintk("RPC: worker connecting xprt %p to address: %s\n", xprt, xprt->address_strings[RPC_DISPLAY_ALL]); -- cgit v1.2.3 From c8485e4d634f6df155040293928707f127f0d06d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:37:59 -0400 Subject: SUNRPC: Handle ECONNREFUSED correctly in xprt_transmit() If we get an ECONNREFUSED error, we currently go to sleep on the 'xprt->sending' wait queue. The problem is that no timeout is set there, and there is nothing else that will wake the task up later. We should deal with ECONNREFUSED in call_status, given that is where we also deal with -EHOSTDOWN, and friends. Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 7 ++++++- net/sunrpc/xprt.c | 38 ++++++++++++++++---------------------- net/sunrpc/xprtsock.c | 26 ++++++++++++-------------- 3 files changed, 34 insertions(+), 37 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 07e9b05321e6..145715b53115 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1117,10 +1117,12 @@ call_transmit_status(struct rpc_task *task) * then hold onto the transport lock. */ case -ECONNREFUSED: + case -ECONNRESET: case -ENOTCONN: case -EHOSTDOWN: case -EHOSTUNREACH: case -ENETUNREACH: + case -EPIPE: rpc_task_force_reencode(task); } } @@ -1162,9 +1164,12 @@ call_status(struct rpc_task *task) xprt_conditional_disconnect(task->tk_xprt, req->rq_connect_cookie); break; + case -ECONNRESET: case -ECONNREFUSED: - case -ENOTCONN: rpc_force_rebind(clnt); + rpc_delay(task, 3*HZ); + case -EPIPE: + case -ENOTCONN: task->tk_action = call_bind; break; case -EAGAIN: diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index d1afec640394..d588e755e107 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -901,32 +901,26 @@ void xprt_transmit(struct rpc_task *task) req->rq_connect_cookie = xprt->connect_cookie; req->rq_xtime = jiffies; status = xprt->ops->send_request(task); - if (status == 0) { - dprintk("RPC: %5u xmit complete\n", task->tk_pid); - spin_lock_bh(&xprt->transport_lock); + if (status != 0) { + task->tk_status = status; + return; + } - xprt->ops->set_retrans_timeout(task); + dprintk("RPC: %5u xmit complete\n", task->tk_pid); + spin_lock_bh(&xprt->transport_lock); - xprt->stat.sends++; - xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs; - xprt->stat.bklog_u += xprt->backlog.qlen; + xprt->ops->set_retrans_timeout(task); - /* Don't race with disconnect */ - if (!xprt_connected(xprt)) - task->tk_status = -ENOTCONN; - else if (!req->rq_received) - rpc_sleep_on(&xprt->pending, task, xprt_timer); - spin_unlock_bh(&xprt->transport_lock); - return; - } + xprt->stat.sends++; + xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs; + xprt->stat.bklog_u += xprt->backlog.qlen; - /* Note: at this point, task->tk_sleeping has not yet been set, - * hence there is no danger of the waking up task being put on - * schedq, and being picked up by a parallel run of rpciod(). - */ - task->tk_status = status; - if (status == -ECONNREFUSED) - rpc_sleep_on(&xprt->sending, task, NULL); + /* Don't race with disconnect */ + if (!xprt_connected(xprt)) + task->tk_status = -ENOTCONN; + else if (!req->rq_received) + rpc_sleep_on(&xprt->pending, task, xprt_timer); + spin_unlock_bh(&xprt->transport_lock); } static inline void do_xprt_reserve(struct rpc_task *task) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 9d1898f6ee87..5e8198bede81 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -594,6 +594,8 @@ static int xs_udp_send_request(struct rpc_task *task) /* Still some bytes left; set up for a retry later. */ status = -EAGAIN; } + if (!transport->sock) + goto out; switch (status) { case -ENOTSOCK: @@ -603,19 +605,17 @@ static int xs_udp_send_request(struct rpc_task *task) case -EAGAIN: xs_nospace(task); break; + default: + dprintk("RPC: sendmsg returned unrecognized error %d\n", + -status); case -ENETUNREACH: case -EPIPE: case -ECONNREFUSED: /* When the server has died, an ICMP port unreachable message * prompts ECONNREFUSED. */ clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); - break; - default: - clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); - dprintk("RPC: sendmsg returned unrecognized error %d\n", - -status); } - +out: return status; } @@ -697,6 +697,8 @@ static int xs_tcp_send_request(struct rpc_task *task) status = -EAGAIN; break; } + if (!transport->sock) + goto out; switch (status) { case -ENOTSOCK: @@ -706,21 +708,17 @@ static int xs_tcp_send_request(struct rpc_task *task) case -EAGAIN: xs_nospace(task); break; + default: + dprintk("RPC: sendmsg returned unrecognized error %d\n", + -status); case -ECONNRESET: xs_tcp_shutdown(xprt); case -ECONNREFUSED: case -ENOTCONN: case -EPIPE: - status = -ENOTCONN; - clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); - break; - default: - dprintk("RPC: sendmsg returned unrecognized error %d\n", - -status); clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); - xs_tcp_shutdown(xprt); } - +out: return status; } -- cgit v1.2.3 From 482f32e65d31cbf88d08306fa5d397cc945c3c26 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:38:00 -0400 Subject: SUNRPC: Handle socket errors correctly Ensure that we pick up and handle socket errors as they occur. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 5e8198bede81..879af6f27b4c 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1208,23 +1208,20 @@ static void xs_tcp_state_change(struct sock *sk) } /** - * xs_tcp_error_report - callback mainly for catching RST events + * xs_error_report - callback mainly for catching socket errors * @sk: socket */ -static void xs_tcp_error_report(struct sock *sk) +static void xs_error_report(struct sock *sk) { struct rpc_xprt *xprt; read_lock(&sk->sk_callback_lock); - if (sk->sk_err != ECONNRESET || sk->sk_state != TCP_ESTABLISHED) - goto out; if (!(xprt = xprt_from_sock(sk))) goto out; dprintk("RPC: %s client %p...\n" "RPC: error %d\n", __func__, xprt, sk->sk_err); - - xprt_force_disconnect(xprt); + xprt_wake_pending_tasks(xprt, -EAGAIN); out: read_unlock(&sk->sk_callback_lock); } @@ -1509,6 +1506,7 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) sk->sk_user_data = xprt; sk->sk_data_ready = xs_udp_data_ready; sk->sk_write_space = xs_udp_write_space; + sk->sk_error_report = xs_error_report; sk->sk_no_check = UDP_CSUM_NORCV; sk->sk_allocation = GFP_ATOMIC; @@ -1656,7 +1654,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) sk->sk_data_ready = xs_tcp_data_ready; sk->sk_state_change = xs_tcp_state_change; sk->sk_write_space = xs_tcp_write_space; - sk->sk_error_report = xs_tcp_error_report; + sk->sk_error_report = xs_error_report; sk->sk_allocation = GFP_ATOMIC; /* socket options */ -- cgit v1.2.3 From 2a4919919a97911b0aa4b9f5ac1eab90ba87652b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:38:00 -0400 Subject: SUNRPC: Return EAGAIN instead of ENOTCONN when waking up xprt->pending While we should definitely return socket errors to the task that is currently trying to send data, there is no need to propagate the same error to all the other tasks on xprt->pending. Doing so actually slows down recovery, since it causes more than one tasks to attempt socket recovery. Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 15 ++++--------- net/sunrpc/xprt.c | 20 ++++++------------ net/sunrpc/xprtsock.c | 58 +++++++++++++++++++++++++++------------------------ 3 files changed, 41 insertions(+), 52 deletions(-) (limited to 'net') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 145715b53115..5abab094441f 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1032,27 +1032,20 @@ call_connect_status(struct rpc_task *task) dprint_status(task); task->tk_status = 0; - if (status >= 0) { + if (status >= 0 || status == -EAGAIN) { clnt->cl_stats->netreconn++; task->tk_action = call_transmit; return; } - /* Something failed: remote service port may have changed */ - rpc_force_rebind(clnt); - switch (status) { - case -ENOTCONN: - case -EAGAIN: - task->tk_action = call_bind; - if (!RPC_IS_SOFT(task)) - return; /* if soft mounted, test if we've timed out */ case -ETIMEDOUT: task->tk_action = call_timeout; - return; + break; + default: + rpc_exit(task, -EIO); } - rpc_exit(task, -EIO); } /* diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index d588e755e107..a0bfe53f1621 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -611,7 +611,7 @@ void xprt_disconnect_done(struct rpc_xprt *xprt) dprintk("RPC: disconnected transport %p\n", xprt); spin_lock_bh(&xprt->transport_lock); xprt_clear_connected(xprt); - xprt_wake_pending_tasks(xprt, -ENOTCONN); + xprt_wake_pending_tasks(xprt, -EAGAIN); spin_unlock_bh(&xprt->transport_lock); } EXPORT_SYMBOL_GPL(xprt_disconnect_done); @@ -629,7 +629,7 @@ void xprt_force_disconnect(struct rpc_xprt *xprt) /* Try to schedule an autoclose RPC call */ if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) queue_work(rpciod_workqueue, &xprt->task_cleanup); - xprt_wake_pending_tasks(xprt, -ENOTCONN); + xprt_wake_pending_tasks(xprt, -EAGAIN); spin_unlock_bh(&xprt->transport_lock); } @@ -656,7 +656,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie) /* Try to schedule an autoclose RPC call */ if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) queue_work(rpciod_workqueue, &xprt->task_cleanup); - xprt_wake_pending_tasks(xprt, -ENOTCONN); + xprt_wake_pending_tasks(xprt, -EAGAIN); out: spin_unlock_bh(&xprt->transport_lock); } @@ -726,9 +726,8 @@ static void xprt_connect_status(struct rpc_task *task) } switch (task->tk_status) { - case -ENOTCONN: - dprintk("RPC: %5u xprt_connect_status: connection broken\n", - task->tk_pid); + case -EAGAIN: + dprintk("RPC: %5u xprt_connect_status: retrying\n", task->tk_pid); break; case -ETIMEDOUT: dprintk("RPC: %5u xprt_connect_status: connect attempt timed " @@ -849,15 +848,8 @@ int xprt_prepare_transmit(struct rpc_task *task) err = req->rq_received; goto out_unlock; } - if (!xprt->ops->reserve_xprt(task)) { + if (!xprt->ops->reserve_xprt(task)) err = -EAGAIN; - goto out_unlock; - } - - if (!xprt_connected(xprt)) { - err = -ENOTCONN; - goto out_unlock; - } out_unlock: spin_unlock_bh(&xprt->transport_lock); return err; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 879af6f27b4c..8e58b0b5460b 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1162,7 +1162,7 @@ static void xs_tcp_state_change(struct sock *sk) transport->tcp_flags = TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID; - xprt_wake_pending_tasks(xprt, 0); + xprt_wake_pending_tasks(xprt, -EAGAIN); } spin_unlock_bh(&xprt->transport_lock); break; @@ -1721,20 +1721,22 @@ static void xs_tcp_connect_worker4(struct work_struct *work) dprintk("RPC: %p connect status %d connected %d sock state %d\n", xprt, -status, xprt_connected(xprt), sock->sk->sk_state); - if (status < 0) { - switch (status) { - case -EINPROGRESS: - case -EALREADY: - goto out_clear; - case -ECONNREFUSED: - case -ECONNRESET: - /* retry with existing socket, after a delay */ - break; - default: - /* get rid of existing socket, and retry */ - xs_tcp_shutdown(xprt); - } + switch (status) { + case 0: + case -EINPROGRESS: + case -EALREADY: + goto out_clear; + case -ECONNREFUSED: + case -ECONNRESET: + /* retry with existing socket, after a delay */ + break; + default: + /* get rid of existing socket, and retry */ + xs_tcp_shutdown(xprt); + printk("%s: connect returned unhandled error %d\n", + __func__, status); } + status = -EAGAIN; out: xprt_wake_pending_tasks(xprt, status); out_clear: @@ -1780,20 +1782,22 @@ static void xs_tcp_connect_worker6(struct work_struct *work) status = xs_tcp_finish_connecting(xprt, sock); dprintk("RPC: %p connect status %d connected %d sock state %d\n", xprt, -status, xprt_connected(xprt), sock->sk->sk_state); - if (status < 0) { - switch (status) { - case -EINPROGRESS: - case -EALREADY: - goto out_clear; - case -ECONNREFUSED: - case -ECONNRESET: - /* retry with existing socket, after a delay */ - break; - default: - /* get rid of existing socket, and retry */ - xs_tcp_shutdown(xprt); - } + switch (status) { + case 0: + case -EINPROGRESS: + case -EALREADY: + goto out_clear; + case -ECONNREFUSED: + case -ECONNRESET: + /* retry with existing socket, after a delay */ + break; + default: + /* get rid of existing socket, and retry */ + xs_tcp_shutdown(xprt); + printk("%s: connect returned unhandled error %d\n", + __func__, status); } + status = -EAGAIN; out: xprt_wake_pending_tasks(xprt, status); out_clear: -- cgit v1.2.3 From 8a2cec295f4499cc9d4452e9b02d4ed071bb42d3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:38:01 -0400 Subject: SUNRPC: Delay, then retry on connection errors. Enforce the comment in xs_tcp_connect_worker4/xs_tcp_connect_worker6 that we should delay, then retry on certain connection errors. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 8e58b0b5460b..9f3e615d3e09 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1722,20 +1722,19 @@ static void xs_tcp_connect_worker4(struct work_struct *work) xprt, -status, xprt_connected(xprt), sock->sk->sk_state); switch (status) { + case -ECONNREFUSED: + case -ECONNRESET: + case -ENETUNREACH: + /* retry with existing socket, after a delay */ case 0: case -EINPROGRESS: case -EALREADY: goto out_clear; - case -ECONNREFUSED: - case -ECONNRESET: - /* retry with existing socket, after a delay */ - break; - default: - /* get rid of existing socket, and retry */ - xs_tcp_shutdown(xprt); - printk("%s: connect returned unhandled error %d\n", - __func__, status); } + /* get rid of existing socket, and retry */ + xs_tcp_shutdown(xprt); + printk("%s: connect returned unhandled error %d\n", + __func__, status); status = -EAGAIN; out: xprt_wake_pending_tasks(xprt, status); @@ -1783,20 +1782,19 @@ static void xs_tcp_connect_worker6(struct work_struct *work) dprintk("RPC: %p connect status %d connected %d sock state %d\n", xprt, -status, xprt_connected(xprt), sock->sk->sk_state); switch (status) { + case -ECONNREFUSED: + case -ECONNRESET: + case -ENETUNREACH: + /* retry with existing socket, after a delay */ case 0: case -EINPROGRESS: case -EALREADY: goto out_clear; - case -ECONNREFUSED: - case -ECONNRESET: - /* retry with existing socket, after a delay */ - break; - default: - /* get rid of existing socket, and retry */ - xs_tcp_shutdown(xprt); - printk("%s: connect returned unhandled error %d\n", - __func__, status); } + /* get rid of existing socket, and retry */ + xs_tcp_shutdown(xprt); + printk("%s: connect returned unhandled error %d\n", + __func__, status); status = -EAGAIN; out: xprt_wake_pending_tasks(xprt, status); -- cgit v1.2.3 From 5e3771ce2d6a69e10fcc870cdf226d121d868491 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:38:01 -0400 Subject: SUNRPC: Ensure that xs_nospace return values are propagated If xs_nospace() finds that the socket has disconnected, it attempts to return ENOTCONN, however that value is then squashed by the callers. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 9f3e615d3e09..2e070679ab4a 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -521,11 +521,12 @@ static void xs_nospace_callback(struct rpc_task *task) * @task: task to put to sleep * */ -static void xs_nospace(struct rpc_task *task) +static int xs_nospace(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + int ret = 0; dprintk("RPC: %5u xmit incomplete (%u left of %u)\n", task->tk_pid, req->rq_slen - req->rq_bytes_sent, @@ -537,6 +538,7 @@ static void xs_nospace(struct rpc_task *task) /* Don't race with disconnect */ if (xprt_connected(xprt)) { if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) { + ret = -EAGAIN; /* * Notify TCP that we're limited by the application * window size @@ -548,10 +550,11 @@ static void xs_nospace(struct rpc_task *task) } } else { clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); - task->tk_status = -ENOTCONN; + ret = -ENOTCONN; } spin_unlock_bh(&xprt->transport_lock); + return ret; } /** @@ -603,7 +606,7 @@ static int xs_udp_send_request(struct rpc_task *task) /* Should we call xs_close() here? */ break; case -EAGAIN: - xs_nospace(task); + status = xs_nospace(task); break; default: dprintk("RPC: sendmsg returned unrecognized error %d\n", @@ -706,7 +709,7 @@ static int xs_tcp_send_request(struct rpc_task *task) /* Should we call xs_close() here? */ break; case -EAGAIN: - xs_nospace(task); + status = xs_nospace(task); break; default: dprintk("RPC: sendmsg returned unrecognized error %d\n", -- cgit v1.2.3 From a70f730282019f487aa33a84e5ac9a5e89c5abd0 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Fri, 13 Mar 2009 14:49:46 +1030 Subject: cpumask: replace node_to_cpumask with cpumask_of_node. Impact: cleanup node_to_cpumask (and the blecherous node_to_cpumask_ptr which contained a declaration) are replaced now everyone implements cpumask_of_node. Signed-off-by: Rusty Russell --- net/sunrpc/svc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index c51fed4d1af1..3bdd5bffaca8 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -317,8 +317,7 @@ svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx) } case SVC_POOL_PERNODE: { - node_to_cpumask_ptr(nodecpumask, node); - set_cpus_allowed_ptr(task, nodecpumask); + set_cpus_allowed_ptr(task, cpumask_of_node(node)); break; } } -- cgit v1.2.3 From 5e8f3f703ae4e4af65e2695e486b3cd198328863 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Thu, 12 Mar 2009 09:49:17 +0000 Subject: sctp: simplify sctp listening code sctp_inet_listen() call is split between UDP and TCP style. Looking at the code, the two functions are almost the same and can be merged into a single helper. This also fixes a bug that was fixed in the UDP function, but missed in the TCP function. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/socket.c | 152 ++++++++++++++++++------------------------------------ 1 file changed, 50 insertions(+), 102 deletions(-) (limited to 'net') diff --git a/net/sctp/socket.c b/net/sctp/socket.c index bbd3cd238d7f..5fb3a8c9792e 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5843,37 +5843,28 @@ static int sctp_get_port(struct sock *sk, unsigned short snum) } /* - * 3.1.3 listen() - UDP Style Syntax - * - * By default, new associations are not accepted for UDP style sockets. - * An application uses listen() to mark a socket as being able to - * accept new associations. + * Move a socket to LISTENING state. */ -SCTP_STATIC int sctp_seqpacket_listen(struct sock *sk, int backlog) +SCTP_STATIC int sctp_listen_start(struct sock *sk, int backlog) { struct sctp_sock *sp = sctp_sk(sk); struct sctp_endpoint *ep = sp->ep; + struct crypto_hash *tfm = NULL; - /* Only UDP style sockets that are not peeled off are allowed to - * listen(). - */ - if (!sctp_style(sk, UDP)) - return -EINVAL; - - /* If backlog is zero, disable listening. */ - if (!backlog) { - if (sctp_sstate(sk, CLOSED)) - return 0; - - sctp_unhash_endpoint(ep); - sk->sk_state = SCTP_SS_CLOSED; - return 0; + /* Allocate HMAC for generating cookie. */ + if (!sctp_sk(sk)->hmac && sctp_hmac_alg) { + tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) { + if (net_ratelimit()) { + printk(KERN_INFO + "SCTP: failed to load transform for %s: %ld\n", + sctp_hmac_alg, PTR_ERR(tfm)); + } + return -ENOSYS; + } + sctp_sk(sk)->hmac = tfm; } - /* Return if we are already listening. */ - if (sctp_sstate(sk, LISTENING)) - return 0; - /* * If a bind() or sctp_bindx() is not called prior to a listen() * call that allows new associations to be accepted, the system @@ -5884,7 +5875,6 @@ SCTP_STATIC int sctp_seqpacket_listen(struct sock *sk, int backlog) * extensions draft, but follows the practice as seen in TCP * sockets. * - * Additionally, turn off fastreuse flag since we are not listening */ sk->sk_state = SCTP_SS_LISTENING; if (!ep->base.bind_addr.port) { @@ -5895,113 +5885,71 @@ SCTP_STATIC int sctp_seqpacket_listen(struct sock *sk, int backlog) sk->sk_state = SCTP_SS_CLOSED; return -EADDRINUSE; } - sctp_sk(sk)->bind_hash->fastreuse = 0; } - sctp_hash_endpoint(ep); - return 0; -} - -/* - * 4.1.3 listen() - TCP Style Syntax - * - * Applications uses listen() to ready the SCTP endpoint for accepting - * inbound associations. - */ -SCTP_STATIC int sctp_stream_listen(struct sock *sk, int backlog) -{ - struct sctp_sock *sp = sctp_sk(sk); - struct sctp_endpoint *ep = sp->ep; - - /* If backlog is zero, disable listening. */ - if (!backlog) { - if (sctp_sstate(sk, CLOSED)) - return 0; - - sctp_unhash_endpoint(ep); - sk->sk_state = SCTP_SS_CLOSED; - return 0; - } - - if (sctp_sstate(sk, LISTENING)) - return 0; - - /* - * If a bind() or sctp_bindx() is not called prior to a listen() - * call that allows new associations to be accepted, the system - * picks an ephemeral port and will choose an address set equivalent - * to binding with a wildcard address. - * - * This is not currently spelled out in the SCTP sockets - * extensions draft, but follows the practice as seen in TCP - * sockets. - */ - sk->sk_state = SCTP_SS_LISTENING; - if (!ep->base.bind_addr.port) { - if (sctp_autobind(sk)) - return -EAGAIN; - } else - sctp_sk(sk)->bind_hash->fastreuse = 0; - sk->sk_max_ack_backlog = backlog; sctp_hash_endpoint(ep); return 0; } /* + * 4.1.3 / 5.1.3 listen() + * + * By default, new associations are not accepted for UDP style sockets. + * An application uses listen() to mark a socket as being able to + * accept new associations. + * + * On TCP style sockets, applications use listen() to ready the SCTP + * endpoint for accepting inbound associations. + * + * On both types of endpoints a backlog of '0' disables listening. + * * Move a socket to LISTENING state. */ int sctp_inet_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; - struct crypto_hash *tfm = NULL; + struct sctp_endpoint *ep = sctp_sk(sk)->ep; int err = -EINVAL; if (unlikely(backlog < 0)) - goto out; + return err; sctp_lock_sock(sk); + /* Peeled-off sockets are not allowed to listen(). */ + if (sctp_style(sk, UDP_HIGH_BANDWIDTH)) + goto out; + if (sock->state != SS_UNCONNECTED) goto out; - /* Allocate HMAC for generating cookie. */ - if (!sctp_sk(sk)->hmac && sctp_hmac_alg) { - tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm)) { - if (net_ratelimit()) { - printk(KERN_INFO - "SCTP: failed to load transform for %s: %ld\n", - sctp_hmac_alg, PTR_ERR(tfm)); - } - err = -ENOSYS; + /* If backlog is zero, disable listening. */ + if (!backlog) { + if (sctp_sstate(sk, CLOSED)) goto out; - } - } - switch (sock->type) { - case SOCK_SEQPACKET: - err = sctp_seqpacket_listen(sk, backlog); - break; - case SOCK_STREAM: - err = sctp_stream_listen(sk, backlog); - break; - default: - break; + err = 0; + sctp_unhash_endpoint(ep); + sk->sk_state = SCTP_SS_CLOSED; + if (sk->sk_reuse) + sctp_sk(sk)->bind_hash->fastreuse = 1; + goto out; } - if (err) - goto cleanup; + /* If we are already listening, just update the backlog */ + if (sctp_sstate(sk, LISTENING)) + sk->sk_max_ack_backlog = backlog; + else { + err = sctp_listen_start(sk, backlog); + if (err) + goto out; + } - /* Store away the transform reference. */ - if (!sctp_sk(sk)->hmac) - sctp_sk(sk)->hmac = tfm; + err = 0; out: sctp_release_sock(sk); return err; -cleanup: - crypto_free_hash(tfm); - goto out; } /* -- cgit v1.2.3 From 5ffad5acebec735b7a368851bf22394b734cae8a Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 12 Mar 2009 09:49:18 +0000 Subject: sctp: fix to indicate ASCONF support in INIT-ACK only if peer has such capable This patch fix to indicate ASCONF support in INIT-ACK only if peer has such capable. This patch also fix to calc the chunk size if peer has no FWD-TSN capable. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sm_make_chunk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index b40e95f9851b..9484f33730f6 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -372,10 +372,10 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, if (asoc->peer.ecn_capable) chunksize += sizeof(ecap_param); - if (sctp_prsctp_enable) + if (asoc->peer.prsctp_capable) chunksize += sizeof(prsctp_param); - if (sctp_addip_enable) { + if (asoc->peer.asconf_capable) { extensions[num_ext] = SCTP_CID_ASCONF; extensions[num_ext+1] = SCTP_CID_ASCONF_ACK; num_ext += 2; -- cgit v1.2.3 From 76595024ffab3599bd28ea014f6c23c1a8c8dd2c Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 12 Mar 2009 09:49:19 +0000 Subject: sctp: fix to send FORWARD-TSN chunk only if peer has such capable RFC3758 Section 3.3.1. Sending Forward-TSN-Supported param in INIT Note that if the endpoint chooses NOT to include the parameter, then at no time during the life of the association can it send or process a FORWARD TSN. If peer does not support PR-SCTP capable, don't send FORWARD-TSN chunk to peer. Signed-off-by: Wei Yongjun Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/outqueue.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index a367d15a21aa..d765fc53e74d 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1758,6 +1758,9 @@ static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 ctsn) struct sctp_chunk *chunk; struct list_head *lchunk, *temp; + if (!asoc->peer.prsctp_capable) + return; + /* PR-SCTP C1) Let SackCumAck be the Cumulative TSN ACK carried in the * received SACK. * -- cgit v1.2.3 From 6fc791ee631728b2beddda87560f1af59e32230e Mon Sep 17 00:00:00 2001 From: malc Date: Thu, 12 Mar 2009 09:49:20 +0000 Subject: sctp: add Adaptation Layer Indication parameter only when it's set RFC5061 states: Each adaptation layer that is defined that wishes to use this parameter MUST specify an adaptation code point in an appropriate RFC defining its use and meaning. If the user has not set one - assume they don't want to sent the param with a zero Adaptation Code Point. Rationale - Currently the IANA defines zero as reserved - and 1 as the only valid value - so we consider zero to be unset - to save adding a boolean to the socket structure. Including this parameter unconditionally causes endpoints that do not understand it to report errors unnecessarily. Signed-off-by: Malcolm Lashley Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/sm_make_chunk.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 9484f33730f6..6851ee94e974 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -224,7 +224,9 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, num_ext += 2; } - chunksize += sizeof(aiparam); + if (sp->adaptation_ind) + chunksize += sizeof(aiparam); + chunksize += vparam_len; /* Account for AUTH related parameters */ @@ -304,10 +306,12 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, if (sctp_prsctp_enable) sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param); - aiparam.param_hdr.type = SCTP_PARAM_ADAPTATION_LAYER_IND; - aiparam.param_hdr.length = htons(sizeof(aiparam)); - aiparam.adaptation_ind = htonl(sp->adaptation_ind); - sctp_addto_chunk(retval, sizeof(aiparam), &aiparam); + if (sp->adaptation_ind) { + aiparam.param_hdr.type = SCTP_PARAM_ADAPTATION_LAYER_IND; + aiparam.param_hdr.length = htons(sizeof(aiparam)); + aiparam.adaptation_ind = htonl(sp->adaptation_ind); + sctp_addto_chunk(retval, sizeof(aiparam), &aiparam); + } /* Add SCTP-AUTH chunks to the parameter list */ if (sctp_auth_enable) { @@ -332,6 +336,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, sctp_inithdr_t initack; struct sctp_chunk *retval; union sctp_params addrs; + struct sctp_sock *sp; int addrs_len; sctp_cookie_param_t *cookie; int cookie_len; @@ -366,6 +371,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, /* Calculate the total size of allocation, include the reserved * space for reporting unknown parameters if it is specified. */ + sp = sctp_sk(asoc->base.sk); chunksize = sizeof(initack) + addrs_len + cookie_len + unkparam_len; /* Tell peer that we'll do ECN only if peer advertised such cap. */ @@ -381,7 +387,8 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, num_ext += 2; } - chunksize += sizeof(aiparam); + if (sp->adaptation_ind) + chunksize += sizeof(aiparam); if (asoc->peer.auth_capable) { auth_random = (sctp_paramhdr_t *)asoc->c.auth_random; @@ -432,10 +439,12 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, if (asoc->peer.prsctp_capable) sctp_addto_chunk(retval, sizeof(prsctp_param), &prsctp_param); - aiparam.param_hdr.type = SCTP_PARAM_ADAPTATION_LAYER_IND; - aiparam.param_hdr.length = htons(sizeof(aiparam)); - aiparam.adaptation_ind = htonl(sctp_sk(asoc->base.sk)->adaptation_ind); - sctp_addto_chunk(retval, sizeof(aiparam), &aiparam); + if (sp->adaptation_ind) { + aiparam.param_hdr.type = SCTP_PARAM_ADAPTATION_LAYER_IND; + aiparam.param_hdr.length = htons(sizeof(aiparam)); + aiparam.adaptation_ind = htonl(sp->adaptation_ind); + sctp_addto_chunk(retval, sizeof(aiparam), &aiparam); + } if (asoc->peer.auth_capable) { sctp_addto_chunk(retval, ntohs(auth_random->length), -- cgit v1.2.3 From 4893d39e865b2897bf9fcd329697d37032d853a1 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 11 Mar 2009 09:48:26 +0000 Subject: Network Drop Monitor: Add trace declaration for skb frees Signed-off-by: Neil Horman include/trace/skb.h | 8 ++++++++ net/core/Makefile | 2 ++ net/core/net-traces.c | 29 +++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+) Signed-off-by: David S. Miller --- net/core/Makefile | 2 ++ net/core/net-traces.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 net/core/net-traces.c (limited to 'net') diff --git a/net/core/Makefile b/net/core/Makefile index 26a37cb31923..d47092bc525c 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -17,3 +17,5 @@ obj-$(CONFIG_NET_PKTGEN) += pktgen.o obj-$(CONFIG_NETPOLL) += netpoll.o obj-$(CONFIG_NET_DMA) += user_dma.o obj-$(CONFIG_FIB_RULES) += fib_rules.o +obj-$(CONFIG_TRACEPOINTS) += net-traces.o + diff --git a/net/core/net-traces.c b/net/core/net-traces.c new file mode 100644 index 000000000000..c8fb45665e4f --- /dev/null +++ b/net/core/net-traces.c @@ -0,0 +1,29 @@ +/* + * consolidates trace point definitions + * + * Copyright (C) 2009 Neil Horman + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + + +DEFINE_TRACE(kfree_skb); +EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); -- cgit v1.2.3 From ead2ceb0ec9f85cff19c43b5cdb2f8a054484431 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 11 Mar 2009 09:49:55 +0000 Subject: Network Drop Monitor: Adding kfree_skb_clean for non-drops and modifying end-of-line points for skbs Signed-off-by: Neil Horman include/linux/skbuff.h | 4 +++- net/core/datagram.c | 2 +- net/core/skbuff.c | 22 ++++++++++++++++++++++ net/ipv4/arp.c | 2 +- net/ipv4/udp.c | 2 +- net/packet/af_packet.c | 2 +- 6 files changed, 29 insertions(+), 5 deletions(-) Signed-off-by: David S. Miller --- net/core/datagram.c | 2 +- net/core/skbuff.c | 22 ++++++++++++++++++++++ net/ipv4/arp.c | 2 +- net/ipv4/udp.c | 2 +- net/packet/af_packet.c | 2 +- 5 files changed, 26 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/datagram.c b/net/core/datagram.c index 5e2ac0c4b07c..d0de644b378d 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -208,7 +208,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, void skb_free_datagram(struct sock *sk, struct sk_buff *skb) { - kfree_skb(skb); + consume_skb(skb); sk_mem_reclaim_partial(sk); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e5e2111a397d..6acbf9e79eb1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -65,6 +65,7 @@ #include #include +#include #include "kmap_skb.h" @@ -442,10 +443,31 @@ void kfree_skb(struct sk_buff *skb) smp_rmb(); else if (likely(!atomic_dec_and_test(&skb->users))) return; + trace_kfree_skb(skb, __builtin_return_address(0)); __kfree_skb(skb); } EXPORT_SYMBOL(kfree_skb); +/** + * consume_skb - free an skbuff + * @skb: buffer to free + * + * Drop a ref to the buffer and free it if the usage count has hit zero + * Functions identically to kfree_skb, but kfree_skb assumes that the frame + * is being dropped after a failure and notes that + */ +void consume_skb(struct sk_buff *skb) +{ + if (unlikely(!skb)) + return; + if (likely(atomic_read(&skb->users) == 1)) + smp_rmb(); + else if (likely(!atomic_dec_and_test(&skb->users))) + return; + __kfree_skb(skb); +} +EXPORT_SYMBOL(consume_skb); + /** * skb_recycle_check - check if skb can be reused for receive * @skb: buffer diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 3d67d1ffed77..9c220323f353 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -892,7 +892,7 @@ static int arp_process(struct sk_buff *skb) out: if (in_dev) in_dev_put(in_dev); - kfree_skb(skb); + consume_skb(skb); return 0; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 4bd178a111d5..05b7abb99f69 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1184,7 +1184,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, sk = sknext; } while (sknext); } else - kfree_skb(skb); + consume_skb(skb); spin_unlock(&hslot->lock); return 0; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index d8cc006fac45..74776de523ec 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -584,7 +584,7 @@ drop_n_restore: skb->len = skb_len; } drop: - kfree_skb(skb); + consume_skb(skb); return 0; } -- cgit v1.2.3 From 9a8afc8d3962f3ed26fd6b56db34133860ed1e72 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 11 Mar 2009 09:51:26 +0000 Subject: Network Drop Monitor: Adding drop monitor implementation & Netlink protocol Signed-off-by: Neil Horman include/linux/net_dropmon.h | 56 +++++++++ net/core/drop_monitor.c | 263 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 319 insertions(+) Signed-off-by: David S. Miller --- net/core/drop_monitor.c | 263 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 263 insertions(+) create mode 100644 net/core/drop_monitor.c (limited to 'net') diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c new file mode 100644 index 000000000000..9fd0dc3cca99 --- /dev/null +++ b/net/core/drop_monitor.c @@ -0,0 +1,263 @@ +/* + * Monitoring code for network dropped packet alerts + * + * Copyright (C) 2009 Neil Horman + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#define TRACE_ON 1 +#define TRACE_OFF 0 + +static void send_dm_alert(struct work_struct *unused); + + +/* + * Globals, our netlink socket pointer + * and the work handle that will send up + * netlink alerts + */ +struct sock *dm_sock; + +struct per_cpu_dm_data { + struct work_struct dm_alert_work; + struct sk_buff *skb; + atomic_t dm_hit_count; + struct timer_list send_timer; +}; + +static struct genl_family net_drop_monitor_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = "NET_DM", + .version = 1, + .maxattr = NET_DM_CMD_MAX, +}; + +static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); + +static int dm_hit_limit = 64; +static int dm_delay = 1; + + +static void reset_per_cpu_data(struct per_cpu_dm_data *data) +{ + size_t al; + struct net_dm_alert_msg *msg; + + al = sizeof(struct net_dm_alert_msg); + al += dm_hit_limit * sizeof(struct net_dm_drop_point); + data->skb = genlmsg_new(al, GFP_KERNEL); + genlmsg_put(data->skb, 0, 0, &net_drop_monitor_family, + 0, NET_DM_CMD_ALERT); + msg = __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_alert_msg)); + memset(msg, 0, al); + atomic_set(&data->dm_hit_count, dm_hit_limit); +} + +static void send_dm_alert(struct work_struct *unused) +{ + struct sk_buff *skb; + struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + + /* + * Grab the skb we're about to send + */ + skb = data->skb; + + /* + * Replace it with a new one + */ + reset_per_cpu_data(data); + + /* + * Ship it! + */ + genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL); + +} + +/* + * This is the timer function to delay the sending of an alert + * in the event that more drops will arrive during the + * hysteresis period. Note that it operates under the timer interrupt + * so we don't need to disable preemption here + */ +static void sched_send_work(unsigned long unused) +{ + struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + + schedule_work(&data->dm_alert_work); +} + +static void trace_kfree_skb_hit(struct sk_buff *skb, void *location) +{ + struct net_dm_alert_msg *msg; + struct nlmsghdr *nlh; + int i; + struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data); + + + if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) { + /* + * we're already at zero, discard this hit + */ + goto out; + } + + nlh = (struct nlmsghdr *)data->skb->data; + msg = genlmsg_data(nlmsg_data(nlh)); + for (i = 0; i < msg->entries; i++) { + if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) { + msg->points[i].count++; + goto out; + } + } + + /* + * We need to create a new entry + */ + __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_drop_point)); + memcpy(msg->points[msg->entries].pc, &location, sizeof(void *)); + msg->points[msg->entries].count = 1; + msg->entries++; + + if (!timer_pending(&data->send_timer)) { + data->send_timer.expires = jiffies + dm_delay * HZ; + add_timer_on(&data->send_timer, smp_processor_id()); + } + +out: + return; +} + +static int set_all_monitor_traces(int state) +{ + int rc = 0; + + switch (state) { + case TRACE_ON: + rc |= register_trace_kfree_skb(trace_kfree_skb_hit); + break; + case TRACE_OFF: + rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit); + + tracepoint_synchronize_unregister(); + break; + default: + rc = 1; + break; + } + + if (rc) + return -EINPROGRESS; + return rc; +} + + +static int net_dm_cmd_config(struct sk_buff *skb, + struct genl_info *info) +{ + return -ENOTSUPP; +} + +static int net_dm_cmd_trace(struct sk_buff *skb, + struct genl_info *info) +{ + switch (info->genlhdr->cmd) { + case NET_DM_CMD_START: + return set_all_monitor_traces(TRACE_ON); + break; + case NET_DM_CMD_STOP: + return set_all_monitor_traces(TRACE_OFF); + break; + } + + return -ENOTSUPP; +} + + +static struct genl_ops dropmon_ops[] = { + { + .cmd = NET_DM_CMD_CONFIG, + .doit = net_dm_cmd_config, + }, + { + .cmd = NET_DM_CMD_START, + .doit = net_dm_cmd_trace, + }, + { + .cmd = NET_DM_CMD_STOP, + .doit = net_dm_cmd_trace, + }, +}; + +static int __init init_net_drop_monitor(void) +{ + int cpu; + int rc, i, ret; + struct per_cpu_dm_data *data; + printk(KERN_INFO "Initalizing network drop monitor service\n"); + + if (sizeof(void *) > 8) { + printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n"); + return -ENOSPC; + } + + if (genl_register_family(&net_drop_monitor_family) < 0) { + printk(KERN_ERR "Could not create drop monitor netlink family\n"); + return -EFAULT; + } + + rc = -EFAULT; + + for (i = 0; i < ARRAY_SIZE(dropmon_ops); i++) { + ret = genl_register_ops(&net_drop_monitor_family, + &dropmon_ops[i]); + if (ret) { + printk(KERN_CRIT "failed to register operation %d\n", + dropmon_ops[i].cmd); + goto out_unreg; + } + } + + rc = 0; + + for_each_present_cpu(cpu) { + data = &per_cpu(dm_cpu_data, cpu); + reset_per_cpu_data(data); + INIT_WORK(&data->dm_alert_work, send_dm_alert); + init_timer(&data->send_timer); + data->send_timer.data = cpu; + data->send_timer.function = sched_send_work; + } + goto out; + +out_unreg: + genl_unregister_family(&net_drop_monitor_family); +out: + return rc; +} + +late_initcall(init_net_drop_monitor); -- cgit v1.2.3 From 273ae44b9cb9443e0b5265cdc99f127ddb95c8db Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Wed, 11 Mar 2009 09:53:16 +0000 Subject: Network Drop Monitor: Adding Build changes to enable drop monitor Network Drop Monitor: Adding Build changes to enable drop monitor Signed-off-by: Neil Horman include/linux/Kbuild | 1 + net/Kconfig | 11 +++++++++++ net/core/Makefile | 1 + 3 files changed, 13 insertions(+) Signed-off-by: David S. Miller --- net/Kconfig | 11 +++++++++++ net/core/Makefile | 1 + 2 files changed, 12 insertions(+) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index 6b39ede3b1b1..c9fdcd7e71ea 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -222,6 +222,17 @@ config NET_TCPPROBE To compile this code as a module, choose M here: the module will be called tcp_probe. +config NET_DROP_MONITOR + boolean "Network packet drop alerting service" + depends on INET && EXPERIMENTAL && TRACEPOINTS + ---help--- + This feature provides an alerting service to userspace in the + event that packets are discarded in the network stack. Alerts + are broadcast via netlink socket to any listening user space + process. If you don't need network drop alerts, or if you are ok + just checking the various proc files and other utilities for + drop statistics, say N here. + endmenu endmenu diff --git a/net/core/Makefile b/net/core/Makefile index d47092bc525c..796f46eece5f 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -18,4 +18,5 @@ obj-$(CONFIG_NETPOLL) += netpoll.o obj-$(CONFIG_NET_DMA) += user_dma.o obj-$(CONFIG_FIB_RULES) += fib_rules.o obj-$(CONFIG_TRACEPOINTS) += net-traces.o +obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o -- cgit v1.2.3 From 1c8dbcf6496c2612d883a8bc6bccc38000e14866 Mon Sep 17 00:00:00 2001 From: Yi Zou Date: Fri, 27 Feb 2009 14:06:54 -0800 Subject: [SCSI] net: add NETIF_F_FCOE_CRC to can_checksum_protocol Add FC CRC offload check for ETH_P_FCOE. Signed-off-by: Yi Zou Acked-by: David Miller Signed-off-by: James Bottomley --- net/core/dev.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 72b0d26fd46d..3d3670640c2d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1457,7 +1457,9 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol) ((features & NETIF_F_IP_CSUM) && protocol == htons(ETH_P_IP)) || ((features & NETIF_F_IPV6_CSUM) && - protocol == htons(ETH_P_IPV6))); + protocol == htons(ETH_P_IPV6)) || + ((features & NETIF_F_FCOE_CRC) && + protocol == htons(ETH_P_FCOE))); } static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) -- cgit v1.2.3 From 08ec9af1c0622b0858099a8644a33af02dd3019f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 13 Mar 2009 14:22:40 -0700 Subject: xfrm: Fix xfrm_state_find() wrt. wildcard source address. The change to make xfrm_state objects hash on source address broke the case where such source addresses are wildcarded. Fix this by doing a two phase lookup, first with fully specified source address, next using saddr wildcarded. Reported-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 90 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 58 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index e25ff62ab2a6..62a5425cc6aa 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -748,12 +748,51 @@ static void xfrm_hash_grow_check(struct net *net, int have_hash_collision) schedule_work(&net->xfrm.state_hash_work); } +static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x, + struct flowi *fl, unsigned short family, + xfrm_address_t *daddr, xfrm_address_t *saddr, + struct xfrm_state **best, int *acq_in_progress, + int *error) +{ + /* Resolution logic: + * 1. There is a valid state with matching selector. Done. + * 2. Valid state with inappropriate selector. Skip. + * + * Entering area of "sysdeps". + * + * 3. If state is not valid, selector is temporary, it selects + * only session which triggered previous resolution. Key + * manager will do something to install a state with proper + * selector. + */ + if (x->km.state == XFRM_STATE_VALID) { + if ((x->sel.family && + !xfrm_selector_match(&x->sel, fl, x->sel.family)) || + !security_xfrm_state_pol_flow_match(x, pol, fl)) + return; + + if (!*best || + (*best)->km.dying > x->km.dying || + ((*best)->km.dying == x->km.dying && + (*best)->curlft.add_time < x->curlft.add_time)) + *best = x; + } else if (x->km.state == XFRM_STATE_ACQ) { + *acq_in_progress = 1; + } else if (x->km.state == XFRM_STATE_ERROR || + x->km.state == XFRM_STATE_EXPIRED) { + if (xfrm_selector_match(&x->sel, fl, x->sel.family) && + security_xfrm_state_pol_flow_match(x, pol, fl)) + *error = -ESRCH; + } +} + struct xfrm_state * xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm_policy *pol, int *err, unsigned short family) { + static xfrm_address_t saddr_wildcard = { }; struct net *net = xp_net(pol); unsigned int h; struct hlist_node *entry; @@ -773,40 +812,27 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, xfrm_state_addr_check(x, daddr, saddr, family) && tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && - (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) { - /* Resolution logic: - 1. There is a valid state with matching selector. - Done. - 2. Valid state with inappropriate selector. Skip. - - Entering area of "sysdeps". - - 3. If state is not valid, selector is temporary, - it selects only session which triggered - previous resolution. Key manager will do - something to install a state with proper - selector. - */ - if (x->km.state == XFRM_STATE_VALID) { - if ((x->sel.family && !xfrm_selector_match(&x->sel, fl, x->sel.family)) || - !security_xfrm_state_pol_flow_match(x, pol, fl)) - continue; - if (!best || - best->km.dying > x->km.dying || - (best->km.dying == x->km.dying && - best->curlft.add_time < x->curlft.add_time)) - best = x; - } else if (x->km.state == XFRM_STATE_ACQ) { - acquire_in_progress = 1; - } else if (x->km.state == XFRM_STATE_ERROR || - x->km.state == XFRM_STATE_EXPIRED) { - if (xfrm_selector_match(&x->sel, fl, x->sel.family) && - security_xfrm_state_pol_flow_match(x, pol, fl)) - error = -ESRCH; - } - } + (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) + xfrm_state_look_at(pol, x, fl, family, daddr, saddr, + &best, &acquire_in_progress, &error); + } + if (best) + goto found; + + h = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, family); + hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { + if (x->props.family == family && + x->props.reqid == tmpl->reqid && + !(x->props.flags & XFRM_STATE_WILDRECV) && + xfrm_state_addr_check(x, daddr, saddr, family) && + tmpl->mode == x->props.mode && + tmpl->id.proto == x->id.proto && + (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) + xfrm_state_look_at(pol, x, fl, family, daddr, saddr, + &best, &acquire_in_progress, &error); } +found: x = best; if (!x && !error && !acquire_in_progress) { if (tmpl->id.spi && -- cgit v1.2.3 From 73ce7b01b4496a5fbf9caf63033c874be692333f Mon Sep 17 00:00:00 2001 From: Denys Fedoryshchenko Date: Fri, 13 Mar 2009 16:02:07 -0700 Subject: ipv4: arp announce, arp_proxy and windows ip conflict verification Windows (XP at least) hosts on boot, with configured static ip, performing address conflict detection, which is defined in RFC3927. Here is quote of important information: " An ARP announcement is identical to the ARP Probe described above, except that now the sender and target IP addresses are both set to the host's newly selected IPv4 address. " But it same time this goes wrong with RFC5227. " The 'sender IP address' field MUST be set to all zeroes; this is to avoid polluting ARP caches in other hosts on the same link in the case where the address turns out to be already in use by another host. " When ARP proxy configured, it must not answer to both cases, because it is address conflict verification in any case. For Windows it is just causing to detect false "ip conflict". Already there is code for RFC5227, so just trivially we just check also if source ip == target ip. Signed-off-by: Denys Fedoryshchenko Signed-off-by: David S. Miller --- net/ipv4/arp.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 9c220323f353..f11931c18381 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -801,8 +801,11 @@ static int arp_process(struct sk_buff *skb) * cache. */ - /* Special case: IPv4 duplicate address detection packet (RFC2131) */ - if (sip == 0) { + /* + * Special case: IPv4 duplicate address detection packet (RFC2131) + * and Gratuitous ARP/ARP Announce. (RFC3927, Section 2.4) + */ + if (sip == 0 || tip == sip) { if (arp->ar_op == htons(ARPOP_REQUEST) && inet_addr_type(net, tip) == RTN_LOCAL && !arp_ignore(in_dev, sip, tip)) -- cgit v1.2.3 From 8db09f26f912f7c90c764806e804b558da520d4f Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Fri, 13 Mar 2009 16:04:12 -0700 Subject: x25: '< 0' and '>= 0' test on unsigned skb->len is an unsigned int, so the test in x25_rx_call_request() always evaluates to true. len in x25_sendmsg() is unsigned as well. so -ERRORS returned by x25_output() are not noticed. Signed-off-by: Roel Kluin Signed-off-by: David S. Miller --- net/x25/af_x25.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 1000e9a26fdb..9ca17b1ce52e 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -951,10 +951,8 @@ int x25_rx_call_request(struct sk_buff *skb, struct x25_neigh *nb, /* * Incoming Call User Data. */ - if (skb->len >= 0) { - skb_copy_from_linear_data(skb, makex25->calluserdata.cuddata, skb->len); - makex25->calluserdata.cudlength = skb->len; - } + skb_copy_from_linear_data(skb, makex25->calluserdata.cuddata, skb->len); + makex25->calluserdata.cudlength = skb->len; sk->sk_ack_backlog++; @@ -1122,8 +1120,9 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock, if (msg->msg_flags & MSG_OOB) skb_queue_tail(&x25->interrupt_out_queue, skb); else { - len = x25_output(sk, skb); - if (len < 0) + rc = x25_output(sk, skb); + len = rc; + if (rc < 0) kfree_skb(skb); else if (x25->qbitincl) len++; -- cgit v1.2.3 From a2025b8b1039e5abaa38319b2eaab3b17867479a Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Fri, 13 Mar 2009 16:05:14 -0700 Subject: tcp: '< 0' test on unsigned promote 'cnt' to size_t, to match 'len'. Signed-off-by: Roel Kluin Signed-off-by: David S. Miller --- net/ipv4/tcp_probe.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index 25524d4e372a..59f5b5e7c566 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -165,9 +165,10 @@ static int tcpprobe_sprint(char *tbuf, int n) static ssize_t tcpprobe_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { - int error = 0, cnt = 0; + int error = 0; + size_t cnt = 0; - if (!buf || len < 0) + if (!buf) return -EINVAL; while (cnt < len) { -- cgit v1.2.3 From 7cd0a63872ac6ef97265f07adc367ca4f984468e Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Sun, 15 Mar 2009 20:00:19 -0700 Subject: pkt_sched: Change misleading code in class delete. While looking for a possible reason of bugzilla report on HTB oops: http://bugzilla.kernel.org/show_bug.cgi?id=12858 I found the code in htb_delete calling htb_destroy_class on zero refcount is very misleading: it can suggest this is a common path, and destroy is called under sch_tree_lock. Actually, this can never happen like this because before deletion cops->get() is done, and after delete a class is still used by tclass_notify. The class destroy is always called from cops->put(), so without sch_tree_lock. This doesn't mean much now (since 2.6.27) because all vulnerable calls were moved from htb_destroy_class to htb_delete, but there was a bug in older kernels. The same change is done for other classful scheds, which, it seems, didn't have similar locking problems here. Reported-by: m0sia Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/sched/sch_cbq.c | 7 +++++-- net/sched/sch_drr.c | 7 +++++-- net/sched/sch_hfsc.c | 7 +++++-- net/sched/sch_htb.c | 7 +++++-- 4 files changed, 20 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 9e43ed949167..d728d8111732 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1960,8 +1960,11 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg) cbq_rmprio(q, cl); sch_tree_unlock(sch); - if (--cl->refcnt == 0) - cbq_destroy_class(sch, cl); + BUG_ON(--cl->refcnt == 0); + /* + * This shouldn't happen: we "hold" one cops->get() when called + * from tc_ctl_tclass; the destroy method is done from cops->put(). + */ return 0; } diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index e36e94ab4e10..7597fe146866 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -155,8 +155,11 @@ static int drr_delete_class(struct Qdisc *sch, unsigned long arg) drr_purge_queue(cl); qdisc_class_hash_remove(&q->clhash, &cl->common); - if (--cl->refcnt == 0) - drr_destroy_class(sch, cl); + BUG_ON(--cl->refcnt == 0); + /* + * This shouldn't happen: we "hold" one cops->get() when called + * from tc_ctl_tclass; the destroy method is done from cops->put(). + */ sch_tree_unlock(sch); return 0; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 74226b265528..5022f9c1f34b 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1139,8 +1139,11 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg) hfsc_purge_queue(sch, cl); qdisc_class_hash_remove(&q->clhash, &cl->cl_common); - if (--cl->refcnt == 0) - hfsc_destroy_class(sch, cl); + BUG_ON(--cl->refcnt == 0); + /* + * This shouldn't happen: we "hold" one cops->get() when called + * from tc_ctl_tclass; the destroy method is done from cops->put(). + */ sch_tree_unlock(sch); return 0; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 355974f610c5..88cd02626621 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1275,8 +1275,11 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) if (last_child) htb_parent_to_leaf(q, cl, new_q); - if (--cl->refcnt == 0) - htb_destroy_class(sch, cl); + BUG_ON(--cl->refcnt == 0); + /* + * This shouldn't happen: we "hold" one cops->get() when called + * from tc_ctl_tclass; the destroy method is done from cops->put(). + */ sch_tree_unlock(sch); return 0; -- cgit v1.2.3 From 5861f8e58dd84fc34b691c2e8d4824dea68c360e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 14 Mar 2009 14:23:01 +0000 Subject: tcp: remove pointless .dsack/.num_sacks code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the pure assignment case, the earlier zeroing is still in effect. David S. Miller raised concerns if the ifs are there to avoid dirtying cachelines. I came to these conclusions: > We'll be dirty it anyway (now that I check), the first "real" statement > in tcp_rcv_established is: > > tp->rx_opt.saw_tstamp = 0; > > ...that'll land on the same dword. :-/ > > I suppose the blocks are there just because they had more complexity > inside when they had to calculate the eff_sacks too (maybe it would > have been better to just remove them in that drop-patch so you would > have had less head-ache :-)). Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 7 ++----- net/ipv4/tcp_output.c | 3 +-- 2 files changed, 3 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5ecd7aa25979..cd39d1d02dc3 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4248,8 +4248,7 @@ static void tcp_sack_remove(struct tcp_sock *tp) this_sack++; sp++; } - if (num_sacks != tp->rx_opt.num_sacks) - tp->rx_opt.num_sacks = num_sacks; + tp->rx_opt.num_sacks = num_sacks; } /* This one checks to see if we can put data from the @@ -4325,8 +4324,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) TCP_ECN_accept_cwr(tp, skb); - if (tp->rx_opt.dsack) - tp->rx_opt.dsack = 0; + tp->rx_opt.dsack = 0; /* Queue data for delivery to the user. * Packets in sequence go to the receive queue. @@ -4445,7 +4443,6 @@ drop: /* Initial out of order segment, build 1 SACK. */ if (tcp_is_sack(tp)) { tp->rx_opt.num_sacks = 1; - tp->rx_opt.dsack = 0; tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq; tp->selective_acks[0].end_seq = TCP_SKB_CB(skb)->end_seq; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index eb285befdf3b..325658039139 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -441,8 +441,7 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, *ptr++ = htonl(sp[this_sack].end_seq); } - if (tp->rx_opt.dsack) - tp->rx_opt.dsack = 0; + tp->rx_opt.dsack = 0; } } -- cgit v1.2.3 From c43d558a5139a3b22dcac3f19f64ecb39130b02e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 14 Mar 2009 14:23:02 +0000 Subject: tcp: kill dead end_seq variable in clean_rtx_queue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I've already forgotten what for this was necessary, anyway it's no longer used (if it ever was). Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index cd39d1d02dc3..f527a16a7b33 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3201,7 +3201,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { struct tcp_skb_cb *scb = TCP_SKB_CB(skb); - u32 end_seq; u32 acked_pcount; u8 sacked = scb->sacked; @@ -3216,10 +3215,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, break; fully_acked = 0; - end_seq = tp->snd_una; } else { acked_pcount = tcp_skb_pcount(skb); - end_seq = scb->end_seq; } /* MTU probing checks */ -- cgit v1.2.3 From c887e6d2d9aee56ee7c9f2af4cec3a5efdcc4c72 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 14 Mar 2009 14:23:03 +0000 Subject: tcp: consolidate paws check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wow, it was quite tricky to merge that stream of negations but I think I finally got it right: check & replace_ts_recent: (s32)(rcv_tsval - ts_recent) >= 0 => 0 (s32)(ts_recent - rcv_tsval) <= 0 => 0 discard: (s32)(ts_recent - rcv_tsval) > TCP_PAWS_WINDOW => 1 (s32)(ts_recent - rcv_tsval) <= TCP_PAWS_WINDOW => 0 I toggled the return values of tcp_paws_check around since the old encoding added yet-another negation making tracking of truth-values really complicated. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 11 +++++------ net/ipv4/tcp_minisocks.c | 4 ++-- 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index f527a16a7b33..b7d02c5dd6da 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3883,8 +3883,7 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) * Not only, also it occurs for expired timestamps. */ - if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) >= 0 || - get_seconds() >= tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS) + if (tcp_paws_check(&tp->rx_opt, 0)) tcp_store_ts_recent(tp); } } @@ -3936,9 +3935,9 @@ static inline int tcp_paws_discard(const struct sock *sk, const struct sk_buff *skb) { const struct tcp_sock *tp = tcp_sk(sk); - return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW && - get_seconds() < tp->rx_opt.ts_recent_stamp + TCP_PAWS_24DAYS && - !tcp_disordered_ack(sk, skb)); + + return !tcp_paws_check(&tp->rx_opt, TCP_PAWS_WINDOW) && + !tcp_disordered_ack(sk, skb); } /* Check segment sequence number for validity. @@ -5513,7 +5512,7 @@ discard: /* PAWS check. */ if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp && - tcp_paws_check(&tp->rx_opt, 0)) + tcp_paws_reject(&tp->rx_opt, 0)) goto discard_and_undo; if (th->syn) { diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 4b0df3e6b609..43bbba7926ee 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -107,7 +107,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, if (tmp_opt.saw_tstamp) { tmp_opt.ts_recent = tcptw->tw_ts_recent; tmp_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp; - paws_reject = tcp_paws_check(&tmp_opt, th->rst); + paws_reject = tcp_paws_reject(&tmp_opt, th->rst); } } @@ -511,7 +511,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * from another data. */ tmp_opt.ts_recent_stamp = get_seconds() - ((TCP_TIMEOUT_INIT/HZ)<retrans); - paws_reject = tcp_paws_check(&tmp_opt, th->rst); + paws_reject = tcp_paws_reject(&tmp_opt, th->rst); } } -- cgit v1.2.3 From 72211e90501f954f586481c25521c3724cda3cc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 14 Mar 2009 14:23:04 +0000 Subject: tcp: don't check mtu probe completion in the loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It seems that no variables clash such that we couldn't do the check just once later on. Therefore move it. Also kill dead obvious comment, dead argument and add unlikely since this mtu probe does not happen too often. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b7d02c5dd6da..311c30f73ee4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2836,7 +2836,7 @@ static void tcp_mtup_probe_failed(struct sock *sk) icsk->icsk_mtup.probe_size = 0; } -static void tcp_mtup_probe_success(struct sock *sk, struct sk_buff *skb) +static void tcp_mtup_probe_success(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); @@ -3219,12 +3219,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, acked_pcount = tcp_skb_pcount(skb); } - /* MTU probing checks */ - if (fully_acked && icsk->icsk_mtup.probe_size && - !after(tp->mtu_probe.probe_seq_end, scb->end_seq)) { - tcp_mtup_probe_success(sk, skb); - } - if (sacked & TCPCB_RETRANS) { if (sacked & TCPCB_SACKED_RETRANS) tp->retrans_out -= acked_pcount; @@ -3287,6 +3281,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; + if (unlikely(icsk->icsk_mtup.probe_size && + !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) { + tcp_mtup_probe_success(sk); + } + tcp_ack_update_rtt(sk, flag, seq_rtt); tcp_rearm_rto(sk); -- cgit v1.2.3 From 0c54b85f2828128274f319a1eb3ce7f604fe2a53 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 14 Mar 2009 14:23:05 +0000 Subject: tcp: simplify tcp_current_mss MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There's very little need for most of the callsites to get tp->xmit_goal_size updated. That will cost us divide as is, so slice the function in two. Also, the only users of the tp->xmit_goal_size are directly behind tcp_current_mss(), so there's no need to store that variable into tcp_sock at all! The drop of xmit_goal_size currently leaves 16-bit hole and some reorganization would again be necessary to change that (but I'm aiming to fill that hole with u16 xmit_goal_size_segs to cache the results of the remaining divide to get that tso on regression). Bring xmit_goal_size parts into tcp.c Signed-off-by: Ilpo Järvinen Cc: Evgeniy Polyakov Cc: Ingo Molnar Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 43 +++++++++++++++++++++++++++++++++++-------- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_output.c | 41 +++++++---------------------------------- 3 files changed, 43 insertions(+), 43 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d3f9beee74c0..886596ff0aae 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -661,6 +661,37 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp) return NULL; } +static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, + int large_allowed) +{ + struct tcp_sock *tp = tcp_sk(sk); + u32 xmit_size_goal; + + xmit_size_goal = mss_now; + + if (large_allowed && sk_can_gso(sk)) { + xmit_size_goal = ((sk->sk_gso_max_size - 1) - + inet_csk(sk)->icsk_af_ops->net_header_len - + inet_csk(sk)->icsk_ext_hdr_len - + tp->tcp_header_len); + + xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal); + xmit_size_goal -= (xmit_size_goal % mss_now); + } + + return xmit_size_goal; +} + +static int tcp_send_mss(struct sock *sk, int *size_goal, int flags) +{ + int mss_now; + + mss_now = tcp_current_mss(sk); + *size_goal = tcp_xmit_size_goal(sk, mss_now, !(flags & MSG_OOB)); + + return mss_now; +} + static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffset, size_t psize, int flags) { @@ -677,8 +708,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); - mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); - size_goal = tp->xmit_size_goal; + mss_now = tcp_send_mss(sk, &size_goal, flags); copied = 0; err = -EPIPE; @@ -761,8 +791,7 @@ wait_for_memory: if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) goto do_error; - mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); - size_goal = tp->xmit_size_goal; + mss_now = tcp_send_mss(sk, &size_goal, flags); } out: @@ -844,8 +873,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, /* This should be in poll */ clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); - mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); - size_goal = tp->xmit_size_goal; + mss_now = tcp_send_mss(sk, &size_goal, flags); /* Ok commence sending. */ iovlen = msg->msg_iovlen; @@ -1007,8 +1035,7 @@ wait_for_memory: if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) goto do_error; - mss_now = tcp_current_mss(sk, !(flags&MSG_OOB)); - size_goal = tp->xmit_size_goal; + mss_now = tcp_send_mss(sk, &size_goal, flags); } } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 311c30f73ee4..fae78e3eccc4 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2864,7 +2864,7 @@ void tcp_simple_retransmit(struct sock *sk) const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - unsigned int mss = tcp_current_mss(sk, 0); + unsigned int mss = tcp_current_mss(sk); u32 prior_lost = tp->lost_out; tcp_for_write_queue(skb, sk) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 325658039139..c1f259d2d33b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -921,7 +921,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) * factor and mss. */ if (tcp_skb_pcount(skb) > 1) - tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk, 1)); + tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk)); return 0; } @@ -982,15 +982,6 @@ void tcp_mtup_init(struct sock *sk) icsk->icsk_mtup.probe_size = 0; } -/* Bound MSS / TSO packet size with the half of the window */ -static int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) -{ - if (tp->max_window && pktsize > (tp->max_window >> 1)) - return max(tp->max_window >> 1, 68U - tp->tcp_header_len); - else - return pktsize; -} - /* This function synchronize snd mss to current pmtu/exthdr set. tp->rx_opt.user_mss is mss set by user by TCP_MAXSEG. It does NOT counts @@ -1037,22 +1028,17 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) /* Compute the current effective MSS, taking SACKs and IP options, * and even PMTU discovery events into account. */ -unsigned int tcp_current_mss(struct sock *sk, int large_allowed) +unsigned int tcp_current_mss(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct dst_entry *dst = __sk_dst_get(sk); u32 mss_now; - u16 xmit_size_goal; - int doing_tso = 0; unsigned header_len; struct tcp_out_options opts; struct tcp_md5sig_key *md5; mss_now = tp->mss_cache; - if (large_allowed && sk_can_gso(sk)) - doing_tso = 1; - if (dst) { u32 mtu = dst_mtu(dst); if (mtu != inet_csk(sk)->icsk_pmtu_cookie) @@ -1070,19 +1056,6 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) mss_now -= delta; } - xmit_size_goal = mss_now; - - if (doing_tso) { - xmit_size_goal = ((sk->sk_gso_max_size - 1) - - inet_csk(sk)->icsk_af_ops->net_header_len - - inet_csk(sk)->icsk_ext_hdr_len - - tp->tcp_header_len); - - xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal); - xmit_size_goal -= (xmit_size_goal % mss_now); - } - tp->xmit_size_goal = xmit_size_goal; - return mss_now; } @@ -1264,7 +1237,7 @@ int tcp_may_send_now(struct sock *sk) struct sk_buff *skb = tcp_send_head(sk); return (skb && - tcp_snd_test(sk, skb, tcp_current_mss(sk, 1), + tcp_snd_test(sk, skb, tcp_current_mss(sk), (tcp_skb_is_last(sk, skb) ? tp->nonagle : TCP_NAGLE_PUSH))); } @@ -1421,7 +1394,7 @@ static int tcp_mtu_probe(struct sock *sk) return -1; /* Very simple search strategy: just double the MSS. */ - mss_now = tcp_current_mss(sk, 0); + mss_now = tcp_current_mss(sk); probe_size = 2 * tp->mss_cache; size_needed = probe_size + (tp->reordering + 1) * tp->mss_cache; if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high)) { @@ -1903,7 +1876,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) return -EHOSTUNREACH; /* Routing failure or similar. */ - cur_mss = tcp_current_mss(sk, 0); + cur_mss = tcp_current_mss(sk); /* If receiver has shrunk his window, and skb is out of * new window, do not retransmit it. The exception is the @@ -2111,7 +2084,7 @@ void tcp_send_fin(struct sock *sk) * unsent frames. But be careful about outgoing SACKS * and IP options. */ - mss_now = tcp_current_mss(sk, 1); + mss_now = tcp_current_mss(sk); if (tcp_send_head(sk) != NULL) { TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN; @@ -2523,7 +2496,7 @@ int tcp_write_wakeup(struct sock *sk) if ((skb = tcp_send_head(sk)) != NULL && before(TCP_SKB_CB(skb)->seq, tcp_wnd_end(tp))) { int err; - unsigned int mss = tcp_current_mss(sk, 0); + unsigned int mss = tcp_current_mss(sk); unsigned int seg_size = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq)) -- cgit v1.2.3 From 2a3a041c4e2c1685e668b280c121a5a40a029a03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 14 Mar 2009 22:45:16 +0000 Subject: tcp: cache result of earlier divides when mss-aligning things MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The results is very unlikely change every so often so we hardly need to divide again after doing that once for a connection. Yet, if divide still becomes necessary we detect that and do the right thing and again settle for non-divide state. Takes the u16 space which was previously taken by the plain xmit_size_goal. This should take care part of the tso vs non-tso difference we found earlier. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 886596ff0aae..0db9f3b984f7 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -665,7 +665,7 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, int large_allowed) { struct tcp_sock *tp = tcp_sk(sk); - u32 xmit_size_goal; + u32 xmit_size_goal, old_size_goal; xmit_size_goal = mss_now; @@ -676,7 +676,17 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, tp->tcp_header_len); xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal); - xmit_size_goal -= (xmit_size_goal % mss_now); + + /* We try hard to avoid divides here */ + old_size_goal = tp->xmit_size_goal_segs * mss_now; + + if (likely(old_size_goal <= xmit_size_goal && + old_size_goal + mss_now > xmit_size_goal)) { + xmit_size_goal = old_size_goal; + } else { + tp->xmit_size_goal_segs = xmit_size_goal / mss_now; + xmit_size_goal = tp->xmit_size_goal_segs * mss_now; + } } return xmit_size_goal; -- cgit v1.2.3 From afece1c6587010cc81d1a43045c855774e8234a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 14 Mar 2009 14:23:07 +0000 Subject: tcp: make sure xmit goal size never becomes zero MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It's not too likely to happen, would basically require crafted packets (must hit the max guard in tcp_bound_to_half_wnd()). It seems that nothing that bad would happen as there's tcp_mems and congestion window that prevent runaway at some point from hurting all too much (I'm not that sure what all those zero sized segments we would generate do though in write queue). Preventing it regardless is certainly the best way to go. Signed-off-by: Ilpo Järvinen Cc: Evgeniy Polyakov Cc: Ingo Molnar Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0db9f3b984f7..1c4d42ff72bd 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -689,7 +689,7 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, } } - return xmit_size_goal; + return max(xmit_size_goal, mss_now); } static int tcp_send_mss(struct sock *sk, int *size_goal, int flags) -- cgit v1.2.3 From ca735b3aaa945626ba65a3e51145bfe4ecd9e222 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Mon, 16 Mar 2009 14:54:21 +0100 Subject: netfilter: use a linked list of loggers This patch modifies nf_log to use a linked list of loggers for each protocol. This list of loggers is read and write protected with a mutex. This patch separates registration and binding. To be used as logging module, a module has to register calling nf_log_register() and to bind to a protocol it has to call nf_log_bind_pf(). This patch also converts the logging modules to the new API. For nfnetlink_log, it simply switchs call to register functions to call to bind function and adds a call to nf_log_register() during init. For other modules, it just remove a const flag from the logger structure and replace it with a __read_mostly. Signed-off-by: Eric Leblond Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ipt_LOG.c | 2 +- net/ipv4/netfilter/ipt_ULOG.c | 2 +- net/ipv6/netfilter/ip6t_LOG.c | 2 +- net/netfilter/nf_log.c | 90 ++++++++++++++++++++++++++++--------------- net/netfilter/nfnetlink_log.c | 18 ++++++--- 5 files changed, 76 insertions(+), 38 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 27a78fbbd92b..acc44c69eb68 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -464,7 +464,7 @@ static struct xt_target log_tg_reg __read_mostly = { .me = THIS_MODULE, }; -static const struct nf_logger ipt_log_logger ={ +static struct nf_logger ipt_log_logger __read_mostly = { .name = "ipt_LOG", .logfn = &ipt_log_packet, .me = THIS_MODULE, diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 18a2826b57c6..d32cc4bb328a 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -379,7 +379,7 @@ static struct xt_target ulog_tg_reg __read_mostly = { .me = THIS_MODULE, }; -static struct nf_logger ipt_ulog_logger = { +static struct nf_logger ipt_ulog_logger __read_mostly = { .name = "ipt_ULOG", .logfn = ipt_logfn, .me = THIS_MODULE, diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 37adf5abc51e..7018cac4fddc 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -477,7 +477,7 @@ static struct xt_target log_tg6_reg __read_mostly = { .me = THIS_MODULE, }; -static const struct nf_logger ip6t_logger = { +static struct nf_logger ip6t_logger __read_mostly = { .name = "ip6t_LOG", .logfn = &ip6t_log_packet, .me = THIS_MODULE, diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index fa8ae5d2659c..a228b5fbcf7c 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -16,56 +16,60 @@ #define NF_LOG_PREFIXLEN 128 static const struct nf_logger *nf_loggers[NFPROTO_NUMPROTO] __read_mostly; +static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly; static DEFINE_MUTEX(nf_log_mutex); -/* return EBUSY if somebody else is registered, EEXIST if the same logger - * is registred, 0 on success. */ -int nf_log_register(u_int8_t pf, const struct nf_logger *logger) +static struct nf_logger *__find_logger(int pf, const char *str_logger) { - int ret; + struct nf_logger *t; - if (pf >= ARRAY_SIZE(nf_loggers)) - return -EINVAL; - - /* Any setup of logging members must be done before - * substituting pointer. */ - ret = mutex_lock_interruptible(&nf_log_mutex); - if (ret < 0) - return ret; - - if (!nf_loggers[pf]) - rcu_assign_pointer(nf_loggers[pf], logger); - else if (nf_loggers[pf] == logger) - ret = -EEXIST; - else - ret = -EBUSY; + list_for_each_entry(t, &nf_loggers_l[pf], list[pf]) { + if (!strnicmp(str_logger, t->name, strlen(t->name))) + return t; + } - mutex_unlock(&nf_log_mutex); - return ret; + return NULL; } -EXPORT_SYMBOL(nf_log_register); -void nf_log_unregister_pf(u_int8_t pf) +/* return EEXIST if the same logger is registred, 0 on success. */ +int nf_log_register(u_int8_t pf, struct nf_logger *logger) { + const struct nf_logger *llog; + if (pf >= ARRAY_SIZE(nf_loggers)) - return; + return -EINVAL; + mutex_lock(&nf_log_mutex); - rcu_assign_pointer(nf_loggers[pf], NULL); + + if (pf == NFPROTO_UNSPEC) { + int i; + for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) + list_add_tail(&(logger->list[i]), &(nf_loggers_l[i])); + } else { + /* register at end of list to honor first register win */ + list_add_tail(&logger->list[pf], &nf_loggers_l[pf]); + llog = rcu_dereference(nf_loggers[pf]); + if (llog == NULL) + rcu_assign_pointer(nf_loggers[pf], logger); + } + mutex_unlock(&nf_log_mutex); - /* Give time to concurrent readers. */ - synchronize_rcu(); + return 0; } -EXPORT_SYMBOL(nf_log_unregister_pf); +EXPORT_SYMBOL(nf_log_register); -void nf_log_unregister(const struct nf_logger *logger) +void nf_log_unregister(struct nf_logger *logger) { + const struct nf_logger *c_logger; int i; mutex_lock(&nf_log_mutex); for (i = 0; i < ARRAY_SIZE(nf_loggers); i++) { - if (nf_loggers[i] == logger) + c_logger = rcu_dereference(nf_loggers[i]); + if (c_logger == logger) rcu_assign_pointer(nf_loggers[i], NULL); + list_del(&logger->list[i]); } mutex_unlock(&nf_log_mutex); @@ -73,6 +77,27 @@ void nf_log_unregister(const struct nf_logger *logger) } EXPORT_SYMBOL(nf_log_unregister); +int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger) +{ + mutex_lock(&nf_log_mutex); + if (__find_logger(pf, logger->name) == NULL) { + mutex_unlock(&nf_log_mutex); + return -ENOENT; + } + rcu_assign_pointer(nf_loggers[pf], logger); + mutex_unlock(&nf_log_mutex); + return 0; +} +EXPORT_SYMBOL(nf_log_bind_pf); + +void nf_log_unbind_pf(u_int8_t pf) +{ + mutex_lock(&nf_log_mutex); + rcu_assign_pointer(nf_loggers[pf], NULL); + mutex_unlock(&nf_log_mutex); +} +EXPORT_SYMBOL(nf_log_unbind_pf); + void nf_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, @@ -163,10 +188,15 @@ static const struct file_operations nflog_file_ops = { int __init netfilter_log_init(void) { + int i; #ifdef CONFIG_PROC_FS if (!proc_create("nf_log", S_IRUGO, proc_net_netfilter, &nflog_file_ops)) return -1; #endif + + for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) + INIT_LIST_HEAD(&(nf_loggers_l[i])); + return 0; } diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index fa49dc7fe100..3eae3fca29d8 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -691,7 +691,7 @@ nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, return -ENOTSUPP; } -static const struct nf_logger nfulnl_logger = { +static struct nf_logger nfulnl_logger __read_mostly = { .name = "nfnetlink_log", .logfn = &nfulnl_log_packet, .me = THIS_MODULE, @@ -723,9 +723,9 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, /* Commands without queue context */ switch (cmd->command) { case NFULNL_CFG_CMD_PF_BIND: - return nf_log_register(pf, &nfulnl_logger); + return nf_log_bind_pf(pf, &nfulnl_logger); case NFULNL_CFG_CMD_PF_UNBIND: - nf_log_unregister_pf(pf); + nf_log_unbind_pf(pf); return 0; } } @@ -950,17 +950,25 @@ static int __init nfnetlink_log_init(void) goto cleanup_netlink_notifier; } + status = nf_log_register(NFPROTO_UNSPEC, &nfulnl_logger); + if (status < 0) { + printk(KERN_ERR "log: failed to register logger\n"); + goto cleanup_subsys; + } + #ifdef CONFIG_PROC_FS if (!proc_create("nfnetlink_log", 0440, proc_net_netfilter, &nful_file_ops)) - goto cleanup_subsys; + goto cleanup_logger; #endif return status; #ifdef CONFIG_PROC_FS +cleanup_logger: + nf_log_unregister(&nfulnl_logger); +#endif cleanup_subsys: nfnetlink_subsys_unregister(&nfulnl_subsys); -#endif cleanup_netlink_notifier: netlink_unregister_notifier(&nfulnl_rtnl_notifier); return status; -- cgit v1.2.3 From c7a913cd5535554d6f5d5e1f5ef46c4307cf2afc Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Mon, 16 Mar 2009 14:55:27 +0100 Subject: netfilter: print the list of register loggers This patch modifies the proc output to add display of registered loggers. The content of /proc/net/netfilter/nf_log is modified. Instead of displaying a protocol per line with format: proto:logger it now displays: proto:logger (comma_separated_list_of_loggers) NONE is used as keyword if no logger is used. Signed-off-by: Eric Leblond Signed-off-by: Patrick McHardy --- net/netfilter/nf_log.c | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index a228b5fbcf7c..4fcbcc71aa32 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -154,13 +154,37 @@ static int seq_show(struct seq_file *s, void *v) { loff_t *pos = v; const struct nf_logger *logger; + struct nf_logger *t; + int ret; logger = rcu_dereference(nf_loggers[*pos]); if (!logger) - return seq_printf(s, "%2lld NONE\n", *pos); + ret = seq_printf(s, "%2lld NONE (", *pos); + else + ret = seq_printf(s, "%2lld %s (", *pos, logger->name); + + if (ret < 0) + return ret; + + mutex_lock(&nf_log_mutex); + list_for_each_entry(t, &nf_loggers_l[*pos], list[*pos]) { + ret = seq_printf(s, "%s", t->name); + if (ret < 0) { + mutex_unlock(&nf_log_mutex); + return ret; + } + if (&t->list[*pos] != nf_loggers_l[*pos].prev) { + ret = seq_printf(s, ","); + if (ret < 0) { + mutex_unlock(&nf_log_mutex); + return ret; + } + } + } + mutex_unlock(&nf_log_mutex); - return seq_printf(s, "%2lld %s\n", *pos, logger->name); + return seq_printf(s, ")\n"); } static const struct seq_operations nflog_seq_ops = { -- cgit v1.2.3 From 9d2493f88f846b391a15a736efc7f4b97d6c4046 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Mon, 16 Mar 2009 15:15:35 +0100 Subject: netfilter: remove IPvX specific parts from nf_conntrack_l4proto.h Moving the structure definitions to the corresponding IPvX specific header files. Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 1 + net/netfilter/nf_conntrack_proto_tcp.c | 2 ++ net/netfilter/nf_conntrack_proto_udp.c | 2 ++ 3 files changed, 5 insertions(+) (limited to 'net') diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 727b9530448a..e6852f617217 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -26,6 +26,7 @@ #include #include #include +#include static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, struct nf_conntrack_tuple *tuple) diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index a1edb9c1adee..7d3944f02ea1 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include /* Protects ct->proto.tcp */ static DEFINE_RWLOCK(tcp_lock); diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 2b8b1f579f93..d4021179e24e 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -22,6 +22,8 @@ #include #include #include +#include +#include static unsigned int nf_ct_udp_timeout __read_mostly = 30*HZ; static unsigned int nf_ct_udp_timeout_stream __read_mostly = 180*HZ; -- cgit v1.2.3 From 67c0d57930ff9a24c6c34abee1b01f7716a9b0e2 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 16 Mar 2009 15:17:23 +0100 Subject: netfilter: Kconfig spelling fixes (trivial) Signed-off-by: Stephen Hemminger Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/Kconfig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index f8d6180938d5..1833bdbf9805 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -31,7 +31,7 @@ config NF_CONNTRACK_PROC_COMPAT default y help This option enables /proc and sysctl compatibility with the old - layer 3 dependant connection tracking. This is needed to keep + layer 3 dependent connection tracking. This is needed to keep old programs that have not been adapted to the new names working. If unsure, say Y. @@ -99,7 +99,7 @@ config IP_NF_MATCH_TTL ---help--- This is a backwards-compat option for the user's convenience (e.g. when running oldconfig). It selects - COFNIG_NETFILTER_XT_MATCH_HL. + CONFIG_NETFILTER_XT_MATCH_HL. # `filter', generic and specific targets config IP_NF_FILTER @@ -329,7 +329,7 @@ config IP_NF_TARGET_TTL ---help--- This is a backwards-compat option for the user's convenience (e.g. when running oldconfig). It selects - COFNIG_NETFILTER_XT_TARGET_HL. + CONFIG_NETFILTER_XT_TARGET_HL. # raw + specific targets config IP_NF_RAW -- cgit v1.2.3 From 1db7a748dfd50d7615913730763c024444900030 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 16 Mar 2009 15:18:50 +0100 Subject: netfilter: conntrack: increase drop stats if sequence adjustment fails This patch increases the statistics of packets drop if the sequence adjustment fails in ipv4_confirm(). Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 4beb04fac588..8b681f24e271 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -120,8 +120,10 @@ static unsigned int ipv4_confirm(unsigned int hooknum, typeof(nf_nat_seq_adjust_hook) seq_adjust; seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook); - if (!seq_adjust || !seq_adjust(skb, ct, ctinfo)) + if (!seq_adjust || !seq_adjust(skb, ct, ctinfo)) { + NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); return NF_DROP; + } } out: /* We've seen it coming out the other side: confirm it */ -- cgit v1.2.3 From 7ec4749675bf33ea639bbcca8a5365ccc5091a6a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 16 Mar 2009 15:25:46 +0100 Subject: netfilter: ctnetlink: cleanup master conntrack assignation This patch moves the assignation of the master conntrack to ctnetlink_create_conntrack(), which is where it really belongs. This patch is a cleanup. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_netlink.c | 49 +++++++++++++++--------------------- 1 file changed, 20 insertions(+), 29 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index cb78aa00399e..cca22d553826 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1128,9 +1128,9 @@ static int ctnetlink_create_conntrack(struct nlattr *cda[], struct nf_conntrack_tuple *otuple, struct nf_conntrack_tuple *rtuple, - struct nf_conn *master_ct, u32 pid, - int report) + int report, + u8 u3) { struct nf_conn *ct; int err = -EINVAL; @@ -1241,7 +1241,22 @@ ctnetlink_create_conntrack(struct nlattr *cda[], #endif /* setup master conntrack: this is a confirmed expectation */ - if (master_ct) { + if (cda[CTA_TUPLE_MASTER]) { + struct nf_conntrack_tuple master; + struct nf_conntrack_tuple_hash *master_h; + struct nf_conn *master_ct; + + err = ctnetlink_parse_tuple(cda, &master, CTA_TUPLE_MASTER, u3); + if (err < 0) + goto err; + + master_h = __nf_conntrack_find(&init_net, &master); + if (master_h == NULL) { + err = -ENOENT; + goto err; + } + master_ct = nf_ct_tuplehash_to_ctrack(master_h); + nf_conntrack_get(&master_ct->ct_general); __set_bit(IPS_EXPECTED_BIT, &ct->status); ct->master = master_ct; } @@ -1289,39 +1304,15 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, h = __nf_conntrack_find(&init_net, &rtuple); if (h == NULL) { - struct nf_conntrack_tuple master; - struct nf_conntrack_tuple_hash *master_h = NULL; - struct nf_conn *master_ct = NULL; - - if (cda[CTA_TUPLE_MASTER]) { - err = ctnetlink_parse_tuple(cda, - &master, - CTA_TUPLE_MASTER, - u3); - if (err < 0) - goto out_unlock; - - master_h = __nf_conntrack_find(&init_net, &master); - if (master_h == NULL) { - err = -ENOENT; - goto out_unlock; - } - master_ct = nf_ct_tuplehash_to_ctrack(master_h); - nf_conntrack_get(&master_ct->ct_general); - } - err = -ENOENT; if (nlh->nlmsg_flags & NLM_F_CREATE) err = ctnetlink_create_conntrack(cda, &otuple, &rtuple, - master_ct, NETLINK_CB(skb).pid, - nlmsg_report(nlh)); + nlmsg_report(nlh), + u3); spin_unlock_bh(&nf_conntrack_lock); - if (err < 0 && master_ct) - nf_ct_put(master_ct); - return err; } /* implicit 'else' */ -- cgit v1.2.3 From e098360f159b3358f085543eb6dc2eb500d6667c Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 16 Mar 2009 15:27:22 +0100 Subject: netfilter: ctnetlink: cleanup conntrack update preliminary checkings This patch moves the preliminary checkings that must be fulfilled to update a conntrack, which are the following: * NAT manglings cannot be updated * Changing the master conntrack is not allowed. This patch is a cleanup. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_netlink.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index cca22d553826..b67db695d83c 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1062,6 +1062,10 @@ ctnetlink_change_conntrack(struct nf_conn *ct, struct nlattr *cda[]) { int err; + /* only allow NAT changes and master assignation for new conntracks */ + if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST] || cda[CTA_TUPLE_MASTER]) + return -EOPNOTSUPP; + if (cda[CTA_HELP]) { err = ctnetlink_change_helper(ct, cda); if (err < 0) @@ -1323,17 +1327,6 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, if (!(nlh->nlmsg_flags & NLM_F_EXCL)) { struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); - /* we only allow nat config for new conntracks */ - if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) { - err = -EOPNOTSUPP; - goto out_unlock; - } - /* can't link an existing conntrack to a master */ - if (cda[CTA_TUPLE_MASTER]) { - err = -EOPNOTSUPP; - goto out_unlock; - } - err = ctnetlink_change_conntrack(ct, cda); if (err == 0) { nf_conntrack_get(&ct->ct_general); -- cgit v1.2.3 From f0a3c0869f3b0ef93d9df044e9a41e40086d4c97 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 16 Mar 2009 15:28:09 +0100 Subject: netfilter: ctnetlink: move event reporting for new entries outside the lock This patch moves the event reporting outside the lock section. With this patch, the creation and update of entries is homogeneous from the event reporting perspective. Moreover, as the event reporting is done outside the lock section, the netlink broadcast delivery can benefit of the yield() call under congestion. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_netlink.c | 41 ++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index b67db695d83c..9fb7cf7504fa 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1128,12 +1128,10 @@ ctnetlink_event_report(struct nf_conn *ct, u32 pid, int report) report); } -static int +static struct nf_conn * ctnetlink_create_conntrack(struct nlattr *cda[], struct nf_conntrack_tuple *otuple, struct nf_conntrack_tuple *rtuple, - u32 pid, - int report, u8 u3) { struct nf_conn *ct; @@ -1142,7 +1140,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[], ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_ATOMIC); if (IS_ERR(ct)) - return -ENOMEM; + return ERR_PTR(-ENOMEM); if (!cda[CTA_TIMEOUT]) goto err; @@ -1265,18 +1263,14 @@ ctnetlink_create_conntrack(struct nlattr *cda[], ct->master = master_ct; } - nf_conntrack_get(&ct->ct_general); add_timer(&ct->timeout); nf_conntrack_hash_insert(ct); rcu_read_unlock(); - ctnetlink_event_report(ct, pid, report); - nf_ct_put(ct); - - return 0; + return ct; err: nf_conntrack_free(ct); - return err; + return ERR_PTR(err); } static int @@ -1309,14 +1303,25 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, if (h == NULL) { err = -ENOENT; - if (nlh->nlmsg_flags & NLM_F_CREATE) - err = ctnetlink_create_conntrack(cda, - &otuple, - &rtuple, - NETLINK_CB(skb).pid, - nlmsg_report(nlh), - u3); - spin_unlock_bh(&nf_conntrack_lock); + if (nlh->nlmsg_flags & NLM_F_CREATE) { + struct nf_conn *ct; + + ct = ctnetlink_create_conntrack(cda, &otuple, + &rtuple, u3); + if (IS_ERR(ct)) { + err = PTR_ERR(ct); + goto out_unlock; + } + err = 0; + nf_conntrack_get(&ct->ct_general); + spin_unlock_bh(&nf_conntrack_lock); + ctnetlink_event_report(ct, + NETLINK_CB(skb).pid, + nlmsg_report(nlh)); + nf_ct_put(ct); + } else + spin_unlock_bh(&nf_conntrack_lock); + return err; } /* implicit 'else' */ -- cgit v1.2.3 From 26c3b6780618f09abb5f7e03b09b13dbb8e8aa24 Mon Sep 17 00:00:00 2001 From: Scott James Remnant Date: Mon, 16 Mar 2009 15:30:14 +0100 Subject: netfilter: auto-load ip6_queue module when socket opened The ip6_queue module is missing the net-pf-16-proto-13 alias that would cause it to be auto-loaded when a socket of that type is opened. This patch adds the alias. Signed-off-by: Scott James Remnant Signed-off-by: Tim Gardner Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/ip6_queue.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 5859c046cbc4..b693f841aeb4 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -643,6 +643,7 @@ static void __exit ip6_queue_fini(void) MODULE_DESCRIPTION("IPv6 packet queue handler"); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_IP6_FW); module_init(ip6_queue_init); module_exit(ip6_queue_fini); -- cgit v1.2.3 From 95ba434f898c3cb5c7457dce265bf0ab72ba8ce9 Mon Sep 17 00:00:00 2001 From: Scott James Remnant Date: Mon, 16 Mar 2009 15:31:10 +0100 Subject: netfilter: auto-load ip_queue module when socket opened The ip_queue module is missing the net-pf-16-proto-3 alias that would causae it to be auto-loaded when a socket of that type is opened. This patch adds the alias. Signed-off-by: Scott James Remnant Signed-off-by: Tim Gardner Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/ip_queue.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 432ce9d1c11c..5f22c91c6e15 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -640,6 +641,7 @@ static void __exit ip_queue_fini(void) MODULE_DESCRIPTION("IPv4 packet queue handler"); MODULE_AUTHOR("James Morris "); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_FIREWALL); module_init(ip_queue_init); module_exit(ip_queue_fini); -- cgit v1.2.3 From 76398425bb06b07cc3a3b1ce169c67dc9d6874ed Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Sun, 1 Feb 2009 14:26:59 -0700 Subject: Move FASYNC bit handling to f_op->fasync() Removing the BKL from FASYNC handling ran into the challenge of keeping the setting of the FASYNC bit in filp->f_flags atomic with regard to calls to the underlying fasync() function. Andi Kleen suggested moving the handling of that bit into fasync(); this patch does exactly that. As a result, we have a couple of internal API changes: fasync() must now manage the FASYNC bit, and it will be called without the BKL held. As it happens, every fasync() implementation in the kernel with one exception calls fasync_helper(). So, if we make fasync_helper() set the FASYNC bit, we can avoid making any changes to the other fasync() functions - as long as those functions, themselves, have proper locking. Most fasync() implementations do nothing but call fasync_helper() - which has its own lock - so they are easily verified as correct. The BKL had already been pushed down into the rest. The networking code has its own version of fasync_helper(), so that code has been augmented with explicit FASYNC bit handling. Cc: Al Viro Cc: David Miller Reviewed-by: Christoph Hellwig Signed-off-by: Jonathan Corbet --- net/socket.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index 35dd7371752a..0f75746ab06e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1030,6 +1030,13 @@ static int sock_fasync(int fd, struct file *filp, int on) lock_sock(sk); + spin_lock(&filp->f_lock); + if (on) + filp->f_flags |= FASYNC; + else + filp->f_flags &= ~FASYNC; + spin_unlock(&filp->f_lock); + prev = &(sock->fasync_list); for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev) -- cgit v1.2.3 From acc738fec03bdaa5b77340c32a82fbfedaaabef0 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Mon, 16 Mar 2009 15:35:29 +0100 Subject: netfilter: xtables: avoid pointer to self Commit 784544739a25c30637397ace5489eeb6e15d7d49 (netfilter: iptables: lock free counters) broke a number of modules whose rule data referenced itself. A reallocation would not reestablish the correct references, so it is best to use a separate struct that does not fall under RCU. Signed-off-by: Jan Engelhardt Signed-off-by: Patrick McHardy --- net/netfilter/xt_limit.c | 40 +++++++++++++++++++++++++++++----------- net/netfilter/xt_quota.c | 31 ++++++++++++++++++++++++------- net/netfilter/xt_statistic.c | 28 +++++++++++++++++++++++----- 3 files changed, 76 insertions(+), 23 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index c908d69a5595..2e8089ecd0af 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -14,6 +14,11 @@ #include #include +struct xt_limit_priv { + unsigned long prev; + uint32_t credit; +}; + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Herve Eychenne "); MODULE_DESCRIPTION("Xtables: rate-limit match"); @@ -60,18 +65,18 @@ static DEFINE_SPINLOCK(limit_lock); static bool limit_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - struct xt_rateinfo *r = - ((const struct xt_rateinfo *)par->matchinfo)->master; + const struct xt_rateinfo *r = par->matchinfo; + struct xt_limit_priv *priv = r->master; unsigned long now = jiffies; spin_lock_bh(&limit_lock); - r->credit += (now - xchg(&r->prev, now)) * CREDITS_PER_JIFFY; - if (r->credit > r->credit_cap) - r->credit = r->credit_cap; + priv->credit += (now - xchg(&priv->prev, now)) * CREDITS_PER_JIFFY; + if (priv->credit > r->credit_cap) + priv->credit = r->credit_cap; - if (r->credit >= r->cost) { + if (priv->credit >= r->cost) { /* We're not limited. */ - r->credit -= r->cost; + priv->credit -= r->cost; spin_unlock_bh(&limit_lock); return true; } @@ -95,6 +100,7 @@ user2credits(u_int32_t user) static bool limit_mt_check(const struct xt_mtchk_param *par) { struct xt_rateinfo *r = par->matchinfo; + struct xt_limit_priv *priv; /* Check for overflow. */ if (r->burst == 0 @@ -104,19 +110,30 @@ static bool limit_mt_check(const struct xt_mtchk_param *par) return false; } - /* For SMP, we only want to use one set of counters. */ - r->master = r; + priv = kmalloc(sizeof(*priv), GFP_KERNEL); + if (priv == NULL) + return -ENOMEM; + + /* For SMP, we only want to use one set of state. */ + r->master = priv; if (r->cost == 0) { /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies * 128. */ - r->prev = jiffies; - r->credit = user2credits(r->avg * r->burst); /* Credits full. */ + priv->prev = jiffies; + priv->credit = user2credits(r->avg * r->burst); /* Credits full. */ r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */ r->cost = user2credits(r->avg); } return true; } +static void limit_mt_destroy(const struct xt_mtdtor_param *par) +{ + const struct xt_rateinfo *info = par->matchinfo; + + kfree(info->master); +} + #ifdef CONFIG_COMPAT struct compat_xt_rateinfo { u_int32_t avg; @@ -167,6 +184,7 @@ static struct xt_match limit_mt_reg __read_mostly = { .family = NFPROTO_UNSPEC, .match = limit_mt, .checkentry = limit_mt_check, + .destroy = limit_mt_destroy, .matchsize = sizeof(struct xt_rateinfo), #ifdef CONFIG_COMPAT .compatsize = sizeof(struct compat_xt_rateinfo), diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index c84fce5e0f3e..01dd07b764ec 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -9,6 +9,10 @@ #include #include +struct xt_quota_priv { + uint64_t quota; +}; + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Sam Johnston "); MODULE_DESCRIPTION("Xtables: countdown quota match"); @@ -20,18 +24,20 @@ static DEFINE_SPINLOCK(quota_lock); static bool quota_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - struct xt_quota_info *q = - ((const struct xt_quota_info *)par->matchinfo)->master; + struct xt_quota_info *q = (void *)par->matchinfo; + struct xt_quota_priv *priv = q->master; bool ret = q->flags & XT_QUOTA_INVERT; spin_lock_bh("a_lock); - if (q->quota >= skb->len) { - q->quota -= skb->len; + if (priv->quota >= skb->len) { + priv->quota -= skb->len; ret = !ret; } else { /* we do not allow even small packets from now on */ - q->quota = 0; + priv->quota = 0; } + /* Copy quota back to matchinfo so that iptables can display it */ + q->quota = priv->quota; spin_unlock_bh("a_lock); return ret; @@ -43,17 +49,28 @@ static bool quota_mt_check(const struct xt_mtchk_param *par) if (q->flags & ~XT_QUOTA_MASK) return false; - /* For SMP, we only want to use one set of counters. */ - q->master = q; + + q->master = kmalloc(sizeof(*q->master), GFP_KERNEL); + if (q->master == NULL) + return -ENOMEM; + return true; } +static void quota_mt_destroy(const struct xt_mtdtor_param *par) +{ + const struct xt_quota_info *q = par->matchinfo; + + kfree(q->master); +} + static struct xt_match quota_mt_reg __read_mostly = { .name = "quota", .revision = 0, .family = NFPROTO_UNSPEC, .match = quota_mt, .checkentry = quota_mt_check, + .destroy = quota_mt_destroy, .matchsize = sizeof(struct xt_quota_info), .me = THIS_MODULE, }; diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c index 0d75141139d5..d8c0f8f1a78e 100644 --- a/net/netfilter/xt_statistic.c +++ b/net/netfilter/xt_statistic.c @@ -16,6 +16,10 @@ #include #include +struct xt_statistic_priv { + uint32_t count; +}; + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); MODULE_DESCRIPTION("Xtables: statistics-based matching (\"Nth\", random)"); @@ -27,7 +31,7 @@ static DEFINE_SPINLOCK(nth_lock); static bool statistic_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - struct xt_statistic_info *info = (void *)par->matchinfo; + const struct xt_statistic_info *info = par->matchinfo; bool ret = info->flags & XT_STATISTIC_INVERT; switch (info->mode) { @@ -36,10 +40,9 @@ statistic_mt(const struct sk_buff *skb, const struct xt_match_param *par) ret = !ret; break; case XT_STATISTIC_MODE_NTH: - info = info->master; spin_lock_bh(&nth_lock); - if (info->u.nth.count++ == info->u.nth.every) { - info->u.nth.count = 0; + if (info->master->count++ == info->u.nth.every) { + info->master->count = 0; ret = !ret; } spin_unlock_bh(&nth_lock); @@ -56,16 +59,31 @@ static bool statistic_mt_check(const struct xt_mtchk_param *par) if (info->mode > XT_STATISTIC_MODE_MAX || info->flags & ~XT_STATISTIC_MASK) return false; - info->master = info; + + info->master = kzalloc(sizeof(*info->master), GFP_KERNEL); + if (info->master == NULL) { + printk(KERN_ERR KBUILD_MODNAME ": Out of memory\n"); + return false; + } + info->master->count = info->u.nth.count; + return true; } +static void statistic_mt_destroy(const struct xt_mtdtor_param *par) +{ + const struct xt_statistic_info *info = par->matchinfo; + + kfree(info->master); +} + static struct xt_match xt_statistic_mt_reg __read_mostly = { .name = "statistic", .revision = 0, .family = NFPROTO_UNSPEC, .match = statistic_mt, .checkentry = statistic_mt_check, + .destroy = statistic_mt_destroy, .matchsize = sizeof(struct xt_statistic_info), .me = THIS_MODULE, }; -- cgit v1.2.3 From 626ba8fbac9156a94a80be46ffd2f2ce9e4e89a0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 16 Mar 2009 15:50:51 +0100 Subject: netfilter: ctnetlink: fix crash during expectation creation This patch fixes a possible crash due to the missing initialization of the expectation class when nf_ct_expect_related() is called. Reported-by: BORBELY Zoltan Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_netlink.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index cb78aa00399e..ed6d873ad384 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1780,6 +1780,7 @@ ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3, u32 pid, int report) goto out; } + exp->class = 0; exp->expectfn = NULL; exp->flags = 0; exp->master = ct; -- cgit v1.2.3 From ec8d540969da9a70790e9028d57b5b577dd7aa77 Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Mon, 16 Mar 2009 15:51:29 +0100 Subject: netfilter: conntrack: fix dropping packet after l4proto->packet() We currently use the negative value in the conntrack code to encode the packet verdict in the error. As NF_DROP is equal to 0, inverting NF_DROP makes no sense and, as a result, no packets are ever dropped. Signed-off-by: Christoph Paasch Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_core.c | 2 +- net/netfilter/nf_conntrack_proto_tcp.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 90ce9ddb9451..f4935e344b61 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -726,7 +726,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, NF_CT_ASSERT(skb->nfct); ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum); - if (ret < 0) { + if (ret <= 0) { /* Invalid: inverse of the return code tells * the netfilter core what to do */ pr_debug("nf_conntrack_in: Can't track with proto module\n"); diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index a1edb9c1adee..f3fd154d1ddd 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -859,7 +859,7 @@ static int tcp_packet(struct nf_conn *ct, */ if (nf_ct_kill(ct)) return -NF_REPEAT; - return -NF_DROP; + return NF_DROP; } /* Fall through */ case TCP_CONNTRACK_IGNORE: @@ -892,7 +892,7 @@ static int tcp_packet(struct nf_conn *ct, nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: killing out of sync session "); nf_ct_kill(ct); - return -NF_DROP; + return NF_DROP; } ct->proto.tcp.last_index = index; ct->proto.tcp.last_dir = dir; -- cgit v1.2.3 From d1238d5337e8e53cddea77c2a26d26b6eb5a982f Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Mon, 16 Mar 2009 15:52:11 +0100 Subject: netfilter: conntrack: check for NEXTHDR_NONE before header sanity checking NEXTHDR_NONE doesn't has an IPv6 option header, so the first check for the length will always fail and results in a confusing message "too short" if debugging enabled. With this patch, we check for NEXTHDR_NONE before length sanity checkings are done. Signed-off-by: Christoph Paasch Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/nf_conntrack_reasm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index ed4d79a9e4a6..058a5e4a60c3 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -528,14 +528,14 @@ find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) if (!ipv6_ext_hdr(nexthdr)) { return -1; } - if (len < (int)sizeof(struct ipv6_opt_hdr)) { - pr_debug("too short\n"); - return -1; - } if (nexthdr == NEXTHDR_NONE) { pr_debug("next header is none\n"); return -1; } + if (len < (int)sizeof(struct ipv6_opt_hdr)) { + pr_debug("too short\n"); + return -1; + } if (skb_copy_bits(skb, start, &hdr, sizeof(hdr))) BUG(); if (nexthdr == NEXTHDR_AUTH) -- cgit v1.2.3 From 81a1d3c31e3517f9939b3e04d21cf4a6b0997419 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 16 Mar 2009 16:23:30 +0100 Subject: net: sysctl_net - use net_eq to compare nets Signed-off-by: Cyrill Gorcunov Acked-by: Daniel Lezcano Signed-off-by: Patrick McHardy --- net/sysctl_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 972201cd5fa7..0b15d7250c40 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -61,7 +61,7 @@ static struct ctl_table_root net_sysctl_root = { static int net_ctl_ro_header_perms(struct ctl_table_root *root, struct nsproxy *namespaces, struct ctl_table *table) { - if (namespaces->net_ns == &init_net) + if (net_eq(namespaces->net_ns, &init_net)) return table->mode; else return table->mode & ~0222; -- cgit v1.2.3 From 1546000fe8db0d3f47b0ef1dd487ec23fbd95313 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 16 Mar 2009 16:30:49 +0100 Subject: net: netfilter conntrack - add per-net functionality for DCCP protocol Module specific data moved into per-net site and being allocated/freed during net namespace creation/deletion. Signed-off-by: Cyrill Gorcunov Acked-by: Daniel Lezcano Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_proto_dccp.c | 145 ++++++++++++++++++++++++-------- 1 file changed, 108 insertions(+), 37 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 8fcf1762fabf..d3d5a7fd73ce 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -16,6 +16,9 @@ #include #include +#include +#include + #include #include #include @@ -23,8 +26,6 @@ static DEFINE_RWLOCK(dccp_lock); -static int nf_ct_dccp_loose __read_mostly = 1; - /* Timeouts are based on values from RFC4340: * * - REQUEST: @@ -72,16 +73,6 @@ static int nf_ct_dccp_loose __read_mostly = 1; #define DCCP_MSL (2 * 60 * HZ) -static unsigned int dccp_timeout[CT_DCCP_MAX + 1] __read_mostly = { - [CT_DCCP_REQUEST] = 2 * DCCP_MSL, - [CT_DCCP_RESPOND] = 4 * DCCP_MSL, - [CT_DCCP_PARTOPEN] = 4 * DCCP_MSL, - [CT_DCCP_OPEN] = 12 * 3600 * HZ, - [CT_DCCP_CLOSEREQ] = 64 * HZ, - [CT_DCCP_CLOSING] = 64 * HZ, - [CT_DCCP_TIMEWAIT] = 2 * DCCP_MSL, -}; - static const char * const dccp_state_names[] = { [CT_DCCP_NONE] = "NONE", [CT_DCCP_REQUEST] = "REQUEST", @@ -393,6 +384,22 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] = }, }; +/* this module per-net specifics */ +static int dccp_net_id; +struct dccp_net { + int dccp_loose; + unsigned int dccp_timeout[CT_DCCP_MAX + 1]; +#ifdef CONFIG_SYSCTL + struct ctl_table_header *sysctl_header; + struct ctl_table *sysctl_table; +#endif +}; + +static inline struct dccp_net *dccp_pernet(struct net *net) +{ + return net_generic(net, dccp_net_id); +} + static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_tuple *tuple) { @@ -419,6 +426,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff) { struct net *net = nf_ct_net(ct); + struct dccp_net *dn; struct dccp_hdr _dh, *dh; const char *msg; u_int8_t state; @@ -429,7 +437,8 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE]; switch (state) { default: - if (nf_ct_dccp_loose == 0) { + dn = dccp_pernet(net); + if (dn->dccp_loose == 0) { msg = "nf_ct_dccp: not picking up existing connection "; goto out_invalid; } @@ -465,6 +474,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, u_int8_t pf, unsigned int hooknum) { struct net *net = nf_ct_net(ct); + struct dccp_net *dn; enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); struct dccp_hdr _dh, *dh; u_int8_t type, old_state, new_state; @@ -542,7 +552,9 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, ct->proto.dccp.last_pkt = type; ct->proto.dccp.state = new_state; write_unlock_bh(&dccp_lock); - nf_ct_refresh_acct(ct, ctinfo, skb, dccp_timeout[new_state]); + + dn = dccp_pernet(net); + nf_ct_refresh_acct(ct, ctinfo, skb, dn->dccp_timeout[new_state]); return NF_ACCEPT; } @@ -660,13 +672,11 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) #endif #ifdef CONFIG_SYSCTL -static unsigned int dccp_sysctl_table_users; -static struct ctl_table_header *dccp_sysctl_header; -static ctl_table dccp_sysctl_table[] = { +/* template, data assigned later */ +static struct ctl_table dccp_sysctl_table[] = { { .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_timeout_request", - .data = &dccp_timeout[CT_DCCP_REQUEST], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -674,7 +684,6 @@ static ctl_table dccp_sysctl_table[] = { { .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_timeout_respond", - .data = &dccp_timeout[CT_DCCP_RESPOND], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -682,7 +691,6 @@ static ctl_table dccp_sysctl_table[] = { { .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_timeout_partopen", - .data = &dccp_timeout[CT_DCCP_PARTOPEN], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -690,7 +698,6 @@ static ctl_table dccp_sysctl_table[] = { { .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_timeout_open", - .data = &dccp_timeout[CT_DCCP_OPEN], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -698,7 +705,6 @@ static ctl_table dccp_sysctl_table[] = { { .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_timeout_closereq", - .data = &dccp_timeout[CT_DCCP_CLOSEREQ], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -706,7 +712,6 @@ static ctl_table dccp_sysctl_table[] = { { .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_timeout_closing", - .data = &dccp_timeout[CT_DCCP_CLOSING], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -714,7 +719,6 @@ static ctl_table dccp_sysctl_table[] = { { .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_timeout_timewait", - .data = &dccp_timeout[CT_DCCP_TIMEWAIT], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -722,8 +726,7 @@ static ctl_table dccp_sysctl_table[] = { { .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_loose", - .data = &nf_ct_dccp_loose, - .maxlen = sizeof(nf_ct_dccp_loose), + .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, @@ -751,11 +754,6 @@ static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = { .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, #endif -#ifdef CONFIG_SYSCTL - .ctl_table_users = &dccp_sysctl_table_users, - .ctl_table_header = &dccp_sysctl_header, - .ctl_table = dccp_sysctl_table, -#endif }; static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = { @@ -776,34 +774,107 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = { .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, #endif +}; + +static __net_init int dccp_net_init(struct net *net) +{ + struct dccp_net *dn; + int err; + + dn = kmalloc(sizeof(*dn), GFP_KERNEL); + if (!dn) + return -ENOMEM; + + /* default values */ + dn->dccp_loose = 1; + dn->dccp_timeout[CT_DCCP_REQUEST] = 2 * DCCP_MSL; + dn->dccp_timeout[CT_DCCP_RESPOND] = 4 * DCCP_MSL; + dn->dccp_timeout[CT_DCCP_PARTOPEN] = 4 * DCCP_MSL; + dn->dccp_timeout[CT_DCCP_OPEN] = 12 * 3600 * HZ; + dn->dccp_timeout[CT_DCCP_CLOSEREQ] = 64 * HZ; + dn->dccp_timeout[CT_DCCP_CLOSING] = 64 * HZ; + dn->dccp_timeout[CT_DCCP_TIMEWAIT] = 2 * DCCP_MSL; + + err = net_assign_generic(net, dccp_net_id, dn); + if (err) + goto out; + #ifdef CONFIG_SYSCTL - .ctl_table_users = &dccp_sysctl_table_users, - .ctl_table_header = &dccp_sysctl_header, - .ctl_table = dccp_sysctl_table, + err = -ENOMEM; + dn->sysctl_table = kmemdup(dccp_sysctl_table, + sizeof(dccp_sysctl_table), GFP_KERNEL); + if (!dn->sysctl_table) + goto out; + + dn->sysctl_table[0].data = &dn->dccp_timeout[CT_DCCP_REQUEST]; + dn->sysctl_table[1].data = &dn->dccp_timeout[CT_DCCP_RESPOND]; + dn->sysctl_table[2].data = &dn->dccp_timeout[CT_DCCP_PARTOPEN]; + dn->sysctl_table[3].data = &dn->dccp_timeout[CT_DCCP_OPEN]; + dn->sysctl_table[4].data = &dn->dccp_timeout[CT_DCCP_CLOSEREQ]; + dn->sysctl_table[5].data = &dn->dccp_timeout[CT_DCCP_CLOSING]; + dn->sysctl_table[6].data = &dn->dccp_timeout[CT_DCCP_TIMEWAIT]; + dn->sysctl_table[7].data = &dn->dccp_loose; + + dn->sysctl_header = register_net_sysctl_table(net, + nf_net_netfilter_sysctl_path, dn->sysctl_table); + if (!dn->sysctl_header) { + kfree(dn->sysctl_table); + goto out; + } #endif + + return 0; + +out: + kfree(dn); + return err; +} + +static __net_exit void dccp_net_exit(struct net *net) +{ + struct dccp_net *dn = dccp_pernet(net); +#ifdef CONFIG_SYSCTL + unregister_net_sysctl_table(dn->sysctl_header); + kfree(dn->sysctl_table); +#endif + kfree(dn); + + net_assign_generic(net, dccp_net_id, NULL); +} + +static struct pernet_operations dccp_net_ops = { + .init = dccp_net_init, + .exit = dccp_net_exit, }; static int __init nf_conntrack_proto_dccp_init(void) { int err; - err = nf_conntrack_l4proto_register(&dccp_proto4); + err = register_pernet_gen_subsys(&dccp_net_id, &dccp_net_ops); if (err < 0) goto err1; - err = nf_conntrack_l4proto_register(&dccp_proto6); + err = nf_conntrack_l4proto_register(&dccp_proto4); if (err < 0) goto err2; + + err = nf_conntrack_l4proto_register(&dccp_proto6); + if (err < 0) + goto err3; return 0; -err2: +err3: nf_conntrack_l4proto_unregister(&dccp_proto4); +err2: + unregister_pernet_gen_subsys(dccp_net_id, &dccp_net_ops); err1: return err; } static void __exit nf_conntrack_proto_dccp_fini(void) { + unregister_pernet_gen_subsys(dccp_net_id, &dccp_net_ops); nf_conntrack_l4proto_unregister(&dccp_proto6); nf_conntrack_l4proto_unregister(&dccp_proto4); } -- cgit v1.2.3 From 0269ea4937343536ec7e85649932bc8c9686ea78 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 16 Mar 2009 17:10:36 +0100 Subject: netfilter: xtables: add cluster match This patch adds the iptables cluster match. This match can be used to deploy gateway and back-end load-sharing clusters. The cluster can be composed of 32 nodes maximum (although I have only tested this with two nodes, so I cannot tell what is the real scalability limit of this solution in terms of cluster nodes). Assuming that all the nodes see all packets (see below for an example on how to do that if your switch does not allow this), the cluster match decides if this node has to handle a packet given: (jhash(source IP) % total_nodes) & node_mask For related connections, the master conntrack is used. The following is an example of its use to deploy a gateway cluster composed of two nodes (where this is the node 1): iptables -I PREROUTING -t mangle -i eth1 -m cluster \ --cluster-total-nodes 2 --cluster-local-node 1 \ --cluster-proc-name eth1 -j MARK --set-mark 0xffff iptables -A PREROUTING -t mangle -i eth1 \ -m mark ! --mark 0xffff -j DROP iptables -A PREROUTING -t mangle -i eth2 -m cluster \ --cluster-total-nodes 2 --cluster-local-node 1 \ --cluster-proc-name eth2 -j MARK --set-mark 0xffff iptables -A PREROUTING -t mangle -i eth2 \ -m mark ! --mark 0xffff -j DROP And the following commands to make all nodes see the same packets: ip maddr add 01:00:5e:00:01:01 dev eth1 ip maddr add 01:00:5e:00:01:02 dev eth2 arptables -I OUTPUT -o eth1 --h-length 6 \ -j mangle --mangle-mac-s 01:00:5e:00:01:01 arptables -I INPUT -i eth1 --h-length 6 \ --destination-mac 01:00:5e:00:01:01 \ -j mangle --mangle-mac-d 00:zz:yy:xx:5a:27 arptables -I OUTPUT -o eth2 --h-length 6 \ -j mangle --mangle-mac-s 01:00:5e:00:01:02 arptables -I INPUT -i eth2 --h-length 6 \ --destination-mac 01:00:5e:00:01:02 \ -j mangle --mangle-mac-d 00:zz:yy:xx:5a:27 In the case of TCP connections, pickup facility has to be disabled to avoid marking TCP ACK packets coming in the reply direction as valid. echo 0 > /proc/sys/net/netfilter/nf_conntrack_tcp_loose BTW, some final notes: * This match mangles the skbuff pkt_type in case that it detects PACKET_MULTICAST for a non-multicast address. This may be done in a PKTTYPE target for this sole purpose. * This match supersedes the CLUSTERIP target. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- net/netfilter/Kconfig | 16 +++++ net/netfilter/Makefile | 1 + net/netfilter/xt_cluster.c | 164 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 181 insertions(+) create mode 100644 net/netfilter/xt_cluster.c (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index cdbaaff6d0d6..2562d05dbaf5 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -527,6 +527,22 @@ config NETFILTER_XT_TARGET_TCPOPTSTRIP This option adds a "TCPOPTSTRIP" target, which allows you to strip TCP options from TCP packets. +config NETFILTER_XT_MATCH_CLUSTER + tristate '"cluster" match support' + depends on NF_CONNTRACK + depends on NETFILTER_ADVANCED + ---help--- + This option allows you to build work-load-sharing clusters of + network servers/stateful firewalls without having a dedicated + load-balancing router/server/switch. Basically, this match returns + true when the packet must be handled by this cluster node. Thus, + all nodes see all packets and this match decides which node handles + what packets. The work-load sharing algorithm is based on source + address hashing. + + If you say Y or M here, try `iptables -m cluster --help` for + more information. + config NETFILTER_XT_MATCH_COMMENT tristate '"comment" match support' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 7a9b8397573a..6282060fbda9 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -59,6 +59,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o # matches +obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c new file mode 100644 index 000000000000..ad5bd890e4e8 --- /dev/null +++ b/net/netfilter/xt_cluster.c @@ -0,0 +1,164 @@ +/* + * (C) 2008-2009 Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include + +#include +#include +#include + +static inline u_int32_t nf_ct_orig_ipv4_src(const struct nf_conn *ct) +{ + return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; +} + +static inline const void *nf_ct_orig_ipv6_src(const struct nf_conn *ct) +{ + return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6; +} + +static inline u_int32_t +xt_cluster_hash_ipv4(u_int32_t ip, const struct xt_cluster_match_info *info) +{ + return jhash_1word(ip, info->hash_seed); +} + +static inline u_int32_t +xt_cluster_hash_ipv6(const void *ip, const struct xt_cluster_match_info *info) +{ + return jhash2(ip, NF_CT_TUPLE_L3SIZE / sizeof(__u32), info->hash_seed); +} + +static inline u_int32_t +xt_cluster_hash(const struct nf_conn *ct, + const struct xt_cluster_match_info *info) +{ + u_int32_t hash = 0; + + switch(nf_ct_l3num(ct)) { + case AF_INET: + hash = xt_cluster_hash_ipv4(nf_ct_orig_ipv4_src(ct), info); + break; + case AF_INET6: + hash = xt_cluster_hash_ipv6(nf_ct_orig_ipv6_src(ct), info); + break; + default: + WARN_ON(1); + break; + } + return (((u64)hash * info->total_nodes) >> 32); +} + +static inline bool +xt_cluster_is_multicast_addr(const struct sk_buff *skb, u_int8_t family) +{ + bool is_multicast = false; + + switch(family) { + case NFPROTO_IPV4: + is_multicast = ipv4_is_multicast(ip_hdr(skb)->daddr); + break; + case NFPROTO_IPV6: + is_multicast = ipv6_addr_type(&ipv6_hdr(skb)->daddr) & + IPV6_ADDR_MULTICAST; + break; + default: + WARN_ON(1); + break; + } + return is_multicast; +} + +static bool +xt_cluster_mt(const struct sk_buff *skb, const struct xt_match_param *par) +{ + struct sk_buff *pskb = (struct sk_buff *)skb; + const struct xt_cluster_match_info *info = par->matchinfo; + const struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + unsigned long hash; + + /* This match assumes that all nodes see the same packets. This can be + * achieved if the switch that connects the cluster nodes support some + * sort of 'port mirroring'. However, if your switch does not support + * this, your cluster nodes can reply ARP request using a multicast MAC + * address. Thus, your switch will flood the same packets to the + * cluster nodes with the same multicast MAC address. Using a multicast + * link address is a RFC 1812 (section 3.3.2) violation, but this works + * fine in practise. + * + * Unfortunately, if you use the multicast MAC address, the link layer + * sets skbuff's pkt_type to PACKET_MULTICAST, which is not accepted + * by TCP and others for packets coming to this node. For that reason, + * this match mangles skbuff's pkt_type if it detects a packet + * addressed to a unicast address but using PACKET_MULTICAST. Yes, I + * know, matches should not alter packets, but we are doing this here + * because we would need to add a PKTTYPE target for this sole purpose. + */ + if (!xt_cluster_is_multicast_addr(skb, par->family) && + skb->pkt_type == PACKET_MULTICAST) { + pskb->pkt_type = PACKET_HOST; + } + + ct = nf_ct_get(skb, &ctinfo); + if (ct == NULL) + return false; + + if (ct == &nf_conntrack_untracked) + return false; + + if (ct->master) + hash = xt_cluster_hash(ct->master, info); + else + hash = xt_cluster_hash(ct, info); + + return !!((1 << hash) & info->node_mask) ^ + !!(info->flags & XT_CLUSTER_F_INV); +} + +static bool xt_cluster_mt_checkentry(const struct xt_mtchk_param *par) +{ + struct xt_cluster_match_info *info = par->matchinfo; + + if (info->node_mask >= (1 << info->total_nodes)) { + printk(KERN_ERR "xt_cluster: this node mask cannot be " + "higher than the total number of nodes\n"); + return false; + } + return true; +} + +static struct xt_match xt_cluster_match __read_mostly = { + .name = "cluster", + .family = NFPROTO_UNSPEC, + .match = xt_cluster_mt, + .checkentry = xt_cluster_mt_checkentry, + .matchsize = sizeof(struct xt_cluster_match_info), + .me = THIS_MODULE, +}; + +static int __init xt_cluster_mt_init(void) +{ + return xt_register_match(&xt_cluster_match); +} + +static void __exit xt_cluster_mt_fini(void) +{ + xt_unregister_match(&xt_cluster_match); +} + +MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Xtables: hash-based cluster match"); +MODULE_ALIAS("ipt_cluster"); +MODULE_ALIAS("ip6t_cluster"); +module_init(xt_cluster_mt_init); +module_exit(xt_cluster_mt_fini); -- cgit v1.2.3 From d1c76af9e2434fac3add561e26c61b06503de986 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 16 Mar 2009 10:50:02 -0700 Subject: GRO: Move netpoll checks to correct location As my netpoll fix for net doesn't really work for net-next, we need this update to move the checks into the right place. As it stands we may pass freed skbs to netpoll_receive_skb. This patch also introduces a netpoll_rx_on function to avoid GRO completely if we're invoked through netpoll. This might seem paranoid but as netpoll may have an external receive hook it's better to be safe than sorry. I don't think we need this for 2.6.29 though since there's nothing immediately broken by it. This patch also moves the GRO_* return values to netdevice.h since VLAN needs them too (I tried to avoid this originally but alas this seems to be the easiest way out). This fixes a bug in VLAN where it continued to use the old return value 2 instead of the correct GRO_DROP. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/8021q/vlan_core.c | 11 ++++------- net/core/dev.c | 17 +++-------------- 2 files changed, 7 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 2d6e405fc498..6227248597c4 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -79,6 +79,9 @@ static int vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp, { struct sk_buff *p; + if (netpoll_rx_on(skb)) + return GRO_NORMAL; + if (skb_bond_should_drop(skb)) goto drop; @@ -98,7 +101,7 @@ static int vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp, return dev_gro_receive(napi, skb); drop: - return 2; + return GRO_DROP; } int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, @@ -106,9 +109,6 @@ int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, { skb_gro_reset_offset(skb); - if (netpoll_receive_skb(skb)) - return NET_RX_DROP; - return napi_skb_finish(vlan_gro_common(napi, grp, vlan_tci, skb), skb); } EXPORT_SYMBOL(vlan_gro_receive); @@ -121,9 +121,6 @@ int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, if (!skb) return NET_RX_DROP; - if (netpoll_receive_skb(skb)) - return NET_RX_DROP; - return napi_frags_finish(napi, skb, vlan_gro_common(napi, grp, vlan_tci, skb)); } diff --git a/net/core/dev.c b/net/core/dev.c index 033d7ca28e6e..7bd3c29c5a78 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -135,14 +135,6 @@ /* This should be increased if a protocol with a bigger head is added. */ #define GRO_MAX_HEAD (MAX_HEADER + 128) -enum { - GRO_MERGED, - GRO_MERGED_FREE, - GRO_HELD, - GRO_NORMAL, - GRO_DROP, -}; - /* * The list of packet types we will receive (as opposed to discard) * and the routines to invoke. @@ -2474,6 +2466,9 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff *p; + if (netpoll_rx_on(skb)) + return GRO_NORMAL; + for (p = napi->gro_list; p; p = p->next) { NAPI_GRO_CB(p)->same_flow = !compare_ether_header( skb_mac_header(p), skb_gro_mac_header(skb)); @@ -2487,9 +2482,6 @@ int napi_skb_finish(int ret, struct sk_buff *skb) { int err = NET_RX_SUCCESS; - if (netpoll_receive_skb(skb)) - return NET_RX_DROP; - switch (ret) { case GRO_NORMAL: return netif_receive_skb(skb); @@ -2587,9 +2579,6 @@ int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret) { int err = NET_RX_SUCCESS; - if (netpoll_receive_skb(skb)) - return NET_RX_DROP; - switch (ret) { case GRO_NORMAL: case GRO_HELD: -- cgit v1.2.3 From 6f16bf3bdb94b567e2b6663378efb2dbf40db133 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Wed, 11 Mar 2009 11:05:25 -0400 Subject: lib80211: silence excessive crypto debugging messages When they were part of the now defunct ieee80211 component, these messages were only visible when special debugging settings were enabled. Let's mirror that with a new lib80211 debugging Kconfig option. Signed-off-by: John W. Linville --- net/wireless/Kconfig | 10 ++++++++++ net/wireless/lib80211_crypt_ccmp.c | 2 ++ net/wireless/lib80211_crypt_tkip.c | 4 ++++ 3 files changed, 16 insertions(+) (limited to 'net') diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index e28e2b8fa436..092ae6faccca 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -102,3 +102,13 @@ config LIB80211_CRYPT_CCMP config LIB80211_CRYPT_TKIP tristate + +config LIB80211_DEBUG + bool "lib80211 debugging messages" + depends on LIB80211 + default n + ---help--- + You can enable this if you want verbose debugging messages + from lib80211. + + If unsure, say N. diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c index db428194c16a..2301dc1edc4c 100644 --- a/net/wireless/lib80211_crypt_ccmp.c +++ b/net/wireless/lib80211_crypt_ccmp.c @@ -337,6 +337,7 @@ static int lib80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) pos += 8; if (ccmp_replay_check(pn, key->rx_pn)) { +#ifdef CONFIG_LIB80211_DEBUG if (net_ratelimit()) { printk(KERN_DEBUG "CCMP: replay detected: STA=%pM " "previous PN %02x%02x%02x%02x%02x%02x " @@ -346,6 +347,7 @@ static int lib80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) key->rx_pn[3], key->rx_pn[4], key->rx_pn[5], pn[0], pn[1], pn[2], pn[3], pn[4], pn[5]); } +#endif key->dot11RSNAStatsCCMPReplays++; return -4; } diff --git a/net/wireless/lib80211_crypt_tkip.c b/net/wireless/lib80211_crypt_tkip.c index 7e8e22bfed90..c36287399d7e 100644 --- a/net/wireless/lib80211_crypt_tkip.c +++ b/net/wireless/lib80211_crypt_tkip.c @@ -465,12 +465,14 @@ static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) pos += 8; if (tkip_replay_check(iv32, iv16, tkey->rx_iv32, tkey->rx_iv16)) { +#ifdef CONFIG_LIB80211_DEBUG if (net_ratelimit()) { printk(KERN_DEBUG "TKIP: replay detected: STA=%pM" " previous TSC %08x%04x received TSC " "%08x%04x\n", hdr->addr2, tkey->rx_iv32, tkey->rx_iv16, iv32, iv16); } +#endif tkey->dot11RSNAStatsTKIPReplays++; return -4; } @@ -505,10 +507,12 @@ static int lib80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) * it needs to be recalculated for the next packet. */ tkey->rx_phase1_done = 0; } +#ifdef CONFIG_LIB80211_DEBUG if (net_ratelimit()) { printk(KERN_DEBUG "TKIP: ICV error detected: STA=" "%pM\n", hdr->addr2); } +#endif tkey->dot11RSNAStatsTKIPICVErrors++; return -5; } -- cgit v1.2.3 From 055249d20de06c290fe7625be0a7164bef3958f5 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Fri, 13 Mar 2009 13:59:39 +0200 Subject: mac80211: Fix panic on fragmentation with power saving It was possible to hit a kernel panic on NULL pointer dereference in dev_queue_xmit() when sending power save buffered frames to a STA that woke up from sleep. This happened when the buffered frame was requeued for transmission in ap_sta_ps_end(). In order to avoid the panic, copy the skb->dev and skb->iif values from the first fragment to all other fragments. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- net/mac80211/tx.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 94de5033f0b6..37e3d5ef7e3f 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -752,6 +752,8 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) skb_copy_queue_mapping(frag, first); frag->do_not_encrypt = first->do_not_encrypt; + frag->dev = first->dev; + frag->iif = first->iif; pos += copylen; left -= copylen; -- cgit v1.2.3 From 0eeb59fe2cd84b62f374874a59e62402e13f48b3 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 5 Mar 2009 17:23:46 +0200 Subject: mac80211: Fix WMM ACM parsing and AC downgrade operation Incorrect local->wmm_acm bits were set for AC_BK and AC_BE. Fix this and add some comments to make it easier to understand the AC-to-UP(pair) mapping. Set the wmm_acm bits (and show WMM debug) even if the driver does not implement conf_tx() handler. In addition, fix the ACM-based AC downgrade code to not use the highest priority in error cases. We need to break the loop to get the correct AC_BK value (3) instead of returning 0 (which would indicate AC_VO). The comment here was not really very useful either, so let's provide somewhat more helpful description of the situation. Since it is very unlikely that the ACM flag would be set for AC_BK and AC_BE, these bugs are not likely to be seen in real life networks. Anyway, better do these things correctly should someone really use silly AP configuration (and to pass some functionality tests, too). Remove the TODO comment about handling ACM. Downgrading AC is perfectly valid mechanism for ACM. Eventually, we may add support for WMM-AC and send a request for a TS, but anyway, that functionality won't be here at the location of this TODO comment. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 24 ++++++++++-------------- net/mac80211/wme.c | 9 ++++++--- 2 files changed, 16 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 391445c6b892..eeb6da8505c6 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -417,9 +417,6 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, memset(¶ms, 0, sizeof(params)); - if (!local->ops->conf_tx) - return; - local->wmm_acm = 0; for (; left >= 4; left -= 4, pos += 4) { int aci = (pos[0] >> 5) & 0x03; @@ -427,26 +424,26 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, int queue; switch (aci) { - case 1: + case 1: /* AC_BK */ queue = 3; if (acm) - local->wmm_acm |= BIT(0) | BIT(3); + local->wmm_acm |= BIT(1) | BIT(2); /* BK/- */ break; - case 2: + case 2: /* AC_VI */ queue = 1; if (acm) - local->wmm_acm |= BIT(4) | BIT(5); + local->wmm_acm |= BIT(4) | BIT(5); /* CL/VI */ break; - case 3: + case 3: /* AC_VO */ queue = 0; if (acm) - local->wmm_acm |= BIT(6) | BIT(7); + local->wmm_acm |= BIT(6) | BIT(7); /* VO/NC */ break; - case 0: + case 0: /* AC_BE */ default: queue = 2; if (acm) - local->wmm_acm |= BIT(1) | BIT(2); + local->wmm_acm |= BIT(0) | BIT(3); /* BE/EE */ break; } @@ -460,9 +457,8 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, local->mdev->name, queue, aci, acm, params.aifs, params.cw_min, params.cw_max, params.txop); #endif - /* TODO: handle ACM (block TX, fallback to next lowest allowed - * AC for now) */ - if (local->ops->conf_tx(local_to_hw(local), queue, ¶ms)) { + if (local->ops->conf_tx && + local->ops->conf_tx(local_to_hw(local), queue, ¶ms)) { printk(KERN_DEBUG "%s: failed to set TX queue " "parameters for queue %d\n", local->mdev->name, queue); } diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 093a4ab7f28b..0b8ad1f4ecdd 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -99,10 +99,13 @@ static u16 classify80211(struct ieee80211_local *local, struct sk_buff *skb) /* in case we are a client verify acm is not set for this ac */ while (unlikely(local->wmm_acm & BIT(skb->priority))) { if (wme_downgrade_ac(skb)) { - /* The old code would drop the packet in this - * case. + /* + * This should not really happen. The AP has marked all + * lower ACs to require admission control which is not + * a reasonable configuration. Allow the frame to be + * transmitted using AC_BK as a workaround. */ - return 0; + break; } } -- cgit v1.2.3 From 611b6a82aaae33a4d3a274fd6cccbdcd1c7cef4d Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 5 Mar 2009 21:19:21 -0800 Subject: cfg80211: Enable passive scan on channels 12-14 for world roaming Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 58df98f10990..25eb1554f8a6 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -86,15 +86,26 @@ struct reg_beacon { /* We keep a static world regulatory domain in case of the absence of CRDA */ static const struct ieee80211_regdomain world_regdom = { - .n_reg_rules = 3, + .n_reg_rules = 5, .alpha2 = "00", .reg_rules = { /* IEEE 802.11b/g, channels 1..11 */ REG_RULE(2412-10, 2462+10, 40, 6, 20, 0), - /* IEEE 802.11a, channel 36..48 */ - REG_RULE(5180-10, 5240+10, 40, 6, 23, + /* IEEE 802.11b/g, channels 12..13. No HT40 + * channel fits here. */ + REG_RULE(2467-10, 2472+10, 20, 6, 20, NL80211_RRF_PASSIVE_SCAN | NL80211_RRF_NO_IBSS), + /* IEEE 802.11 channel 14 - Only JP enables + * this and for 802.11b only */ + REG_RULE(2484-10, 2484+10, 20, 6, 20, + NL80211_RRF_PASSIVE_SCAN | + NL80211_RRF_NO_IBSS | + NL80211_RRF_NO_OFDM), + /* IEEE 802.11a, channel 36..48 */ + REG_RULE(5180-10, 5240+10, 40, 6, 23, + NL80211_RRF_PASSIVE_SCAN | + NL80211_RRF_NO_IBSS), /* NB: 5260 MHz - 5700 MHz requies DFS */ -- cgit v1.2.3 From ec329acef99ded8dad59e1ef8a5a02b823083353 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Thu, 5 Mar 2009 21:19:22 -0800 Subject: cfg80211: fix max tx power for world regdom on 5 GHz to 20dBm This is the lowest value amongst countries which do enable 5 GHz operation. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 25eb1554f8a6..fa738be897a3 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -103,14 +103,14 @@ static const struct ieee80211_regdomain world_regdom = { NL80211_RRF_NO_IBSS | NL80211_RRF_NO_OFDM), /* IEEE 802.11a, channel 36..48 */ - REG_RULE(5180-10, 5240+10, 40, 6, 23, + REG_RULE(5180-10, 5240+10, 40, 6, 20, NL80211_RRF_PASSIVE_SCAN | NL80211_RRF_NO_IBSS), /* NB: 5260 MHz - 5700 MHz requies DFS */ /* IEEE 802.11a, channel 149..165 */ - REG_RULE(5745-10, 5825+10, 40, 6, 23, + REG_RULE(5745-10, 5825+10, 40, 6, 20, NL80211_RRF_PASSIVE_SCAN | NL80211_RRF_NO_IBSS), } -- cgit v1.2.3 From af88b9078d4aa31d667d2d82601ede9cae3bac37 Mon Sep 17 00:00:00 2001 From: Helmut Schaa Date: Mon, 9 Mar 2009 15:47:08 +0100 Subject: mac80211: handle failed scan requests in STA mode If cfg80211 requests a scan it awaits either a return code != 0 from the scan function or the cfg80211_scan_done to be called. In case of a STA mac80211's scan function ever returns 0 and queues the scan request. If ieee80211_sta_work is executed and ieee80211_start_scan fails for some reason cfg80211_scan_done will never be called but cfg80211 still thinks the scan was triggered successfully and will refuse any future scan requests due to drv->scan_req not being cleaned up. If a scan is triggered from within the MLME a similar problem appears. If ieee80211_start_scan returns an error, local->scan_req will not be reset and mac80211 will refuse any future scan requests. Hence, in both cases call ieee80211_scan_failed (which notifies cfg80211 and resets local->scan_req) if ieee80211_start_scan returns an error. Signed-off-by: Helmut Schaa Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 1 + net/mac80211/mlme.c | 14 ++++++++++++-- net/mac80211/scan.c | 12 ++++++++++++ 3 files changed, 25 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index ecbc8e0cb3e7..fbb91f1aebb2 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -972,6 +972,7 @@ int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, char *ie, size_t len); void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local); +void ieee80211_scan_failed(struct ieee80211_local *local); int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata, struct cfg80211_scan_request *req); struct ieee80211_bss * diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index eeb6da8505c6..841b8450b3de 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1720,7 +1720,10 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata) local->int_scan_req.ssids[0].ssid_len = 0; else local->int_scan_req.ssids[0].ssid_len = ifmgd->ssid_len; - ieee80211_start_scan(sdata, &local->int_scan_req); + + if (ieee80211_start_scan(sdata, &local->int_scan_req)) + ieee80211_scan_failed(local); + ifmgd->state = IEEE80211_STA_MLME_AUTHENTICATE; set_bit(IEEE80211_STA_REQ_AUTH, &ifmgd->request); } else { @@ -1757,7 +1760,14 @@ static void ieee80211_sta_work(struct work_struct *work) ifmgd->state != IEEE80211_STA_MLME_AUTHENTICATE && ifmgd->state != IEEE80211_STA_MLME_ASSOCIATE && test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifmgd->request)) { - ieee80211_start_scan(sdata, local->scan_req); + /* + * The call to ieee80211_start_scan can fail but ieee80211_request_scan + * (which queued ieee80211_sta_work) did not return an error. Thus, call + * ieee80211_scan_failed here if ieee80211_start_scan fails in order to + * notify the scan requester. + */ + if (ieee80211_start_scan(sdata, local->scan_req)) + ieee80211_scan_failed(local); return; } diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 0e81e1633a66..5030a3c87509 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -202,6 +202,18 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, return RX_QUEUED; } +void ieee80211_scan_failed(struct ieee80211_local *local) +{ + if (WARN_ON(!local->scan_req)) + return; + + /* notify cfg80211 about the failed scan */ + if (local->scan_req != &local->int_scan_req) + cfg80211_scan_done(local->scan_req, true); + + local->scan_req = NULL; +} + void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) { struct ieee80211_local *local = hw_to_local(hw); -- cgit v1.2.3 From 1a28c78b46caec7628985728e7f0c4aef68e33e7 Mon Sep 17 00:00:00 2001 From: Herton Ronaldo Krzesinski Date: Tue, 10 Mar 2009 10:11:09 -0300 Subject: mac80211: deauth before flushing STA information Even after commit "mac80211: deauth when interface is marked down" (e327b847 on Linus tree), userspace still isn't notified when interface goes down. There isn't a problem with this commit, but because of other code changes it doesn't work on kernels >= 2.6.28 (works if same/similar change applied on 2.6.27 for example). The issue is as follows: after commit "mac80211: restructure disassoc/deauth flows" in 2.6.28, the call to ieee80211_sta_deauthenticate added by commit e327b847 will not work: because we do sta_info_flush(local, sdata) inside ieee80211_stop (iface.c), all stations in interface are cleared, so when calling ieee80211_sta_deauthenticate->ieee80211_set_disassoc (mlme.c), inside ieee80211_set_disassoc we have this in the beginning: sta = sta_info_get(local, ifsta->bssid); if (!sta) { The !sta check triggers, thus the function returns early and ieee80211_sta_send_apinfo(sdata, ifsta) later isn't called, so wpa_supplicant/userspace isn't notified with SIOCGIWAP. This commit moves deauthentication to before flushing STA info (sta_info_flush), thus the above can't happen and userspace is really notified when interface goes down. Signed-off-by: Herton Ronaldo Krzesinski Signed-off-by: John W. Linville --- net/mac80211/iface.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 2acc416e77e1..f9f27b9cadbe 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -369,6 +369,18 @@ static int ieee80211_stop(struct net_device *dev) rcu_read_unlock(); + /* + * Announce that we are leaving the network, in case we are a + * station interface type. This must be done before removing + * all stations associated with sta_info_flush, otherwise STA + * information will be gone and no announce being done. + */ + if (sdata->vif.type == NL80211_IFTYPE_STATION) { + if (sdata->u.mgd.state != IEEE80211_STA_MLME_DISABLED) + ieee80211_sta_deauthenticate(sdata, + WLAN_REASON_DEAUTH_LEAVING); + } + /* * Remove all stations associated with this interface. * @@ -454,10 +466,6 @@ static int ieee80211_stop(struct net_device *dev) netif_addr_unlock_bh(local->mdev); break; case NL80211_IFTYPE_STATION: - /* Announce that we are leaving the network. */ - if (sdata->u.mgd.state != IEEE80211_STA_MLME_DISABLED) - ieee80211_sta_deauthenticate(sdata, - WLAN_REASON_DEAUTH_LEAVING); memset(sdata->u.mgd.bssid, 0, ETH_ALEN); del_timer_sync(&sdata->u.mgd.chswitch_timer); del_timer_sync(&sdata->u.mgd.timer); -- cgit v1.2.3 From 0fee54cab7d5ebc58fad8c6a0703c4ea016405e3 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 9 Mar 2009 22:07:40 -0400 Subject: cfg80211: remove REGDOM_SET_BY_INIT This is not used as we can always just assume the first regulatory domain set will _always_ be a static regulatory domain. REGDOM_SET_BY_CORE will be the first request from cfg80211 for a regdomain and that then populates the first regulatory request. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index fa738be897a3..47ff44751b70 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1255,8 +1255,6 @@ static int ignore_request(struct wiphy *wiphy, return 0; switch (pending_request->initiator) { - case REGDOM_SET_BY_INIT: - return -EINVAL; case REGDOM_SET_BY_CORE: return -EINVAL; case REGDOM_SET_BY_COUNTRY_IE: -- cgit v1.2.3 From 7db90f4a25bd4184f3d36dfa4f512f53b0448da7 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 9 Mar 2009 22:07:41 -0400 Subject: cfg80211: move enum reg_set_by to nl80211.h We do this so we can later inform userspace who set the regulatory domain and provide details of the request. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/core.c | 2 +- net/wireless/core.h | 3 +- net/wireless/reg.c | 82 +++++++++++++++++++++++++++++------------------------ 3 files changed, 48 insertions(+), 39 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index dd7f222919fe..c939f5ee065e 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -350,7 +350,7 @@ int wiphy_register(struct wiphy *wiphy) mutex_lock(&cfg80211_mutex); /* set up regulatory info */ - wiphy_update_regulatory(wiphy, REGDOM_SET_BY_CORE); + wiphy_update_regulatory(wiphy, NL80211_REGDOM_SET_BY_CORE); res = device_add(&drv->wiphy.dev); if (res) diff --git a/net/wireless/core.h b/net/wireless/core.h index f6c53f5807f4..6acd483a61f8 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -136,7 +136,8 @@ extern int cfg80211_dev_rename(struct cfg80211_registered_device *drv, char *newname); void ieee80211_set_bitrate_flags(struct wiphy *wiphy); -void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby); +void wiphy_update_regulatory(struct wiphy *wiphy, + enum nl80211_reg_initiator setby); void cfg80211_bss_expire(struct cfg80211_registered_device *dev); void cfg80211_bss_age(struct cfg80211_registered_device *dev, diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 47ff44751b70..68fde6d33dc3 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -857,8 +857,8 @@ static int freq_reg_info_regd(struct wiphy *wiphy, * Follow the driver's regulatory domain, if present, unless a country * IE has been processed or a user wants to help complaince further */ - if (last_request->initiator != REGDOM_SET_BY_COUNTRY_IE && - last_request->initiator != REGDOM_SET_BY_USER && + if (last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && + last_request->initiator != NL80211_REGDOM_SET_BY_USER && wiphy->regd) regd = wiphy->regd; @@ -943,7 +943,8 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, * http://tinyurl.com/11d-clarification */ if (r == -ERANGE && - last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) { + last_request->initiator == + NL80211_REGDOM_SET_BY_COUNTRY_IE) { #ifdef CONFIG_CFG80211_REG_DEBUG printk(KERN_DEBUG "cfg80211: Leaving channel %d MHz " "intact on %s - no rule found in band on " @@ -956,7 +957,8 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, * for the band so we respect its band definitions */ #ifdef CONFIG_CFG80211_REG_DEBUG - if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) + if (last_request->initiator == + NL80211_REGDOM_SET_BY_COUNTRY_IE) printk(KERN_DEBUG "cfg80211: Disabling " "channel %d MHz on %s due to " "Country IE\n", @@ -970,7 +972,7 @@ static void handle_channel(struct wiphy *wiphy, enum ieee80211_band band, power_rule = ®_rule->power_rule; - if (last_request->initiator == REGDOM_SET_BY_DRIVER && + if (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER && request_wiphy && request_wiphy == wiphy && request_wiphy->strict_regulatory) { /* @@ -1011,11 +1013,12 @@ static void handle_band(struct wiphy *wiphy, enum ieee80211_band band) handle_channel(wiphy, band, i); } -static bool ignore_reg_update(struct wiphy *wiphy, enum reg_set_by setby) +static bool ignore_reg_update(struct wiphy *wiphy, + enum nl80211_reg_initiator initiator) { if (!last_request) return true; - if (setby == REGDOM_SET_BY_CORE && + if (initiator == NL80211_REGDOM_SET_BY_CORE && wiphy->custom_regulatory) return true; /* @@ -1028,12 +1031,12 @@ static bool ignore_reg_update(struct wiphy *wiphy, enum reg_set_by setby) return false; } -static void update_all_wiphy_regulatory(enum reg_set_by setby) +static void update_all_wiphy_regulatory(enum nl80211_reg_initiator initiator) { struct cfg80211_registered_device *drv; list_for_each_entry(drv, &cfg80211_drv_list, list) - wiphy_update_regulatory(&drv->wiphy, setby); + wiphy_update_regulatory(&drv->wiphy, initiator); } static void handle_reg_beacon(struct wiphy *wiphy, @@ -1124,7 +1127,7 @@ static bool reg_is_world_roaming(struct wiphy *wiphy) if (is_world_regdom(cfg80211_regdomain->alpha2) || (wiphy->regd && is_world_regdom(wiphy->regd->alpha2))) return true; - if (last_request->initiator != REGDOM_SET_BY_COUNTRY_IE && + if (last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && wiphy->custom_regulatory) return true; return false; @@ -1138,11 +1141,12 @@ static void reg_process_beacons(struct wiphy *wiphy) wiphy_update_beacon_reg(wiphy); } -void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby) +void wiphy_update_regulatory(struct wiphy *wiphy, + enum nl80211_reg_initiator initiator) { enum ieee80211_band band; - if (ignore_reg_update(wiphy, setby)) + if (ignore_reg_update(wiphy, initiator)) goto out; for (band = 0; band < IEEE80211_NUM_BANDS; band++) { if (wiphy->bands[band]) @@ -1255,15 +1259,16 @@ static int ignore_request(struct wiphy *wiphy, return 0; switch (pending_request->initiator) { - case REGDOM_SET_BY_CORE: + case NL80211_REGDOM_SET_BY_CORE: return -EINVAL; - case REGDOM_SET_BY_COUNTRY_IE: + case NL80211_REGDOM_SET_BY_COUNTRY_IE: last_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); if (unlikely(!is_an_alpha2(pending_request->alpha2))) return -EINVAL; - if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) { + if (last_request->initiator == + NL80211_REGDOM_SET_BY_COUNTRY_IE) { if (last_wiphy != wiphy) { /* * Two cards with two APs claiming different @@ -1284,8 +1289,8 @@ static int ignore_request(struct wiphy *wiphy, return -EALREADY; } return REG_INTERSECT; - case REGDOM_SET_BY_DRIVER: - if (last_request->initiator == REGDOM_SET_BY_CORE) { + case NL80211_REGDOM_SET_BY_DRIVER: + if (last_request->initiator == NL80211_REGDOM_SET_BY_CORE) { if (is_old_static_regdom(cfg80211_regdomain)) return 0; if (regdom_changes(pending_request->alpha2)) @@ -1298,28 +1303,28 @@ static int ignore_request(struct wiphy *wiphy, * back in or if you add a new device for which the previously * loaded card also agrees on the regulatory domain. */ - if (last_request->initiator == REGDOM_SET_BY_DRIVER && + if (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER && !regdom_changes(pending_request->alpha2)) return -EALREADY; return REG_INTERSECT; - case REGDOM_SET_BY_USER: - if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) + case NL80211_REGDOM_SET_BY_USER: + if (last_request->initiator == NL80211_REGDOM_SET_BY_COUNTRY_IE) return REG_INTERSECT; /* * If the user knows better the user should set the regdom * to their country before the IE is picked up */ - if (last_request->initiator == REGDOM_SET_BY_USER && + if (last_request->initiator == NL80211_REGDOM_SET_BY_USER && last_request->intersect) return -EOPNOTSUPP; /* * Process user requests only after previous user/driver/core * requests have been processed */ - if (last_request->initiator == REGDOM_SET_BY_CORE || - last_request->initiator == REGDOM_SET_BY_DRIVER || - last_request->initiator == REGDOM_SET_BY_USER) { + if (last_request->initiator == NL80211_REGDOM_SET_BY_CORE || + last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER || + last_request->initiator == NL80211_REGDOM_SET_BY_USER) { if (regdom_changes(last_request->alpha2)) return -EAGAIN; } @@ -1359,7 +1364,8 @@ static int __regulatory_hint(struct wiphy *wiphy, r = ignore_request(wiphy, pending_request); if (r == REG_INTERSECT) { - if (pending_request->initiator == REGDOM_SET_BY_DRIVER) { + if (pending_request->initiator == + NL80211_REGDOM_SET_BY_DRIVER) { r = reg_copy_regd(&wiphy->regd, cfg80211_regdomain); if (r) { kfree(pending_request); @@ -1374,7 +1380,8 @@ static int __regulatory_hint(struct wiphy *wiphy, * wiphy */ if (r == -EALREADY && - pending_request->initiator == REGDOM_SET_BY_DRIVER) { + pending_request->initiator == + NL80211_REGDOM_SET_BY_DRIVER) { r = reg_copy_regd(&wiphy->regd, cfg80211_regdomain); if (r) { kfree(pending_request); @@ -1425,7 +1432,7 @@ static void reg_process_hint(struct regulatory_request *reg_request) if (wiphy_idx_valid(reg_request->wiphy_idx)) wiphy = wiphy_idx_to_wiphy(reg_request->wiphy_idx); - if (reg_request->initiator == REGDOM_SET_BY_DRIVER && + if (reg_request->initiator == NL80211_REGDOM_SET_BY_DRIVER && !wiphy) { kfree(reg_request); goto out; @@ -1439,7 +1446,7 @@ out: mutex_unlock(&cfg80211_mutex); } -/* Processes regulatory hints, this is all the REGDOM_SET_BY_* */ +/* Processes regulatory hints, this is all the NL80211_REGDOM_SET_BY_* */ static void reg_process_pending_hints(void) { struct regulatory_request *reg_request; @@ -1523,7 +1530,7 @@ static int regulatory_hint_core(const char *alpha2) request->alpha2[0] = alpha2[0]; request->alpha2[1] = alpha2[1]; - request->initiator = REGDOM_SET_BY_CORE; + request->initiator = NL80211_REGDOM_SET_BY_CORE; queue_regulatory_request(request); @@ -1544,7 +1551,7 @@ int regulatory_hint_user(const char *alpha2) request->wiphy_idx = WIPHY_IDX_STALE; request->alpha2[0] = alpha2[0]; request->alpha2[1] = alpha2[1]; - request->initiator = REGDOM_SET_BY_USER, + request->initiator = NL80211_REGDOM_SET_BY_USER, queue_regulatory_request(request); @@ -1570,7 +1577,7 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2) request->alpha2[0] = alpha2[0]; request->alpha2[1] = alpha2[1]; - request->initiator = REGDOM_SET_BY_DRIVER; + request->initiator = NL80211_REGDOM_SET_BY_DRIVER; queue_regulatory_request(request); @@ -1719,7 +1726,7 @@ void regulatory_hint_11d(struct wiphy *wiphy, request->wiphy_idx = get_wiphy_idx(wiphy); request->alpha2[0] = rd->alpha2[0]; request->alpha2[1] = rd->alpha2[1]; - request->initiator = REGDOM_SET_BY_COUNTRY_IE; + request->initiator = NL80211_REGDOM_SET_BY_COUNTRY_IE; request->country_ie_checksum = checksum; request->country_ie_env = env; @@ -1827,7 +1834,8 @@ static void print_regdomain(const struct ieee80211_regdomain *rd) if (is_intersected_alpha2(rd->alpha2)) { - if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) { + if (last_request->initiator == + NL80211_REGDOM_SET_BY_COUNTRY_IE) { struct cfg80211_registered_device *drv; drv = cfg80211_drv_by_wiphy_idx( last_request->wiphy_idx); @@ -1919,7 +1927,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) * rd is non static (it means CRDA was present and was used last) * and the pending request came in from a country IE */ - if (last_request->initiator != REGDOM_SET_BY_COUNTRY_IE) { + if (last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE) { /* * If someone else asked us to change the rd lets only bother * checking if the alpha2 changes if CRDA was already called @@ -1951,7 +1959,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) if (!last_request->intersect) { int r; - if (last_request->initiator != REGDOM_SET_BY_DRIVER) { + if (last_request->initiator != NL80211_REGDOM_SET_BY_DRIVER) { reset_regdomains(); cfg80211_regdomain = rd; return 0; @@ -1975,7 +1983,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) /* Intersection requires a bit more work */ - if (last_request->initiator != REGDOM_SET_BY_COUNTRY_IE) { + if (last_request->initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE) { intersected_rd = regdom_intersect(rd, cfg80211_regdomain); if (!intersected_rd) @@ -1986,7 +1994,7 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) * However if a driver requested this specific regulatory * domain we keep it for its private use */ - if (last_request->initiator == REGDOM_SET_BY_DRIVER) + if (last_request->initiator == NL80211_REGDOM_SET_BY_DRIVER) request_wiphy->regd = rd; else kfree(rd); -- cgit v1.2.3 From 73d54c9e74c4d8ee8a41bc516f481f0f754eca32 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 9 Mar 2009 22:07:42 -0400 Subject: cfg80211: add regulatory netlink multicast group This allows us to send to userspace "regulatory" events. For now we just send an event when we change regulatory domains. We also notify userspace when devices are using their own custom world roaming regulatory domains. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/core.c | 11 +++++++++ net/wireless/nl80211.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++ net/wireless/nl80211.h | 5 ++++ net/wireless/reg.c | 13 ++++++++++- 4 files changed, 90 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index c939f5ee065e..17fe39049740 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -365,6 +365,17 @@ int wiphy_register(struct wiphy *wiphy) if (IS_ERR(drv->wiphy.debugfsdir)) drv->wiphy.debugfsdir = NULL; + if (wiphy->custom_regulatory) { + struct regulatory_request request; + + request.wiphy_idx = get_wiphy_idx(wiphy); + request.initiator = NL80211_REGDOM_SET_BY_DRIVER; + request.alpha2[0] = '9'; + request.alpha2[1] = '9'; + + nl80211_send_reg_change_event(&request); + } + res = 0; out_unlock: mutex_unlock(&cfg80211_mutex); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 531bb67cf502..8ac3d26014a8 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2739,6 +2739,9 @@ static struct genl_multicast_group nl80211_config_mcgrp = { static struct genl_multicast_group nl80211_scan_mcgrp = { .name = "scan", }; +static struct genl_multicast_group nl80211_regulatory_mcgrp = { + .name = "regulatory", +}; /* notification functions */ @@ -2818,6 +2821,61 @@ void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev, genlmsg_multicast(msg, 0, nl80211_scan_mcgrp.id, GFP_KERNEL); } +/* + * This can happen on global regulatory changes or device specific settings + * based on custom world regulatory domains. + */ +void nl80211_send_reg_change_event(struct regulatory_request *request) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_REG_CHANGE); + if (!hdr) { + nlmsg_free(msg); + return; + } + + /* Userspace can always count this one always being set */ + NLA_PUT_U8(msg, NL80211_ATTR_REG_INITIATOR, request->initiator); + + if (request->alpha2[0] == '0' && request->alpha2[1] == '0') + NLA_PUT_U8(msg, NL80211_ATTR_REG_TYPE, + NL80211_REGDOM_TYPE_WORLD); + else if (request->alpha2[0] == '9' && request->alpha2[1] == '9') + NLA_PUT_U8(msg, NL80211_ATTR_REG_TYPE, + NL80211_REGDOM_TYPE_CUSTOM_WORLD); + else if ((request->alpha2[0] == '9' && request->alpha2[1] == '8') || + request->intersect) + NLA_PUT_U8(msg, NL80211_ATTR_REG_TYPE, + NL80211_REGDOM_TYPE_INTERSECTION); + else { + NLA_PUT_U8(msg, NL80211_ATTR_REG_TYPE, + NL80211_REGDOM_TYPE_COUNTRY); + NLA_PUT_STRING(msg, NL80211_ATTR_REG_ALPHA2, request->alpha2); + } + + if (wiphy_idx_valid(request->wiphy_idx)) + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, request->wiphy_idx); + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast(msg, 0, nl80211_regulatory_mcgrp.id, GFP_KERNEL); + + return; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + /* initialisation/exit functions */ int nl80211_init(void) @@ -2842,6 +2900,10 @@ int nl80211_init(void) if (err) goto err_out; + err = genl_register_mc_group(&nl80211_fam, &nl80211_regulatory_mcgrp); + if (err) + goto err_out; + return 0; err_out: genl_unregister_family(&nl80211_fam); diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 69787b621365..e65a3c38c52f 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -11,6 +11,7 @@ extern void nl80211_send_scan_done(struct cfg80211_registered_device *rdev, struct net_device *netdev); extern void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev, struct net_device *netdev); +extern void nl80211_send_reg_change_event(struct regulatory_request *request); #else static inline int nl80211_init(void) { @@ -31,6 +32,10 @@ static inline void nl80211_send_scan_aborted( struct cfg80211_registered_device *rdev, struct net_device *netdev) {} +static inline void +nl80211_send_reg_change_event(struct regulatory_request *request) +{ +} #endif /* CONFIG_NL80211 */ #endif /* __NET_WIRELESS_NL80211_H */ diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 68fde6d33dc3..eb8b8ed16155 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -41,6 +41,7 @@ #include #include "core.h" #include "reg.h" +#include "nl80211.h" /* Receipt of information from last regulatory request */ static struct regulatory_request *last_request; @@ -1403,8 +1404,16 @@ new_request: pending_request = NULL; /* When r == REG_INTERSECT we do need to call CRDA */ - if (r < 0) + if (r < 0) { + /* + * Since CRDA will not be called in this case as we already + * have applied the requested regulatory domain before we just + * inform userspace we have processed the request + */ + if (r == -EALREADY) + nl80211_send_reg_change_event(last_request); return r; + } /* * Note: When CONFIG_WIRELESS_OLD_REGULATORY is enabled @@ -2084,6 +2093,8 @@ int set_regdom(const struct ieee80211_regdomain *rd) print_regdomain(cfg80211_regdomain); + nl80211_send_reg_change_event(last_request); + return r; } -- cgit v1.2.3 From 2ffb4558194037133121e260022baa0d21590473 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 17 Mar 2009 13:10:52 -0700 Subject: gro: Fix vlan/netpoll check again Jarek Poplawski pointed out that my previous fix is broken for VLAN+netpoll as if netpoll is enabled we'd end up in the normal receive path instead of the VLAN receive path. This patch fixes it by calling the VLAN receive hook. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/8021q/vlan_core.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 6227248597c4..654e45f5719d 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -79,9 +79,6 @@ static int vlan_gro_common(struct napi_struct *napi, struct vlan_group *grp, { struct sk_buff *p; - if (netpoll_rx_on(skb)) - return GRO_NORMAL; - if (skb_bond_should_drop(skb)) goto drop; @@ -107,6 +104,9 @@ drop: int vlan_gro_receive(struct napi_struct *napi, struct vlan_group *grp, unsigned int vlan_tci, struct sk_buff *skb) { + if (netpoll_rx_on(skb)) + return vlan_hwaccel_receive_skb(skb, grp, vlan_tci); + skb_gro_reset_offset(skb); return napi_skb_finish(vlan_gro_common(napi, grp, vlan_tci, skb), skb); @@ -121,6 +121,9 @@ int vlan_gro_frags(struct napi_struct *napi, struct vlan_group *grp, if (!skb) return NET_RX_DROP; + if (netpoll_rx_on(skb)) + return vlan_hwaccel_receive_skb(skb, grp, vlan_tci); + return napi_frags_finish(napi, skb, vlan_gro_common(napi, grp, vlan_tci, skb)); } -- cgit v1.2.3 From 303c6a0251852ecbdc5c15e466dcaff5971f7517 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 17 Mar 2009 13:11:29 -0700 Subject: gro: Fix legacy path napi_complete crash On the legacy netif_rx path, I incorrectly tried to optimise the napi_complete call by using __napi_complete before we reenable IRQs. This simply doesn't work since we need to flush the held GRO packets first. This patch fixes it by doing the obvious thing of reenabling IRQs first and then calling napi_complete. Reported-by: Frank Blaschka Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index f1129706ce7b..2565f6d1d661 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2588,9 +2588,9 @@ static int process_backlog(struct napi_struct *napi, int quota) local_irq_disable(); skb = __skb_dequeue(&queue->input_pkt_queue); if (!skb) { - __napi_complete(napi); local_irq_enable(); - break; + napi_complete(napi); + goto out; } local_irq_enable(); @@ -2599,6 +2599,7 @@ static int process_backlog(struct napi_struct *napi, int quota) napi_gro_flush(napi); +out: return work; } -- cgit v1.2.3 From cd91566e4bdbcb8841385e4b2eacc8d0c29c9208 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 Mar 2009 17:28:37 +0100 Subject: netfilter: ctnetlink: remove remaining module refcounting Convert the remaining refcount users. As pointed out by Patrick McHardy, the protocols can be accessed safely using RCU. Signed-off-by: Florian Westphal Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_netlink.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 9fb7cf7504fa..735ea9c1a96f 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -599,7 +599,8 @@ ctnetlink_parse_tuple_ip(struct nlattr *attr, struct nf_conntrack_tuple *tuple) nla_parse_nested(tb, CTA_IP_MAX, attr, NULL); - l3proto = nf_ct_l3proto_find_get(tuple->src.l3num); + rcu_read_lock(); + l3proto = __nf_ct_l3proto_find(tuple->src.l3num); if (likely(l3proto->nlattr_to_tuple)) { ret = nla_validate_nested(attr, CTA_IP_MAX, @@ -608,7 +609,7 @@ ctnetlink_parse_tuple_ip(struct nlattr *attr, struct nf_conntrack_tuple *tuple) ret = l3proto->nlattr_to_tuple(tb, tuple); } - nf_ct_l3proto_put(l3proto); + rcu_read_unlock(); return ret; } @@ -633,7 +634,8 @@ ctnetlink_parse_tuple_proto(struct nlattr *attr, return -EINVAL; tuple->dst.protonum = nla_get_u8(tb[CTA_PROTO_NUM]); - l4proto = nf_ct_l4proto_find_get(tuple->src.l3num, tuple->dst.protonum); + rcu_read_lock(); + l4proto = __nf_ct_l4proto_find(tuple->src.l3num, tuple->dst.protonum); if (likely(l4proto->nlattr_to_tuple)) { ret = nla_validate_nested(attr, CTA_PROTO_MAX, @@ -642,7 +644,7 @@ ctnetlink_parse_tuple_proto(struct nlattr *attr, ret = l4proto->nlattr_to_tuple(tb, tuple); } - nf_ct_l4proto_put(l4proto); + rcu_read_unlock(); return ret; } @@ -989,10 +991,11 @@ ctnetlink_change_protoinfo(struct nf_conn *ct, struct nlattr *cda[]) nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, NULL); - l4proto = nf_ct_l4proto_find_get(nf_ct_l3num(ct), nf_ct_protonum(ct)); + rcu_read_lock(); + l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); if (l4proto->from_nlattr) err = l4proto->from_nlattr(tb, ct); - nf_ct_l4proto_put(l4proto); + rcu_read_unlock(); return err; } -- cgit v1.2.3 From 711d60a9e7f88e394ccca10f5fc83f95f0cea5b1 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 18 Mar 2009 17:30:50 +0100 Subject: netfilter: remove nf_ct_l4proto_find_get/nf_ct_l4proto_put users have been moved to __nf_ct_l4proto_find. Signed-off-by: Florian Westphal Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_proto.c | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 592d73344d46..9a62b4efa0e1 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -74,27 +74,6 @@ EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find); /* this is guaranteed to always return a valid protocol helper, since * it falls back to generic_protocol */ -struct nf_conntrack_l4proto * -nf_ct_l4proto_find_get(u_int16_t l3proto, u_int8_t l4proto) -{ - struct nf_conntrack_l4proto *p; - - rcu_read_lock(); - p = __nf_ct_l4proto_find(l3proto, l4proto); - if (!try_module_get(p->me)) - p = &nf_conntrack_l4proto_generic; - rcu_read_unlock(); - - return p; -} -EXPORT_SYMBOL_GPL(nf_ct_l4proto_find_get); - -void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p) -{ - module_put(p->me); -} -EXPORT_SYMBOL_GPL(nf_ct_l4proto_put); - struct nf_conntrack_l3proto * nf_ct_l3proto_find_get(u_int16_t l3proto) { -- cgit v1.2.3 From 0f5b3e85a3716efebb0150ebb7c6d022e2bf17d7 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 18 Mar 2009 17:36:40 +0100 Subject: netfilter: ctnetlink: fix rcu context imbalance Introduced by 7ec47496 (netfilter: ctnetlink: cleanup master conntrack assignation): net/netfilter/nf_conntrack_netlink.c:1275:2: warning: context imbalance in 'ctnetlink_create_conntrack' - different lock contexts for basic block Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_netlink.c | 57 +++++++++++++++--------------------- 1 file changed, 23 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 735ea9c1a96f..d1fe9d15ac5c 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1146,7 +1146,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[], return ERR_PTR(-ENOMEM); if (!cda[CTA_TIMEOUT]) - goto err; + goto err1; ct->timeout.expires = ntohl(nla_get_be32(cda[CTA_TIMEOUT])); ct->timeout.expires = jiffies + ct->timeout.expires * HZ; @@ -1157,10 +1157,8 @@ ctnetlink_create_conntrack(struct nlattr *cda[], char *helpname; err = ctnetlink_parse_help(cda[CTA_HELP], &helpname); - if (err < 0) { - rcu_read_unlock(); - goto err; - } + if (err < 0) + goto err2; helper = __nf_conntrack_helper_find_byname(helpname); if (helper == NULL) { @@ -1168,28 +1166,26 @@ ctnetlink_create_conntrack(struct nlattr *cda[], #ifdef CONFIG_MODULES if (request_module("nfct-helper-%s", helpname) < 0) { err = -EOPNOTSUPP; - goto err; + goto err1; } rcu_read_lock(); helper = __nf_conntrack_helper_find_byname(helpname); if (helper) { - rcu_read_unlock(); err = -EAGAIN; - goto err; + goto err2; } rcu_read_unlock(); #endif err = -EOPNOTSUPP; - goto err; + goto err1; } else { struct nf_conn_help *help; help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); if (help == NULL) { - rcu_read_unlock(); err = -ENOMEM; - goto err; + goto err2; } /* not in hash table yet so not strictly necessary */ @@ -1198,44 +1194,34 @@ ctnetlink_create_conntrack(struct nlattr *cda[], } else { /* try an implicit helper assignation */ err = __nf_ct_try_assign_helper(ct, GFP_ATOMIC); - if (err < 0) { - rcu_read_unlock(); - goto err; - } + if (err < 0) + goto err2; } if (cda[CTA_STATUS]) { err = ctnetlink_change_status(ct, cda); - if (err < 0) { - rcu_read_unlock(); - goto err; - } + if (err < 0) + goto err2; } if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) { err = ctnetlink_change_nat(ct, cda); - if (err < 0) { - rcu_read_unlock(); - goto err; - } + if (err < 0) + goto err2; } #ifdef CONFIG_NF_NAT_NEEDED if (cda[CTA_NAT_SEQ_ADJ_ORIG] || cda[CTA_NAT_SEQ_ADJ_REPLY]) { err = ctnetlink_change_nat_seq_adj(ct, cda); - if (err < 0) { - rcu_read_unlock(); - goto err; - } + if (err < 0) + goto err2; } #endif if (cda[CTA_PROTOINFO]) { err = ctnetlink_change_protoinfo(ct, cda); - if (err < 0) { - rcu_read_unlock(); - goto err; - } + if (err < 0) + goto err2; } nf_ct_acct_ext_add(ct, GFP_ATOMIC); @@ -1253,12 +1239,12 @@ ctnetlink_create_conntrack(struct nlattr *cda[], err = ctnetlink_parse_tuple(cda, &master, CTA_TUPLE_MASTER, u3); if (err < 0) - goto err; + goto err2; master_h = __nf_conntrack_find(&init_net, &master); if (master_h == NULL) { err = -ENOENT; - goto err; + goto err2; } master_ct = nf_ct_tuplehash_to_ctrack(master_h); nf_conntrack_get(&master_ct->ct_general); @@ -1271,7 +1257,10 @@ ctnetlink_create_conntrack(struct nlattr *cda[], rcu_read_unlock(); return ct; -err: + +err2: + rcu_read_unlock(); +err1: nf_conntrack_free(ct); return ERR_PTR(err); } -- cgit v1.2.3 From 9bdd8d40c8c59435664af6049dabe24b7779b203 Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Wed, 18 Mar 2009 18:22:48 -0700 Subject: ipv6: Fix incorrect disable_ipv6 behavior Fix the behavior of allowing both sysctl and addrconf_dad_failure() to set the disable_ipv6 parameter without any bad side-effects. If DAD fails and accept_dad > 1, we will still set disable_ipv6=1, but then instead of allowing an RA to add an address then immediately fail DAD, we simply don't allow the address to be added in the first place. This also lets the user set this flag and disable all IPv6 addresses on the interface, or on the entire system. Signed-off-by: Brian Haley Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e83852ab4dc8..717584bad02e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -590,6 +590,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, { struct inet6_ifaddr *ifa = NULL; struct rt6_info *rt; + struct net *net = dev_net(idev->dev); int hash; int err = 0; int addr_type = ipv6_addr_type(addr); @@ -606,6 +607,11 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, goto out2; } + if (idev->cnf.disable_ipv6 || net->ipv6.devconf_all->disable_ipv6) { + err = -EACCES; + goto out2; + } + write_lock(&addrconf_hash_lock); /* Ignore adding duplicate addresses on an interface */ @@ -1433,6 +1439,11 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp) void addrconf_dad_failure(struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; + + if (net_ratelimit()) + printk(KERN_INFO "%s: IPv6 duplicate address detected!\n", + ifp->idev->dev->name); + if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) { struct in6_addr addr; @@ -1443,11 +1454,12 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp) ipv6_addr_equal(&ifp->addr, &addr)) { /* DAD failed for link-local based on MAC address */ idev->cnf.disable_ipv6 = 1; + + printk(KERN_INFO "%s: IPv6 being disabled!\n", + ifp->idev->dev->name); } } - if (net_ratelimit()) - printk(KERN_INFO "%s: duplicate address detected!\n", ifp->idev->dev->name); addrconf_dad_stop(ifp); } @@ -2823,11 +2835,6 @@ static void addrconf_dad_timer(unsigned long data) read_unlock_bh(&idev->lock); goto out; } - if (idev->cnf.accept_dad > 1 && idev->cnf.disable_ipv6) { - read_unlock_bh(&idev->lock); - addrconf_dad_failure(ifp); - return; - } spin_lock_bh(&ifp->lock); if (ifp->probes == 0) { /* -- cgit v1.2.3 From beedad923ad6237f03265fdf86eb8a1b50d14ae9 Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Wed, 18 Mar 2009 18:50:09 -0700 Subject: tcp: remove parameter from tcp_recv_urg(). This patch removes an unused parameter (addr_len) from tcp_recv_urg() method in net/ipv4/tcp.c. Signed-off-by: Rami Rosen Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1c4d42ff72bd..2451aeb5ac23 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1082,8 +1082,7 @@ out_err: */ static int tcp_recv_urg(struct sock *sk, long timeo, - struct msghdr *msg, int len, int flags, - int *addr_len) + struct msghdr *msg, int len, int flags) { struct tcp_sock *tp = tcp_sk(sk); @@ -1698,7 +1697,7 @@ out: return err; recv_urg: - err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len); + err = tcp_recv_urg(sk, timeo, msg, len, flags); goto out; } -- cgit v1.2.3 From 1b1d8f73a44fe1796a73a97ebcb08cad18b97f07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 18 Mar 2009 18:56:54 -0700 Subject: ipv6: fix display of local and remote sit endpoints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fixes the regressions cause by commit 1326c3d5a4b792a2b15877feb7fb691f8945d203 (v2.6.28-rc6-461-g23a12b1) broke the display of local and remote addresses of an SIT tunnel in iproute2. nt->parms is used by ipip6_tunnel_init() and therefore need to be initialized first. Tracked as http://bugzilla.kernel.org/show_bug.cgi?id=12868 Reported-by: Jan Engelhardt Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- net/ipv6/sit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index d3467e563f02..5cee2bcbcece 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -188,9 +188,9 @@ static struct ip_tunnel * ipip6_tunnel_locate(struct net *net, } nt = netdev_priv(dev); - ipip6_tunnel_init(dev); nt->parms = *parms; + ipip6_tunnel_init(dev); if (parms->i_flags & SIT_ISATAP) dev->priv_flags |= IFF_ISATAP; -- cgit v1.2.3 From 4b704d59d6fb152bcd0883b84af5936a29067f12 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 18 Mar 2009 19:11:29 -0700 Subject: tipc: fix non-const printf format arguments Fix warnings from current gcc about using non-const strings as printf args in TIPC. Compile tested only (not a TIPC user). Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/tipc/bcast.c | 4 ++-- net/tipc/bcast.h | 2 +- net/tipc/dbg.c | 2 +- net/tipc/node.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 3ddaff42d1bb..a3bfd4064912 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -119,7 +119,7 @@ static struct bclink *bclink = NULL; static struct link *bcl = NULL; static DEFINE_SPINLOCK(bc_lock); -char tipc_bclink_name[] = "multicast-link"; +const char tipc_bclink_name[] = "multicast-link"; static u32 buf_seqno(struct sk_buff *buf) @@ -800,7 +800,7 @@ int tipc_bclink_init(void) tipc_link_set_queue_limits(bcl, BCLINK_WIN_DEFAULT); bcl->b_ptr = &bcbearer->bearer; bcl->state = WORKING_WORKING; - sprintf(bcl->name, tipc_bclink_name); + strlcpy(bcl->name, tipc_bclink_name, TIPC_MAX_LINK_NAME); if (BCLINK_LOG_BUF_SIZE) { char *pb = kmalloc(BCLINK_LOG_BUF_SIZE, GFP_ATOMIC); diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index 2f2d731bc1c2..4c1771e95c99 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -70,7 +70,7 @@ struct port_list { struct tipc_node; -extern char tipc_bclink_name[]; +extern const char tipc_bclink_name[]; /** diff --git a/net/tipc/dbg.c b/net/tipc/dbg.c index 29ecae851668..1885a7edb0c8 100644 --- a/net/tipc/dbg.c +++ b/net/tipc/dbg.c @@ -258,7 +258,7 @@ void tipc_printf(struct print_buf *pb, const char *fmt, ...) } if (pb->echo) - printk(print_string); + printk("%s", print_string); spin_unlock_bh(&print_lock); } diff --git a/net/tipc/node.c b/net/tipc/node.c index 20d98c56e152..2c24e7d6d950 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -703,7 +703,7 @@ struct sk_buff *tipc_node_get_links(const void *req_tlv_area, int req_tlv_space) link_info.dest = htonl(tipc_own_addr & 0xfffff00); link_info.up = htonl(1); - sprintf(link_info.str, tipc_bclink_name); + strlcpy(link_info.str, tipc_bclink_name, TIPC_MAX_LINK_NAME); tipc_cfg_append_tlv(buf, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info)); /* Add TLVs for any other links in scope */ -- cgit v1.2.3 From cb0dc77de0d23615a845e45844a2e22fc224d7fe Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 18 Mar 2009 19:12:42 -0700 Subject: net: fix sctp breakage broken by commit 5e739d1752aca4e8f3e794d431503bfca3162df4; AFAICS should be -stable fodder as well... Signed-off-by: Al Viro Aced-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/endpointola.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 4c8d9f45ce09..905fda582b92 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -111,7 +111,8 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, if (sctp_addip_enable) { auth_chunks->chunks[0] = SCTP_CID_ASCONF; auth_chunks->chunks[1] = SCTP_CID_ASCONF_ACK; - auth_chunks->param_hdr.length += htons(2); + auth_chunks->param_hdr.length = + htons(sizeof(sctp_paramhdr_t) + 2); } } -- cgit v1.2.3 From e4a389a9b5c892446b5de2038bdc0cca8703c615 Mon Sep 17 00:00:00 2001 From: Roel Kluin Date: Wed, 18 Mar 2009 23:12:13 -0700 Subject: net: kfree(napi->skb) => kfree_skb struct sk_buff pointers should be freed with kfree_skb. Signed-off-by: Roel Kluin Signed-off-by: David S. Miller --- net/core/dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 2565f6d1d661..e3fe5c705606 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2672,7 +2672,7 @@ void netif_napi_del(struct napi_struct *napi) struct sk_buff *skb, *next; list_del_init(&napi->dev_list); - kfree(napi->skb); + kfree_skb(napi->skb); for (skb = napi->gro_list; skb; skb = next) { next = skb->next; -- cgit v1.2.3 From 2bad35b7c9588eb5e65c03bcae54e7eb6b1a6504 Mon Sep 17 00:00:00 2001 From: "Jorge Boncompte [DTI2]" Date: Wed, 18 Mar 2009 23:26:11 -0700 Subject: netns: oops in ip[6]_frag_reasm incrementing stats dev can be NULL in ip[6]_frag_reasm for skb's coming from RAW sockets. Quagga's OSPFD sends fragmented packets on a RAW socket, when netfilter conntrack reassembles them on the OUTPUT path you hit this code path. You can test it with something like "hping2 -0 -d 2000 -f AA.BB.CC.DD" With help from Jarek Poplawski. Signed-off-by: Jorge Boncompte [DTI2] Signed-off-by: David S. Miller --- net/ipv4/ip_fragment.c | 3 ++- net/ipv6/reassembly.c | 7 +++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 6659ac000eeb..7985346653bd 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -463,6 +463,7 @@ err: static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, struct net_device *dev) { + struct net *net = container_of(qp->q.net, struct net, ipv4.frags); struct iphdr *iph; struct sk_buff *fp, *head = qp->q.fragments; int len; @@ -548,7 +549,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, iph = ip_hdr(head); iph->frag_off = 0; iph->tot_len = htons(len); - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_REASMOKS); + IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS); qp->q.fragments = NULL; return 0; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 3c575118fca5..e9ac7a12f595 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -452,6 +452,7 @@ err: static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev) { + struct net *net = container_of(fq->q.net, struct net, ipv6.frags); struct sk_buff *fp, *head = fq->q.fragments; int payload_len; unsigned int nhoff; @@ -551,8 +552,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, head->csum); rcu_read_lock(); - IP6_INC_STATS_BH(dev_net(dev), - __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); + IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); rcu_read_unlock(); fq->q.fragments = NULL; return 1; @@ -566,8 +566,7 @@ out_oom: printk(KERN_DEBUG "ip6_frag_reasm: no memory for reassembly\n"); out_fail: rcu_read_lock(); - IP6_INC_STATS_BH(dev_net(dev), - __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); + IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); rcu_read_unlock(); return -1; } -- cgit v1.2.3 From 7d1e8255cf959fba7ee2317550dfde39f0b936ae Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:38:03 -0400 Subject: SUNRPC: Add the equivalent of the linger and linger2 timeouts to RPC sockets This fixes a regression against FreeBSD servers as reported by Tomas Kasparek. Apparently when using RPC over a TCP socket, the FreeBSD servers don't ever react to the client closing the socket, and so commit e06799f958bf7f9f8fae15f0c6f519953fb0257c (SUNRPC: Use shutdown() instead of close() when disconnecting a TCP socket) causes the setup to hang forever whenever the client attempts to close and then reconnect. We break the deadlock by adding a 'linger2' style timeout to the socket, after which, the client will abort the connection using a TCP 'RST'. The default timeout is set to 15 seconds. A subsequent patch will put it under user control by means of a systctl. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 98 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 81 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 2e070679ab4a..b51f58b95c39 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -49,6 +49,8 @@ unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE; unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; +#define XS_TCP_LINGER_TO (15U * HZ) + /* * We can register our own files under /proc/sys/sunrpc by * calling register_sysctl_table() again. The files in that @@ -806,6 +808,7 @@ static void xs_close(struct rpc_xprt *xprt) xs_reset_transport(transport); smp_mb__before_clear_bit(); + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); clear_bit(XPRT_CLOSING, &xprt->state); smp_mb__after_clear_bit(); @@ -1133,6 +1136,47 @@ out: read_unlock(&sk->sk_callback_lock); } +/* + * Do the equivalent of linger/linger2 handling for dealing with + * broken servers that don't close the socket in a timely + * fashion + */ +static void xs_tcp_schedule_linger_timeout(struct rpc_xprt *xprt, + unsigned long timeout) +{ + struct sock_xprt *transport; + + if (xprt_test_and_set_connecting(xprt)) + return; + set_bit(XPRT_CONNECTION_ABORT, &xprt->state); + transport = container_of(xprt, struct sock_xprt, xprt); + queue_delayed_work(rpciod_workqueue, &transport->connect_worker, + timeout); +} + +static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt) +{ + struct sock_xprt *transport; + + transport = container_of(xprt, struct sock_xprt, xprt); + + if (!test_bit(XPRT_CONNECTION_ABORT, &xprt->state) || + !cancel_delayed_work(&transport->connect_worker)) + return; + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); + xprt_clear_connecting(xprt); +} + +static void xs_sock_mark_closed(struct rpc_xprt *xprt) +{ + smp_mb__before_clear_bit(); + clear_bit(XPRT_CLOSE_WAIT, &xprt->state); + clear_bit(XPRT_CLOSING, &xprt->state); + smp_mb__after_clear_bit(); + /* Mark transport as closed and wake up all pending tasks */ + xprt_disconnect_done(xprt); +} + /** * xs_tcp_state_change - callback to handle TCP socket state changes * @sk: socket whose state has changed @@ -1178,6 +1222,7 @@ static void xs_tcp_state_change(struct sock *sk) clear_bit(XPRT_CONNECTED, &xprt->state); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); smp_mb__after_clear_bit(); + xs_tcp_schedule_linger_timeout(xprt, XS_TCP_LINGER_TO); break; case TCP_CLOSE_WAIT: /* The server initiated a shutdown of the socket */ @@ -1194,17 +1239,14 @@ static void xs_tcp_state_change(struct sock *sk) break; case TCP_LAST_ACK: set_bit(XPRT_CLOSING, &xprt->state); + xs_tcp_schedule_linger_timeout(xprt, XS_TCP_LINGER_TO); smp_mb__before_clear_bit(); clear_bit(XPRT_CONNECTED, &xprt->state); smp_mb__after_clear_bit(); break; case TCP_CLOSE: - smp_mb__before_clear_bit(); - clear_bit(XPRT_CLOSE_WAIT, &xprt->state); - clear_bit(XPRT_CLOSING, &xprt->state); - smp_mb__after_clear_bit(); - /* Mark transport as closed and wake up all pending tasks */ - xprt_disconnect_done(xprt); + xs_tcp_cancel_linger_timeout(xprt); + xs_sock_mark_closed(xprt); } out: read_unlock(&sk->sk_callback_lock); @@ -1562,8 +1604,8 @@ static void xs_udp_connect_worker4(struct work_struct *work) xs_udp_finish_connecting(xprt, sock); status = 0; out: - xprt_wake_pending_tasks(xprt, status); xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, status); } /** @@ -1604,8 +1646,8 @@ static void xs_udp_connect_worker6(struct work_struct *work) xs_udp_finish_connecting(xprt, sock); status = 0; out: - xprt_wake_pending_tasks(xprt, status); xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, status); } /* @@ -1626,7 +1668,9 @@ static void xs_abort_connection(struct rpc_xprt *xprt, struct sock_xprt *transpo memset(&any, 0, sizeof(any)); any.sa_family = AF_UNSPEC; result = kernel_connect(transport->sock, &any, sizeof(any), 0); - if (result) + if (!result) + xs_sock_mark_closed(xprt); + else dprintk("RPC: AF_UNSPEC connect return code %d\n", result); } @@ -1702,6 +1746,7 @@ static void xs_tcp_connect_worker4(struct work_struct *work) goto out; if (!sock) { + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); /* start from scratch */ if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { dprintk("RPC: can't create TCP transport socket (%d).\n", -err); @@ -1713,10 +1758,18 @@ static void xs_tcp_connect_worker4(struct work_struct *work) sock_release(sock); goto out; } - } else + } else { + int abort_and_exit; + + abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT, + &xprt->state); /* "close" the socket, preserving the local port */ xs_tcp_reuse_connection(xprt, transport); + if (abort_and_exit) + goto out_eagain; + } + dprintk("RPC: worker connecting xprt %p to address: %s\n", xprt, xprt->address_strings[RPC_DISPLAY_ALL]); @@ -1732,17 +1785,18 @@ static void xs_tcp_connect_worker4(struct work_struct *work) case 0: case -EINPROGRESS: case -EALREADY: - goto out_clear; + xprt_clear_connecting(xprt); + return; } /* get rid of existing socket, and retry */ xs_tcp_shutdown(xprt); printk("%s: connect returned unhandled error %d\n", __func__, status); +out_eagain: status = -EAGAIN; out: - xprt_wake_pending_tasks(xprt, status); -out_clear: xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, status); } /** @@ -1763,6 +1817,7 @@ static void xs_tcp_connect_worker6(struct work_struct *work) goto out; if (!sock) { + clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); /* start from scratch */ if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { dprintk("RPC: can't create TCP transport socket (%d).\n", -err); @@ -1774,10 +1829,18 @@ static void xs_tcp_connect_worker6(struct work_struct *work) sock_release(sock); goto out; } - } else + } else { + int abort_and_exit; + + abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT, + &xprt->state); /* "close" the socket, preserving the local port */ xs_tcp_reuse_connection(xprt, transport); + if (abort_and_exit) + goto out_eagain; + } + dprintk("RPC: worker connecting xprt %p to address: %s\n", xprt, xprt->address_strings[RPC_DISPLAY_ALL]); @@ -1792,17 +1855,18 @@ static void xs_tcp_connect_worker6(struct work_struct *work) case 0: case -EINPROGRESS: case -EALREADY: - goto out_clear; + xprt_clear_connecting(xprt); + return; } /* get rid of existing socket, and retry */ xs_tcp_shutdown(xprt); printk("%s: connect returned unhandled error %d\n", __func__, status); +out_eagain: status = -EAGAIN; out: - xprt_wake_pending_tasks(xprt, status); -out_clear: xprt_clear_connecting(xprt); + xprt_wake_pending_tasks(xprt, status); } /** -- cgit v1.2.3 From 25fe6142a57c720452c5e9ddbc1f32309c1e5c19 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:38:03 -0400 Subject: SUNRPC: Add a sysctl to control the duration of the socket linger timeout Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index b51f58b95c39..42222b4dd76d 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -50,6 +50,7 @@ unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; #define XS_TCP_LINGER_TO (15U * HZ) +static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO; /* * We can register our own files under /proc/sys/sunrpc by @@ -118,6 +119,14 @@ static ctl_table xs_tunables_table[] = { .extra1 = &xprt_min_resvport_limit, .extra2 = &xprt_max_resvport_limit }, + { + .procname = "tcp_fin_timeout", + .data = &xs_tcp_fin_timeout, + .maxlen = sizeof(xs_tcp_fin_timeout), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = sysctl_jiffies + }, { .ctl_name = 0, }, @@ -1222,7 +1231,7 @@ static void xs_tcp_state_change(struct sock *sk) clear_bit(XPRT_CONNECTED, &xprt->state); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); smp_mb__after_clear_bit(); - xs_tcp_schedule_linger_timeout(xprt, XS_TCP_LINGER_TO); + xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout); break; case TCP_CLOSE_WAIT: /* The server initiated a shutdown of the socket */ @@ -1239,7 +1248,7 @@ static void xs_tcp_state_change(struct sock *sk) break; case TCP_LAST_ACK: set_bit(XPRT_CLOSING, &xprt->state); - xs_tcp_schedule_linger_timeout(xprt, XS_TCP_LINGER_TO); + xs_tcp_schedule_linger_timeout(xprt, xs_tcp_fin_timeout); smp_mb__before_clear_bit(); clear_bit(XPRT_CONNECTED, &xprt->state); smp_mb__after_clear_bit(); -- cgit v1.2.3 From b61d59fffd3e5b6037c92b4c840605831de8a251 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 14:38:04 -0400 Subject: =?UTF-8?q?SUNRPC:=C2=A0xs=5Ftcp=5Fconnect=5Fworker{4,6}:=20merge?= =?UTF-8?q?=20common=20code?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 140 ++++++++++++++++++++++++++------------------------ 1 file changed, 72 insertions(+), 68 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 42222b4dd76d..f05a56e597ef 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1738,33 +1738,29 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) } /** - * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint - * @work: RPC transport to connect + * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint + * @xprt: RPC transport to connect + * @transport: socket transport to connect + * @create_sock: function to create a socket of the correct type * * Invoked by a work queue tasklet. */ -static void xs_tcp_connect_worker4(struct work_struct *work) +static void xs_tcp_setup_socket(struct rpc_xprt *xprt, + struct sock_xprt *transport, + struct socket *(*create_sock)(struct rpc_xprt *, + struct sock_xprt *)) { - struct sock_xprt *transport = - container_of(work, struct sock_xprt, connect_worker.work); - struct rpc_xprt *xprt = &transport->xprt; struct socket *sock = transport->sock; - int err, status = -EIO; + int status = -EIO; if (xprt->shutdown) goto out; if (!sock) { clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); - /* start from scratch */ - if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { - dprintk("RPC: can't create TCP transport socket (%d).\n", -err); - goto out; - } - xs_reclassify_socket4(sock); - - if (xs_bind4(transport, sock) < 0) { - sock_release(sock); + sock = create_sock(xprt, transport); + if (IS_ERR(sock)) { + status = PTR_ERR(sock); goto out; } } else { @@ -1808,74 +1804,82 @@ out: xprt_wake_pending_tasks(xprt, status); } +static struct socket *xs_create_tcp_sock4(struct rpc_xprt *xprt, + struct sock_xprt *transport) +{ + struct socket *sock; + int err; + + /* start from scratch */ + err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); + if (err < 0) { + dprintk("RPC: can't create TCP transport socket (%d).\n", + -err); + goto out_err; + } + xs_reclassify_socket4(sock); + + if (xs_bind4(transport, sock) < 0) { + sock_release(sock); + goto out_err; + } + return sock; +out_err: + return ERR_PTR(-EIO); +} + /** - * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint + * xs_tcp_connect_worker4 - connect a TCP socket to a remote endpoint * @work: RPC transport to connect * * Invoked by a work queue tasklet. */ -static void xs_tcp_connect_worker6(struct work_struct *work) +static void xs_tcp_connect_worker4(struct work_struct *work) { struct sock_xprt *transport = container_of(work, struct sock_xprt, connect_worker.work); struct rpc_xprt *xprt = &transport->xprt; - struct socket *sock = transport->sock; - int err, status = -EIO; - if (xprt->shutdown) - goto out; - - if (!sock) { - clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); - /* start from scratch */ - if ((err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) { - dprintk("RPC: can't create TCP transport socket (%d).\n", -err); - goto out; - } - xs_reclassify_socket6(sock); + xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock4); +} - if (xs_bind6(transport, sock) < 0) { - sock_release(sock); - goto out; - } - } else { - int abort_and_exit; +static struct socket *xs_create_tcp_sock6(struct rpc_xprt *xprt, + struct sock_xprt *transport) +{ + struct socket *sock; + int err; - abort_and_exit = test_and_clear_bit(XPRT_CONNECTION_ABORT, - &xprt->state); - /* "close" the socket, preserving the local port */ - xs_tcp_reuse_connection(xprt, transport); + /* start from scratch */ + err = sock_create_kern(PF_INET6, SOCK_STREAM, IPPROTO_TCP, &sock); + if (err < 0) { + dprintk("RPC: can't create TCP transport socket (%d).\n", + -err); + goto out_err; + } + xs_reclassify_socket6(sock); - if (abort_and_exit) - goto out_eagain; + if (xs_bind6(transport, sock) < 0) { + sock_release(sock); + goto out_err; } + return sock; +out_err: + return ERR_PTR(-EIO); +} - dprintk("RPC: worker connecting xprt %p to address: %s\n", - xprt, xprt->address_strings[RPC_DISPLAY_ALL]); +/** + * xs_tcp_connect_worker6 - connect a TCP socket to a remote endpoint + * @work: RPC transport to connect + * + * Invoked by a work queue tasklet. + */ +static void xs_tcp_connect_worker6(struct work_struct *work) +{ + struct sock_xprt *transport = + container_of(work, struct sock_xprt, connect_worker.work); + struct rpc_xprt *xprt = &transport->xprt; - status = xs_tcp_finish_connecting(xprt, sock); - dprintk("RPC: %p connect status %d connected %d sock state %d\n", - xprt, -status, xprt_connected(xprt), sock->sk->sk_state); - switch (status) { - case -ECONNREFUSED: - case -ECONNRESET: - case -ENETUNREACH: - /* retry with existing socket, after a delay */ - case 0: - case -EINPROGRESS: - case -EALREADY: - xprt_clear_connecting(xprt); - return; - } - /* get rid of existing socket, and retry */ - xs_tcp_shutdown(xprt); - printk("%s: connect returned unhandled error %d\n", - __func__, status); -out_eagain: - status = -EAGAIN; -out: - xprt_clear_connecting(xprt); - xprt_wake_pending_tasks(xprt, status); + xs_tcp_setup_socket(xprt, transport, xs_create_tcp_sock6); } /** -- cgit v1.2.3 From 55420c24a0d4d1fce70ca713f84aa00b6b74a70e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 11 Mar 2009 15:29:24 -0400 Subject: SUNRPC: Ensure we close the socket on EPIPE errors too... As long as one task is holding the socket lock, then calls to xprt_force_disconnect(xprt) will not succeed in shutting down the socket. In particular, this would mean that a server initiated shutdown will not succeed until the lock is relinquished. In order to avoid the deadlock, we should ensure that xs_tcp_send_request() closes the socket on EPIPE errors too. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index f05a56e597ef..fbc8725c20cb 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -726,10 +726,10 @@ static int xs_tcp_send_request(struct rpc_task *task) dprintk("RPC: sendmsg returned unrecognized error %d\n", -status); case -ECONNRESET: + case -EPIPE: xs_tcp_shutdown(xprt); case -ECONNREFUSED: case -ENOTCONN: - case -EPIPE: clear_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags); } out: -- cgit v1.2.3 From 2e3c230bc7149a6af65d26a0c312e230e0c33cc3 Mon Sep 17 00:00:00 2001 From: Tom Talpey Date: Thu, 12 Mar 2009 22:21:21 -0400 Subject: SVCRDMA: fix recent printk format warnings. printk formats in prior commit were reversed/incorrect. Compiled without warning on x86 and x86_64, but detected on ppc. Signed-off-by: Tom Talpey Signed-off-by: Trond Myklebust --- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index d0bea987d80e..6c26a675435a 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -235,7 +235,7 @@ static int map_xdr(struct svcxprt_rdma *xprt, } dprintk("svcrdma: map_xdr: sge_no %d page_no %d " - "page_base %zd page_len %zd head_len %d tail_len %d\n", + "page_base %u page_len %u head_len %zu tail_len %zu\n", sge_no, page_no, xdr->page_base, xdr->page_len, xdr->head[0].iov_len, xdr->tail[0].iov_len); -- cgit v1.2.3 From f3f9258678b081c3ef2f036aef450cd2053ef419 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Fri, 20 Mar 2009 17:57:36 +0200 Subject: nl80211: Check that function pointer != NULL before using it NL80211_CMD_GET_MESH_PARAMS and NL80211_CMD_SET_MESH_PARAMS handlers did not verify whether a function pointer is NULL (not supported by the driver) before trying to call the function. The former nl80211 command is available for unprivileged users, too, so this can potentially allow normal users to kill networking (or worse..) if mac80211 is built without CONFIG_MAC80211_MESH=y. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 1e728fff474e..31b807af3235 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1908,6 +1908,11 @@ static int nl80211_get_mesh_params(struct sk_buff *skb, if (err) return err; + if (!drv->ops->get_mesh_params) { + err = -EOPNOTSUPP; + goto out; + } + /* Get the mesh params */ rtnl_lock(); err = drv->ops->get_mesh_params(&drv->wiphy, dev, &cur_params); @@ -2017,6 +2022,11 @@ static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info) if (err) return err; + if (!drv->ops->set_mesh_params) { + err = -EOPNOTSUPP; + goto out; + } + /* This makes sure that there aren't more than 32 mesh config * parameters (otherwise our bitfield scheme would not work.) */ BUILD_BUG_ON(NL80211_MESHCONF_ATTR_MAX > 32); @@ -2061,6 +2071,7 @@ static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info) err = drv->ops->set_mesh_params(&drv->wiphy, dev, &cfg, mask); rtnl_unlock(); + out: /* cleanup */ cfg80211_put_dev(drv); dev_put(dev); -- cgit v1.2.3 From 60784427ab331dc13c070ac4b0cc9a735bdfa9c0 Mon Sep 17 00:00:00 2001 From: Bernard Pidoux Date: Sat, 21 Mar 2009 13:33:18 -0700 Subject: ax25: SOCK_DEBUG message simplification This patch condenses two debug messages in one. Signed-off-by: Bernard Pidoux Signed-off-by: David S. Miller --- net/ax25/af_ax25.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 8f8f63ff6566..fd9d06f291dc 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1529,10 +1529,8 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock, dp = ax25->digipeat; } - SOCK_DEBUG(sk, "AX.25: sendto: Addresses built.\n"); - /* Build a packet */ - SOCK_DEBUG(sk, "AX.25: sendto: building packet.\n"); + SOCK_DEBUG(sk, "AX.25: sendto: Addresses built. Building packet.\n"); /* Assume the worst case */ size = len + ax25->ax25_dev->dev->hard_header_len; -- cgit v1.2.3 From f99bcff7a290768e035f3d4726e103c6ebe858bf Mon Sep 17 00:00:00 2001 From: Bernard Pidoux Date: Sat, 21 Mar 2009 13:33:55 -0700 Subject: ax25: zero length frame filtering in AX25 In previous commit 244f46ae6e9e18f6fc0be7d1f49febde4762c34b was introduced a zero length frame filter for ROSE protocole. This patch has the same purpose at AX25 frame level for the same reason. Empty frames have no meaning in AX25 protocole. Signed-off-by: Bernard Pidoux Signed-off-by: David S. Miller --- net/ax25/af_ax25.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'net') diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index fd9d06f291dc..7da5ebb84e97 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1435,6 +1435,11 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock, size_t size; int lv, err, addr_len = msg->msg_namelen; + /* AX.25 empty data frame has no meaning : don't send */ + if (len == 0) { + return (0); + } + if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_CMSG_COMPAT)) return -EINVAL; @@ -1634,6 +1639,13 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock, skb_reset_transport_header(skb); copied = skb->len; + /* AX.25 empty data frame has no meaning : ignore it */ + if (copied == 0) { + err = copied; + skb_free_datagram(sk, skb); + goto out; + } + if (copied > size) { copied = size; msg->msg_flags |= MSG_TRUNC; -- cgit v1.2.3 From a3ac80a130300573de351083cf4a5b46d233e8bf Mon Sep 17 00:00:00 2001 From: Bernard Pidoux Date: Sat, 21 Mar 2009 13:34:20 -0700 Subject: netrom: zero length frame filtering in NetRom A zero length frame filter was recently introduced in ROSE protocole. Previous commit makes the same at AX25 protocole level. This patch has the same purpose for NetRom protocole. The reason is that empty frames have no meaning in NetRom protocole. Signed-off-by: Bernard Pidoux Signed-off-by: David S. Miller --- net/netrom/af_netrom.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index cba7849de98e..6d9c58ec56ac 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1037,6 +1037,10 @@ static int nr_sendmsg(struct kiocb *iocb, struct socket *sock, unsigned char *asmptr; int size; + /* Netrom empty data frame has no meaning : don't send */ + if (len == 0) + return 0; + if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_CMSG_COMPAT)) return -EINVAL; @@ -1167,6 +1171,11 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, skb_reset_transport_header(skb); copied = skb->len; + /* NetRom empty data frame has no meaning : ignore it */ + if (copied == 0) { + goto out; + } + if (copied > size) { copied = size; msg->msg_flags |= MSG_TRUNC; @@ -1182,7 +1191,7 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, msg->msg_namelen = sizeof(*sax); - skb_free_datagram(sk, skb); +out: skb_free_datagram(sk, skb); release_sock(sk); return copied; -- cgit v1.2.3 From a0bffffc148cd8e75a48a89ad2ddb74e4081a20a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Sat, 21 Mar 2009 13:36:17 -0700 Subject: net/*: use linux/kernel.h swap() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit tcp_sack_swap seems unnecessary so I pushed swap to the caller. Also removed comment that seemed then pointless, and added include when not already there. Compile tested. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/decnet/dn_route.c | 6 +----- net/ipv4/tcp_input.c | 23 +++-------------------- net/ipv6/addrconf.c | 7 ++----- net/sched/sch_tbf.c | 9 ++------- 4 files changed, 8 insertions(+), 37 deletions(-) (limited to 'net') diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 5130dee0b384..0cc4394117df 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -380,7 +380,6 @@ static int dn_return_short(struct sk_buff *skb) unsigned char *ptr; __le16 *src; __le16 *dst; - __le16 tmp; /* Add back headers */ skb_push(skb, skb->data - skb_network_header(skb)); @@ -399,10 +398,7 @@ static int dn_return_short(struct sk_buff *skb) ptr += 2; *ptr = 0; /* Zero hop count */ - /* Swap source and destination */ - tmp = *src; - *src = *dst; - *dst = tmp; + swap(*src, *dst); skb->pkt_type = PACKET_OUTGOING; dn_rt_finish_output(skb, NULL, NULL); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fae78e3eccc4..8ac82b3703ab 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -64,6 +64,7 @@ #include #include #include +#include #include #include #include @@ -1802,11 +1803,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, for (i = used_sacks - 1; i > 0; i--) { for (j = 0; j < i; j++) { if (after(sp[j].start_seq, sp[j + 1].start_seq)) { - struct tcp_sack_block tmp; - - tmp = sp[j]; - sp[j] = sp[j + 1]; - sp[j + 1] = tmp; + swap(sp[j], sp[j + 1]); /* Track where the first SACK block goes to */ if (j == first_sack_index) @@ -4156,20 +4153,6 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp) } } -static inline void tcp_sack_swap(struct tcp_sack_block *sack1, - struct tcp_sack_block *sack2) -{ - __u32 tmp; - - tmp = sack1->start_seq; - sack1->start_seq = sack2->start_seq; - sack2->start_seq = tmp; - - tmp = sack1->end_seq; - sack1->end_seq = sack2->end_seq; - sack2->end_seq = tmp; -} - static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq) { struct tcp_sock *tp = tcp_sk(sk); @@ -4184,7 +4167,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq) if (tcp_sack_extend(sp, seq, end_seq)) { /* Rotate this_sack to the first one. */ for (; this_sack > 0; this_sack--, sp--) - tcp_sack_swap(sp, sp - 1); + swap(*sp, *(sp - 1)); if (cur_sacks > 1) tcp_sack_maybe_coalesce(tp); return; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 717584bad02e..8499da9e76a2 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -40,6 +40,7 @@ #include #include +#include #include #include #include @@ -1215,16 +1216,12 @@ int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev, } break; } else if (minihiscore < miniscore) { - struct ipv6_saddr_score *tmp; - if (hiscore->ifa) in6_ifa_put(hiscore->ifa); in6_ifa_hold(score->ifa); - tmp = hiscore; - hiscore = score; - score = tmp; + swap(hiscore, score); /* restore our iterator */ score->ifa = hiscore->ifa; diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index a2f93c09f3cc..e22dfe85e43e 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -236,7 +236,6 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt) struct tc_tbf_qopt *qopt; struct qdisc_rate_table *rtab = NULL; struct qdisc_rate_table *ptab = NULL; - struct qdisc_rate_table *tmp; struct Qdisc *child = NULL; int max_size,n; @@ -295,13 +294,9 @@ static int tbf_change(struct Qdisc* sch, struct nlattr *opt) q->tokens = q->buffer; q->ptokens = q->mtu; - tmp = q->R_tab; - q->R_tab = rtab; - rtab = tmp; + swap(q->R_tab, rtab); + swap(q->P_tab, ptab); - tmp = q->P_tab; - q->P_tab = ptab; - ptab = tmp; sch_tree_unlock(sch); err = 0; done: -- cgit v1.2.3 From 1f1900f935e810d01c716fa2aaf8c9d25caa4151 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 21 Mar 2009 13:37:28 -0700 Subject: atm: lec use dev_change_mtu Rather than calling device pointer directly (which is incorrect with net_device_ops), use the standard dev_change_mtu. Compile tested only. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/atm/lec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/atm/lec.c b/net/atm/lec.c index c0cba9a037e8..199b6bb79f42 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -502,7 +502,7 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) priv->lane2_ops = NULL; if (priv->lane_version > 1) priv->lane2_ops = &lane2_ops; - if (dev->change_mtu(dev, mesg->content.config.mtu)) + if (dev_set_mtu(dev, mesg->content.config.mtu)) printk("%s: change_mtu to %d failed\n", dev->name, mesg->content.config.mtu); priv->is_proxy = mesg->content.config.is_proxy; -- cgit v1.2.3 From 9247744e5eaa29aecee5342a0c8694187a6aadcd Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 21 Mar 2009 13:39:26 -0700 Subject: skb: expose and constify hash primitives Some minor changes to queue hashing: 1. Use const on accessor functions 2. Export skb_tx_hash for use in drivers (see ixgbe) Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/dev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index ca212acd3348..fdb9973b82a6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1725,7 +1725,7 @@ out_kfree_skb: static u32 skb_tx_hashrnd; -static u16 skb_tx_hash(struct net_device *dev, struct sk_buff *skb) +u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb) { u32 hash; @@ -1740,6 +1740,7 @@ static u16 skb_tx_hash(struct net_device *dev, struct sk_buff *skb) return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); } +EXPORT_SYMBOL(skb_tx_hash); static struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb) -- cgit v1.2.3 From 8d2f9e81169b8120cf2b4872930ae491b17c27b8 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Sat, 21 Mar 2009 13:41:09 -0700 Subject: sctp: Clean up TEST_FRAME hacks. Remove 2 TEST_FRAME hacks that are no longer needed. These allowed sctp regression tests to compile before, but are no longer needed. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/output.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net') diff --git a/net/sctp/output.c b/net/sctp/output.c index 07d58903a746..7d08f522ec84 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -49,13 +49,10 @@ #include #include #include +#include #include #include -#ifndef TEST_FRAME -#include -#endif /* TEST_FRAME (not defined) */ - #include /* for sa_family_t */ #include -- cgit v1.2.3 From ed734a97c6a81b644bd648afd7a337deb0ccd7e5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 21 Mar 2009 13:42:55 -0700 Subject: net: remove useless prefetch() call There is no gain using prefetch() in dev_hard_start_xmit(), since we already had to read ops->ndo_select_queue pointer in dev_pick_tx(), and both pointers are probably located in the same cache line. This prefetch call slows down fast path because of a stall in address computation. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index fdb9973b82a6..052dd478d3e1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1670,7 +1670,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, const struct net_device_ops *ops = dev->netdev_ops; int rc; - prefetch(&dev->netdev_ops->ndo_start_xmit); if (likely(!skb->next)) { if (!list_empty(&ptype_all)) dev_queue_xmit_nit(skb, dev); -- cgit v1.2.3 From 7ca98fa234afa096ec2a5e7195ad2d32555cca86 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 20 Mar 2009 05:43:14 +0000 Subject: snap: use const for descriptor Protocols should be able to use constant value for the descriptor. Minor whitespace cleanup as well Signed-off-by: Stephen Hemminger Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/802/psnap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/802/psnap.c b/net/802/psnap.c index bdbffa3cb043..6fea0750662b 100644 --- a/net/802/psnap.c +++ b/net/802/psnap.c @@ -29,7 +29,7 @@ static struct llc_sap *snap_sap; /* * Find a snap client by matching the 5 bytes. */ -static struct datalink_proto *find_snap_client(unsigned char *desc) +static struct datalink_proto *find_snap_client(const unsigned char *desc) { struct datalink_proto *proto = NULL, *p; @@ -122,7 +122,7 @@ module_exit(snap_exit); /* * Register SNAP clients. We don't yet use this for IP. */ -struct datalink_proto *register_snap_client(unsigned char *desc, +struct datalink_proto *register_snap_client(const unsigned char *desc, int (*rcvfunc)(struct sk_buff *, struct net_device *, struct packet_type *, @@ -137,7 +137,7 @@ struct datalink_proto *register_snap_client(unsigned char *desc, proto = kmalloc(sizeof(*proto), GFP_ATOMIC); if (proto) { - memcpy(proto->type, desc,5); + memcpy(proto->type, desc, 5); proto->rcvfunc = rcvfunc; proto->header_length = 5 + 3; /* snap + 802.2 */ proto->request = snap_request; -- cgit v1.2.3 From fa665ccf01440644a3956ed039e51e1088cd0f15 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 20 Mar 2009 05:45:39 +0000 Subject: ipx: use constant for strings and desciptor Fix compiler warning about non-const format string. Signed-off-by: Stephen Hemminger Acked-by: Arnaldo Carvalho de Melo Signed-off-by: David S. Miller --- net/ipx/af_ipx.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 30bd322b7985..1627050e29fd 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1975,15 +1975,15 @@ static struct notifier_block ipx_dev_notifier = { extern struct datalink_proto *make_EII_client(void); extern void destroy_EII_client(struct datalink_proto *); -static unsigned char ipx_8022_type = 0xE0; -static unsigned char ipx_snap_id[5] = { 0x0, 0x0, 0x0, 0x81, 0x37 }; -static char ipx_EII_err_msg[] __initdata = +static const unsigned char ipx_8022_type = 0xE0; +static const unsigned char ipx_snap_id[5] = { 0x0, 0x0, 0x0, 0x81, 0x37 }; +static const char ipx_EII_err_msg[] __initconst = KERN_CRIT "IPX: Unable to register with Ethernet II\n"; -static char ipx_8023_err_msg[] __initdata = +static const char ipx_8023_err_msg[] __initconst = KERN_CRIT "IPX: Unable to register with 802.3\n"; -static char ipx_llc_err_msg[] __initdata = +static const char ipx_llc_err_msg[] __initconst = KERN_CRIT "IPX: Unable to register with 802.2\n"; -static char ipx_snap_err_msg[] __initdata = +static const char ipx_snap_err_msg[] __initconst = KERN_CRIT "IPX: Unable to register with SNAP\n"; static int __init ipx_init(void) -- cgit v1.2.3 From c084080151e1de92159f8437fde34b6e5bebe35e Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Fri, 20 Mar 2009 09:49:49 +0000 Subject: dsa: set ->iflink on slave interfaces to the ifindex of the parent ..so that we can parse the DSA topology from 'ip link' output: 1: lo: mtu 16436 qdisc noqueue 2: eth0: mtu 1500 qdisc pfifo_fast qlen 1000 3: eth1: mtu 1500 qdisc pfifo_fast qlen 1000 4: lan1@eth0: mtu 1500 qdisc noqueue 5: lan2@eth0: mtu 1500 qdisc noqueue 6: lan3@eth0: mtu 1500 qdisc noqueue 7: lan4@eth0: mtu 1500 qdisc noqueue Signed-off-by: Lennert Buytenhek Signed-off-by: David S. Miller --- net/dsa/slave.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'net') diff --git a/net/dsa/slave.c b/net/dsa/slave.c index a68fd79e9eca..99114e5b32e4 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -48,6 +48,16 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds) /* slave device handling ****************************************************/ +static int dsa_slave_init(struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct net_device *master = p->parent->master_netdev; + + dev->iflink = master->ifindex; + + return 0; +} + static int dsa_slave_open(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); @@ -288,6 +298,7 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { #ifdef CONFIG_NET_DSA_TAG_DSA static const struct net_device_ops dsa_netdev_ops = { + .ndo_init = dsa_slave_init, .ndo_open = dsa_slave_open, .ndo_stop = dsa_slave_close, .ndo_start_xmit = dsa_xmit, @@ -300,6 +311,7 @@ static const struct net_device_ops dsa_netdev_ops = { #endif #ifdef CONFIG_NET_DSA_TAG_EDSA static const struct net_device_ops edsa_netdev_ops = { + .ndo_init = dsa_slave_init, .ndo_open = dsa_slave_open, .ndo_stop = dsa_slave_close, .ndo_start_xmit = edsa_xmit, @@ -312,6 +324,7 @@ static const struct net_device_ops edsa_netdev_ops = { #endif #ifdef CONFIG_NET_DSA_TAG_TRAILER static const struct net_device_ops trailer_netdev_ops = { + .ndo_init = dsa_slave_init, .ndo_open = dsa_slave_open, .ndo_stop = dsa_slave_close, .ndo_start_xmit = trailer_xmit, -- cgit v1.2.3 From 076d3e10a54caa2c148de5732c126c7a31381d48 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Fri, 20 Mar 2009 09:50:39 +0000 Subject: dsa: add support for the Marvell 88E6095/6095F switch chips Add support for the Marvell 88E6095/6095F switch chips. These chips are similar to the 88e6131, so we can add the support to mv88e6131.c easily. Thanks to Gary Thomas and Jesper Dangaard Brouer for testing various patches. Signed-off-by: Lennert Buytenhek Tested-by: Gary Thomas Signed-off-by: David S. Miller --- net/dsa/Kconfig | 6 +++--- net/dsa/mv88e6131.c | 22 ++++++++++++++-------- 2 files changed, 17 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 49211b35725b..c51b55400dc5 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -41,13 +41,13 @@ config NET_DSA_MV88E6XXX_NEED_PPU default n config NET_DSA_MV88E6131 - bool "Marvell 88E6131 ethernet switch chip support" + bool "Marvell 88E6095/6095F/6131 ethernet switch chip support" select NET_DSA_MV88E6XXX select NET_DSA_MV88E6XXX_NEED_PPU select NET_DSA_TAG_DSA ---help--- - This enables support for the Marvell 88E6131 ethernet switch - chip. + This enables support for the Marvell 88E6095/6095F/6131 + ethernet switch chips. config NET_DSA_MV88E6123_61_65 bool "Marvell 88E6123/6161/6165 ethernet switch chip support" diff --git a/net/dsa/mv88e6131.c b/net/dsa/mv88e6131.c index 70fae2444cb6..002995721ecf 100644 --- a/net/dsa/mv88e6131.c +++ b/net/dsa/mv88e6131.c @@ -1,6 +1,6 @@ /* - * net/dsa/mv88e6131.c - Marvell 88e6131 switch chip support - * Copyright (c) 2008 Marvell Semiconductor + * net/dsa/mv88e6131.c - Marvell 88e6095/6095f/6131 switch chip support + * Copyright (c) 2008-2009 Marvell Semiconductor * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -21,6 +21,8 @@ static char *mv88e6131_probe(struct mii_bus *bus, int sw_addr) ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03); if (ret >= 0) { ret &= 0xfff0; + if (ret == 0x0950) + return "Marvell 88E6095/88E6095F"; if (ret == 0x1060) return "Marvell 88E6131"; } @@ -36,7 +38,7 @@ static int mv88e6131_switch_reset(struct dsa_switch *ds) /* * Set all ports to the disabled state. */ - for (i = 0; i < 8; i++) { + for (i = 0; i < 11; i++) { ret = REG_READ(REG_PORT(i), 0x04); REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc); } @@ -136,7 +138,7 @@ static int mv88e6131_setup_global(struct dsa_switch *ds) * Clear all trunk masks. */ for (i = 0; i < 8; i++) - REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0xff); + REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0x7ff); /* * Clear all trunk mappings. @@ -159,9 +161,13 @@ static int mv88e6131_setup_port(struct dsa_switch *ds, int p) /* * MAC Forcing register: don't force link, speed, duplex - * or flow control state to any particular values. + * or flow control state to any particular values on physical + * ports, but force the CPU port to 1000 Mb/s full duplex. */ - REG_WRITE(addr, 0x01, 0x0003); + if (p == ds->cpu_port) + REG_WRITE(addr, 0x01, 0x003e); + else + REG_WRITE(addr, 0x01, 0x0003); /* * Port Control: disable Core Tag, disable Drop-on-Lock, @@ -268,7 +274,7 @@ static int mv88e6131_setup(struct dsa_switch *ds) if (ret < 0) return ret; - for (i = 0; i < 6; i++) { + for (i = 0; i < 11; i++) { ret = mv88e6131_setup_port(ds, i); if (ret < 0) return ret; @@ -279,7 +285,7 @@ static int mv88e6131_setup(struct dsa_switch *ds) static int mv88e6131_port_to_phy_addr(int port) { - if (port >= 0 && port != 3 && port <= 7) + if (port >= 0 && port <= 11) return port; return -1; } -- cgit v1.2.3 From e84665c9cb4db963393fafad6fefe5efdd7e4a09 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Fri, 20 Mar 2009 09:52:09 +0000 Subject: dsa: add switch chip cascading support The initial version of the DSA driver only supported a single switch chip per network interface, while DSA-capable switch chips can be interconnected to form a tree of switch chips. This patch adds support for multiple switch chips on a network interface. An example topology for a 16-port device with an embedded CPU is as follows: +-----+ +--------+ +--------+ | |eth0 10| switch |9 10| switch | | CPU +----------+ +-------+ | | | | chip 0 | | chip 1 | +-----+ +---++---+ +---++---+ || || || || ||1000baseT ||1000baseT ||ports 1-8 ||ports 9-16 This requires a couple of interdependent changes in the DSA layer: - The dsa platform driver data needs to be extended: there is still only one netdevice per DSA driver instance (eth0 in the example above), but each of the switch chips in the tree needs its own mii_bus device pointer, MII management bus address, and port name array. (include/net/dsa.h) The existing in-tree dsa users need some small changes to deal with this. (arch/arm) - The DSA and Ethertype DSA tagging modules need to be extended to use the DSA device ID field on receive and demultiplex the packet accordingly, and fill in the DSA device ID field on transmit according to which switch chip the packet is heading to. (net/dsa/tag_{dsa,edsa}.c) - The concept of "CPU port", which is the switch chip port that the CPU is connected to (port 10 on switch chip 0 in the example), needs to be extended with the concept of "upstream port", which is the port on the switch chip that will bring us one hop closer to the CPU (port 10 for both switch chips in the example above). - The dsa platform data needs to specify which ports on which switch chips are links to other switch chips, so that we can enable DSA tagging mode on them. (For inter-switch links, we always use non-EtherType DSA tagging, since it has lower overhead. The CPU link uses dsa or edsa tagging depending on what the 'root' switch chip supports.) This is done by specifying "dsa" for the given port in the port array. - The dsa platform data needs to be extended with information on via which port to reach any given switch chip from any given switch chip. This info is specified via the per-switch chip data struct ->rtable[] array, which gives the nexthop ports for each of the other switches in the tree. For the example topology above, the dsa platform data would look something like this: static struct dsa_chip_data sw[2] = { { .mii_bus = &foo, .sw_addr = 1, .port_names[0] = "p1", .port_names[1] = "p2", .port_names[2] = "p3", .port_names[3] = "p4", .port_names[4] = "p5", .port_names[5] = "p6", .port_names[6] = "p7", .port_names[7] = "p8", .port_names[9] = "dsa", .port_names[10] = "cpu", .rtable = (s8 []){ -1, 9, }, }, { .mii_bus = &foo, .sw_addr = 2, .port_names[0] = "p9", .port_names[1] = "p10", .port_names[2] = "p11", .port_names[3] = "p12", .port_names[4] = "p13", .port_names[5] = "p14", .port_names[6] = "p15", .port_names[7] = "p16", .port_names[10] = "dsa", .rtable = (s8 []){ 10, -1, }, }, }, static struct dsa_platform_data pd = { .netdev = &foo, .nr_switches = 2, .sw = sw, }; Signed-off-by: Lennert Buytenhek Tested-by: Gary Thomas Signed-off-by: David S. Miller --- net/dsa/dsa.c | 177 ++++++++++++++++++++++++++++------------------ net/dsa/dsa_priv.h | 97 ++++++++++++++++++++----- net/dsa/mv88e6060.c | 12 ++-- net/dsa/mv88e6123_61_65.c | 92 +++++++++++++++--------- net/dsa/mv88e6131.c | 78 ++++++++++++-------- net/dsa/slave.c | 25 ++++--- net/dsa/tag_dsa.c | 30 +++++--- net/dsa/tag_edsa.c | 30 +++++--- net/dsa/tag_trailer.c | 10 +-- 9 files changed, 363 insertions(+), 188 deletions(-) (limited to 'net') diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 33e99462023a..71489f69a42c 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -1,6 +1,6 @@ /* * net/dsa/dsa.c - Hardware switch handling - * Copyright (c) 2008 Marvell Semiconductor + * Copyright (c) 2008-2009 Marvell Semiconductor * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -67,12 +67,13 @@ dsa_switch_probe(struct mii_bus *bus, int sw_addr, char **_name) /* basic switch operations **************************************************/ static struct dsa_switch * -dsa_switch_setup(struct device *parent, struct dsa_platform_data *pd, - struct mii_bus *bus, struct net_device *dev) +dsa_switch_setup(struct dsa_switch_tree *dst, int index, + struct device *parent, struct mii_bus *bus) { + struct dsa_chip_data *pd = dst->pd->chip + index; + struct dsa_switch_driver *drv; struct dsa_switch *ds; int ret; - struct dsa_switch_driver *drv; char *name; int i; @@ -81,11 +82,12 @@ dsa_switch_setup(struct device *parent, struct dsa_platform_data *pd, */ drv = dsa_switch_probe(bus, pd->sw_addr, &name); if (drv == NULL) { - printk(KERN_ERR "%s: could not detect attached switch\n", - dev->name); + printk(KERN_ERR "%s[%d]: could not detect attached switch\n", + dst->master_netdev->name, index); return ERR_PTR(-EINVAL); } - printk(KERN_INFO "%s: detected a %s switch\n", dev->name, name); + printk(KERN_INFO "%s[%d]: detected a %s switch\n", + dst->master_netdev->name, index, name); /* @@ -95,18 +97,16 @@ dsa_switch_setup(struct device *parent, struct dsa_platform_data *pd, if (ds == NULL) return ERR_PTR(-ENOMEM); - ds->pd = pd; - ds->master_netdev = dev; - ds->master_mii_bus = bus; - + ds->dst = dst; + ds->index = index; + ds->pd = dst->pd->chip + index; ds->drv = drv; - ds->tag_protocol = drv->tag_protocol; + ds->master_mii_bus = bus; /* * Validate supplied switch configuration. */ - ds->cpu_port = -1; for (i = 0; i < DSA_MAX_PORTS; i++) { char *name; @@ -115,32 +115,28 @@ dsa_switch_setup(struct device *parent, struct dsa_platform_data *pd, continue; if (!strcmp(name, "cpu")) { - if (ds->cpu_port != -1) { + if (dst->cpu_switch != -1) { printk(KERN_ERR "multiple cpu ports?!\n"); ret = -EINVAL; goto out; } - ds->cpu_port = i; + dst->cpu_switch = index; + dst->cpu_port = i; + } else if (!strcmp(name, "dsa")) { + ds->dsa_port_mask |= 1 << i; } else { - ds->valid_port_mask |= 1 << i; + ds->phys_port_mask |= 1 << i; } } - if (ds->cpu_port == -1) { - printk(KERN_ERR "no cpu port?!\n"); - ret = -EINVAL; - goto out; - } - /* - * If we use a tagging format that doesn't have an ethertype - * field, make sure that all packets from this point on get - * sent to the tag format's receive function. (Which will - * discard received packets until we set ds->ports[] below.) + * If the CPU connects to this switch, set the switch tree + * tagging protocol to the preferred tagging format of this + * switch. */ - wmb(); - dev->dsa_ptr = (void *)ds; + if (ds->dst->cpu_switch == index) + ds->dst->tag_protocol = drv->tag_protocol; /* @@ -150,7 +146,7 @@ dsa_switch_setup(struct device *parent, struct dsa_platform_data *pd, if (ret < 0) goto out; - ret = drv->set_addr(ds, dev->dev_addr); + ret = drv->set_addr(ds, dst->master_netdev->dev_addr); if (ret < 0) goto out; @@ -169,18 +165,18 @@ dsa_switch_setup(struct device *parent, struct dsa_platform_data *pd, /* * Create network devices for physical switch ports. */ - wmb(); for (i = 0; i < DSA_MAX_PORTS; i++) { struct net_device *slave_dev; - if (!(ds->valid_port_mask & (1 << i))) + if (!(ds->phys_port_mask & (1 << i))) continue; slave_dev = dsa_slave_create(ds, parent, i, pd->port_names[i]); if (slave_dev == NULL) { - printk(KERN_ERR "%s: can't create dsa slave " - "device for port %d(%s)\n", - dev->name, i, pd->port_names[i]); + printk(KERN_ERR "%s[%d]: can't create dsa " + "slave device for port %d(%s)\n", + dst->master_netdev->name, + index, i, pd->port_names[i]); continue; } @@ -192,7 +188,6 @@ dsa_switch_setup(struct device *parent, struct dsa_platform_data *pd, out_free: mdiobus_free(ds->slave_mii_bus); out: - dev->dsa_ptr = NULL; kfree(ds); return ERR_PTR(ret); } @@ -212,35 +207,42 @@ static void dsa_switch_destroy(struct dsa_switch *ds) */ bool dsa_uses_dsa_tags(void *dsa_ptr) { - struct dsa_switch *ds = dsa_ptr; + struct dsa_switch_tree *dst = dsa_ptr; - return !!(ds->tag_protocol == htons(ETH_P_DSA)); + return !!(dst->tag_protocol == htons(ETH_P_DSA)); } bool dsa_uses_trailer_tags(void *dsa_ptr) { - struct dsa_switch *ds = dsa_ptr; + struct dsa_switch_tree *dst = dsa_ptr; - return !!(ds->tag_protocol == htons(ETH_P_TRAILER)); + return !!(dst->tag_protocol == htons(ETH_P_TRAILER)); } /* link polling *************************************************************/ static void dsa_link_poll_work(struct work_struct *ugly) { - struct dsa_switch *ds; + struct dsa_switch_tree *dst; + int i; + + dst = container_of(ugly, struct dsa_switch_tree, link_poll_work); - ds = container_of(ugly, struct dsa_switch, link_poll_work); + for (i = 0; i < dst->pd->nr_chips; i++) { + struct dsa_switch *ds = dst->ds[i]; - ds->drv->poll_link(ds); - mod_timer(&ds->link_poll_timer, round_jiffies(jiffies + HZ)); + if (ds != NULL && ds->drv->poll_link != NULL) + ds->drv->poll_link(ds); + } + + mod_timer(&dst->link_poll_timer, round_jiffies(jiffies + HZ)); } -static void dsa_link_poll_timer(unsigned long _ds) +static void dsa_link_poll_timer(unsigned long _dst) { - struct dsa_switch *ds = (void *)_ds; + struct dsa_switch_tree *dst = (void *)_dst; - schedule_work(&ds->link_poll_work); + schedule_work(&dst->link_poll_work); } @@ -303,18 +305,14 @@ static int dsa_probe(struct platform_device *pdev) static int dsa_version_printed; struct dsa_platform_data *pd = pdev->dev.platform_data; struct net_device *dev; - struct mii_bus *bus; - struct dsa_switch *ds; + struct dsa_switch_tree *dst; + int i; if (!dsa_version_printed++) printk(KERN_NOTICE "Distributed Switch Architecture " "driver version %s\n", dsa_driver_version); - if (pd == NULL || pd->mii_bus == NULL || pd->netdev == NULL) - return -EINVAL; - - bus = dev_to_mii_bus(pd->mii_bus); - if (bus == NULL) + if (pd == NULL || pd->netdev == NULL) return -EINVAL; dev = dev_to_net_device(pd->netdev); @@ -326,36 +324,79 @@ static int dsa_probe(struct platform_device *pdev) return -EEXIST; } - ds = dsa_switch_setup(&pdev->dev, pd, bus, dev); - if (IS_ERR(ds)) { + dst = kzalloc(sizeof(*dst), GFP_KERNEL); + if (dst == NULL) { dev_put(dev); - return PTR_ERR(ds); + return -ENOMEM; } - if (ds->drv->poll_link != NULL) { - INIT_WORK(&ds->link_poll_work, dsa_link_poll_work); - init_timer(&ds->link_poll_timer); - ds->link_poll_timer.data = (unsigned long)ds; - ds->link_poll_timer.function = dsa_link_poll_timer; - ds->link_poll_timer.expires = round_jiffies(jiffies + HZ); - add_timer(&ds->link_poll_timer); + platform_set_drvdata(pdev, dst); + + dst->pd = pd; + dst->master_netdev = dev; + dst->cpu_switch = -1; + dst->cpu_port = -1; + + for (i = 0; i < pd->nr_chips; i++) { + struct mii_bus *bus; + struct dsa_switch *ds; + + bus = dev_to_mii_bus(pd->chip[i].mii_bus); + if (bus == NULL) { + printk(KERN_ERR "%s[%d]: no mii bus found for " + "dsa switch\n", dev->name, i); + continue; + } + + ds = dsa_switch_setup(dst, i, &pdev->dev, bus); + if (IS_ERR(ds)) { + printk(KERN_ERR "%s[%d]: couldn't create dsa switch " + "instance (error %ld)\n", dev->name, i, + PTR_ERR(ds)); + continue; + } + + dst->ds[i] = ds; + if (ds->drv->poll_link != NULL) + dst->link_poll_needed = 1; } - platform_set_drvdata(pdev, ds); + /* + * If we use a tagging format that doesn't have an ethertype + * field, make sure that all packets from this point on get + * sent to the tag format's receive function. + */ + wmb(); + dev->dsa_ptr = (void *)dst; + + if (dst->link_poll_needed) { + INIT_WORK(&dst->link_poll_work, dsa_link_poll_work); + init_timer(&dst->link_poll_timer); + dst->link_poll_timer.data = (unsigned long)dst; + dst->link_poll_timer.function = dsa_link_poll_timer; + dst->link_poll_timer.expires = round_jiffies(jiffies + HZ); + add_timer(&dst->link_poll_timer); + } return 0; } static int dsa_remove(struct platform_device *pdev) { - struct dsa_switch *ds = platform_get_drvdata(pdev); + struct dsa_switch_tree *dst = platform_get_drvdata(pdev); + int i; - if (ds->drv->poll_link != NULL) - del_timer_sync(&ds->link_poll_timer); + if (dst->link_poll_needed) + del_timer_sync(&dst->link_poll_timer); flush_scheduled_work(); - dsa_switch_destroy(ds); + for (i = 0; i < dst->pd->nr_chips; i++) { + struct dsa_switch *ds = dst->ds[i]; + + if (ds != NULL) + dsa_switch_destroy(ds); + } return 0; } diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 7063378a1ebf..41055f33d28a 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -1,6 +1,6 @@ /* * net/dsa/dsa_priv.h - Hardware switch handling - * Copyright (c) 2008 Marvell Semiconductor + * Copyright (c) 2008-2009 Marvell Semiconductor * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -19,42 +19,107 @@ struct dsa_switch { /* - * Configuration data for the platform device that owns - * this dsa switch instance. + * Parent switch tree, and switch index. */ - struct dsa_platform_data *pd; + struct dsa_switch_tree *dst; + int index; /* - * References to network device and mii bus to use. + * Configuration data for this switch. */ - struct net_device *master_netdev; - struct mii_bus *master_mii_bus; + struct dsa_chip_data *pd; /* - * The used switch driver and frame tagging type. + * The used switch driver. */ struct dsa_switch_driver *drv; - __be16 tag_protocol; + + /* + * Reference to mii bus to use. + */ + struct mii_bus *master_mii_bus; /* * Slave mii_bus and devices for the individual ports. */ - int cpu_port; - u32 valid_port_mask; - struct mii_bus *slave_mii_bus; - struct net_device *ports[DSA_MAX_PORTS]; + u32 dsa_port_mask; + u32 phys_port_mask; + struct mii_bus *slave_mii_bus; + struct net_device *ports[DSA_MAX_PORTS]; +}; + +struct dsa_switch_tree { + /* + * Configuration data for the platform device that owns + * this dsa switch tree instance. + */ + struct dsa_platform_data *pd; + + /* + * Reference to network device to use, and which tagging + * protocol to use. + */ + struct net_device *master_netdev; + __be16 tag_protocol; + + /* + * The switch and port to which the CPU is attached. + */ + s8 cpu_switch; + s8 cpu_port; /* * Link state polling. */ - struct work_struct link_poll_work; - struct timer_list link_poll_timer; + int link_poll_needed; + struct work_struct link_poll_work; + struct timer_list link_poll_timer; + + /* + * Data for the individual switch chips. + */ + struct dsa_switch *ds[DSA_MAX_SWITCHES]; }; +static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p) +{ + return !!(ds->index == ds->dst->cpu_switch && p == ds->dst->cpu_port); +} + +static inline u8 dsa_upstream_port(struct dsa_switch *ds) +{ + struct dsa_switch_tree *dst = ds->dst; + + /* + * If this is the root switch (i.e. the switch that connects + * to the CPU), return the cpu port number on this switch. + * Else return the (DSA) port number that connects to the + * switch that is one hop closer to the cpu. + */ + if (dst->cpu_switch == ds->index) + return dst->cpu_port; + else + return ds->pd->rtable[dst->cpu_switch]; +} + struct dsa_slave_priv { + /* + * The linux network interface corresponding to this + * switch port. + */ struct net_device *dev; + + /* + * Which switch this port is a part of, and the port index + * for this port. + */ struct dsa_switch *parent; - int port; + u8 port; + + /* + * The phylib phy_device pointer for the PHY connected + * to this port. + */ struct phy_device *phy; }; diff --git a/net/dsa/mv88e6060.c b/net/dsa/mv88e6060.c index 85081ae9fe89..83277f463af7 100644 --- a/net/dsa/mv88e6060.c +++ b/net/dsa/mv88e6060.c @@ -1,6 +1,6 @@ /* * net/dsa/mv88e6060.c - Driver for Marvell 88e6060 switch chips - * Copyright (c) 2008 Marvell Semiconductor + * Copyright (c) 2008-2009 Marvell Semiconductor * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -81,7 +81,7 @@ static int mv88e6060_switch_reset(struct dsa_switch *ds) /* * Reset the switch. */ - REG_WRITE(REG_GLOBAL, 0x0A, 0xa130); + REG_WRITE(REG_GLOBAL, 0x0a, 0xa130); /* * Wait up to one second for reset to complete. @@ -128,7 +128,7 @@ static int mv88e6060_setup_port(struct dsa_switch *ds, int p) * state to Forwarding. Additionally, if this is the CPU * port, enable Ingress and Egress Trailer tagging mode. */ - REG_WRITE(addr, 0x04, (p == ds->cpu_port) ? 0x4103 : 0x0003); + REG_WRITE(addr, 0x04, dsa_is_cpu_port(ds, p) ? 0x4103 : 0x0003); /* * Port based VLAN map: give each port its own address @@ -138,9 +138,9 @@ static int mv88e6060_setup_port(struct dsa_switch *ds, int p) */ REG_WRITE(addr, 0x06, ((p & 0xf) << 12) | - ((p == ds->cpu_port) ? - ds->valid_port_mask : - (1 << ds->cpu_port))); + (dsa_is_cpu_port(ds, p) ? + ds->phys_port_mask : + (1 << ds->dst->cpu_port))); /* * Port Association Vector: when learning source addresses diff --git a/net/dsa/mv88e6123_61_65.c b/net/dsa/mv88e6123_61_65.c index 100318722214..52faaa21a4d9 100644 --- a/net/dsa/mv88e6123_61_65.c +++ b/net/dsa/mv88e6123_61_65.c @@ -1,6 +1,6 @@ /* * net/dsa/mv88e6123_61_65.c - Marvell 88e6123/6161/6165 switch chip support - * Copyright (c) 2008 Marvell Semiconductor + * Copyright (c) 2008-2009 Marvell Semiconductor * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -98,17 +98,17 @@ static int mv88e6123_61_65_setup_global(struct dsa_switch *ds) return ret; /* - * Configure the cpu port, and configure the cpu port as the - * port to which ingress and egress monitor frames are to be - * sent. + * Configure the upstream port, and configure the upstream + * port as the port to which ingress and egress monitor frames + * are to be sent. */ - REG_WRITE(REG_GLOBAL, 0x1a, (ds->cpu_port * 0x1110)); + REG_WRITE(REG_GLOBAL, 0x1a, (dsa_upstream_port(ds) * 0x1110)); /* * Disable remote management for now, and set the switch's - * DSA device number to zero. + * DSA device number. */ - REG_WRITE(REG_GLOBAL, 0x1c, 0x0000); + REG_WRITE(REG_GLOBAL, 0x1c, ds->index & 0x1f); /* * Send all frames with destination addresses matching @@ -133,10 +133,17 @@ static int mv88e6123_61_65_setup_global(struct dsa_switch *ds) REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff); /* - * Map all DSA device IDs to the CPU port. + * Program the DSA routing table. */ - for (i = 0; i < 32; i++) - REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | ds->cpu_port); + for (i = 0; i < 32; i++) { + int nexthop; + + nexthop = 0x1f; + if (i != ds->index && i < ds->dst->pd->nr_chips) + nexthop = ds->pd->rtable[i] & 0x1f; + + REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | nexthop); + } /* * Clear all trunk masks. @@ -176,12 +183,18 @@ static int mv88e6123_61_65_setup_global(struct dsa_switch *ds) static int mv88e6123_61_65_setup_port(struct dsa_switch *ds, int p) { int addr = REG_PORT(p); + u16 val; /* * MAC Forcing register: don't force link, speed, duplex - * or flow control state to any particular values. + * or flow control state to any particular values on physical + * ports, but force the CPU port and all DSA ports to 1000 Mb/s + * full duplex. */ - REG_WRITE(addr, 0x01, 0x0003); + if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p)) + REG_WRITE(addr, 0x01, 0x003e); + else + REG_WRITE(addr, 0x01, 0x0003); /* * Do not limit the period of time that this port can be @@ -192,37 +205,50 @@ static int mv88e6123_61_65_setup_port(struct dsa_switch *ds, int p) /* * Port Control: disable Drop-on-Unlock, disable Drop-on-Lock, - * configure the requested (DSA/EDSA) tagging mode if this is - * the CPU port, disable Header mode, enable IGMP/MLD snooping, - * disable VLAN tunneling, determine priority by looking at - * 802.1p and IP priority fields (IP prio has precedence), and - * set STP state to Forwarding. Finally, if this is the CPU - * port, additionally enable forwarding of unknown unicast and - * multicast addresses. - */ - REG_WRITE(addr, 0x04, - (p == ds->cpu_port) ? - (ds->tag_protocol == htons(ETH_P_DSA)) ? - 0x053f : 0x373f : - 0x0433); + * disable Header mode, enable IGMP/MLD snooping, disable VLAN + * tunneling, determine priority by looking at 802.1p and IP + * priority fields (IP prio has precedence), and set STP state + * to Forwarding. + * + * If this is the CPU link, use DSA or EDSA tagging depending + * on which tagging mode was configured. + * + * If this is a link to another switch, use DSA tagging mode. + * + * If this is the upstream port for this switch, enable + * forwarding of unknown unicasts and multicasts. + */ + val = 0x0433; + if (dsa_is_cpu_port(ds, p)) { + if (ds->dst->tag_protocol == htons(ETH_P_EDSA)) + val |= 0x3300; + else + val |= 0x0100; + } + if (ds->dsa_port_mask & (1 << p)) + val |= 0x0100; + if (p == dsa_upstream_port(ds)) + val |= 0x000c; + REG_WRITE(addr, 0x04, val); /* * Port Control 1: disable trunking. Also, if this is the * CPU port, enable learn messages to be sent to this port. */ - REG_WRITE(addr, 0x05, (p == ds->cpu_port) ? 0x8000 : 0x0000); + REG_WRITE(addr, 0x05, dsa_is_cpu_port(ds, p) ? 0x8000 : 0x0000); /* * Port based VLAN map: give each port its own address * database, allow the CPU port to talk to each of the 'real' * ports, and allow each of the 'real' ports to only talk to - * the CPU port. - */ - REG_WRITE(addr, 0x06, - ((p & 0xf) << 12) | - ((p == ds->cpu_port) ? - ds->valid_port_mask : - (1 << ds->cpu_port))); + * the upstream port. + */ + val = (p & 0xf) << 12; + if (dsa_is_cpu_port(ds, p)) + val |= ds->phys_port_mask; + else + val |= 1 << dsa_upstream_port(ds); + REG_WRITE(addr, 0x06, val); /* * Default VLAN ID and priority: don't set a default VLAN diff --git a/net/dsa/mv88e6131.c b/net/dsa/mv88e6131.c index 002995721ecf..bb2b41bc854e 100644 --- a/net/dsa/mv88e6131.c +++ b/net/dsa/mv88e6131.c @@ -102,17 +102,17 @@ static int mv88e6131_setup_global(struct dsa_switch *ds) REG_WRITE(REG_GLOBAL, 0x19, 0x8100); /* - * Disable ARP mirroring, and configure the cpu port as the - * port to which ingress and egress monitor frames are to be - * sent. + * Disable ARP mirroring, and configure the upstream port as + * the port to which ingress and egress monitor frames are to + * be sent. */ - REG_WRITE(REG_GLOBAL, 0x1a, (ds->cpu_port * 0x1100) | 0x00f0); + REG_WRITE(REG_GLOBAL, 0x1a, (dsa_upstream_port(ds) * 0x1100) | 0x00f0); /* * Disable cascade port functionality, and set the switch's - * DSA device number to zero. + * DSA device number. */ - REG_WRITE(REG_GLOBAL, 0x1c, 0xe000); + REG_WRITE(REG_GLOBAL, 0x1c, 0xe000 | (ds->index & 0x1f)); /* * Send all frames with destination addresses matching @@ -129,10 +129,17 @@ static int mv88e6131_setup_global(struct dsa_switch *ds) REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff); /* - * Map all DSA device IDs to the CPU port. + * Program the DSA routing table. */ - for (i = 0; i < 32; i++) - REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | ds->cpu_port); + for (i = 0; i < 32; i++) { + int nexthop; + + nexthop = 0x1f; + if (i != ds->index && i < ds->dst->pd->nr_chips) + nexthop = ds->pd->rtable[i] & 0x1f; + + REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | nexthop); + } /* * Clear all trunk masks. @@ -158,13 +165,15 @@ static int mv88e6131_setup_global(struct dsa_switch *ds) static int mv88e6131_setup_port(struct dsa_switch *ds, int p) { int addr = REG_PORT(p); + u16 val; /* * MAC Forcing register: don't force link, speed, duplex * or flow control state to any particular values on physical - * ports, but force the CPU port to 1000 Mb/s full duplex. + * ports, but force the CPU port and all DSA ports to 1000 Mb/s + * full duplex. */ - if (p == ds->cpu_port) + if (dsa_is_cpu_port(ds, p) || ds->dsa_port_mask & (1 << p)) REG_WRITE(addr, 0x01, 0x003e); else REG_WRITE(addr, 0x01, 0x0003); @@ -175,29 +184,40 @@ static int mv88e6131_setup_port(struct dsa_switch *ds, int p) * enable IGMP/MLD snoop, disable DoubleTag, disable VLAN * tunneling, determine priority by looking at 802.1p and * IP priority fields (IP prio has precedence), and set STP - * state to Forwarding. Finally, if this is the CPU port, - * additionally enable DSA tagging and forwarding of unknown - * unicast addresses. + * state to Forwarding. + * + * If this is the upstream port for this switch, enable + * forwarding of unknown unicasts, and enable DSA tagging + * mode. + * + * If this is the link to another switch, use DSA tagging + * mode, but do not enable forwarding of unknown unicasts. */ - REG_WRITE(addr, 0x04, (p == ds->cpu_port) ? 0x0537 : 0x0433); + val = 0x0433; + if (p == dsa_upstream_port(ds)) + val |= 0x0104; + if (ds->dsa_port_mask & (1 << p)) + val |= 0x0100; + REG_WRITE(addr, 0x04, val); /* * Port Control 1: disable trunking. Also, if this is the * CPU port, enable learn messages to be sent to this port. */ - REG_WRITE(addr, 0x05, (p == ds->cpu_port) ? 0x8000 : 0x0000); + REG_WRITE(addr, 0x05, dsa_is_cpu_port(ds, p) ? 0x8000 : 0x0000); /* * Port based VLAN map: give each port its own address * database, allow the CPU port to talk to each of the 'real' * ports, and allow each of the 'real' ports to only talk to - * the CPU port. + * the upstream port. */ - REG_WRITE(addr, 0x06, - ((p & 0xf) << 12) | - ((p == ds->cpu_port) ? - ds->valid_port_mask : - (1 << ds->cpu_port))); + val = (p & 0xf) << 12; + if (dsa_is_cpu_port(ds, p)) + val |= ds->phys_port_mask; + else + val |= 1 << dsa_upstream_port(ds); + REG_WRITE(addr, 0x06, val); /* * Default VLAN ID and priority: don't set a default VLAN @@ -213,13 +233,15 @@ static int mv88e6131_setup_port(struct dsa_switch *ds, int p) * untagged frames on this port, do a destination address * lookup on received packets as usual, don't send a copy * of all transmitted/received frames on this port to the - * CPU, and configure the CPU port number. Also, if this - * is the CPU port, enable forwarding of unknown multicast - * addresses. + * CPU, and configure the upstream port number. + * + * If this is the upstream port for this switch, enable + * forwarding of unknown multicast addresses. */ - REG_WRITE(addr, 0x08, - ((p == ds->cpu_port) ? 0x00c0 : 0x0080) | - ds->cpu_port); + val = 0x0080 | dsa_upstream_port(ds); + if (p == dsa_upstream_port(ds)) + val |= 0x0040; + REG_WRITE(addr, 0x08, val); /* * Rate Control: disable ingress rate limiting. diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 99114e5b32e4..ed131181215d 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1,6 +1,6 @@ /* * net/dsa/slave.c - Slave device handling - * Copyright (c) 2008 Marvell Semiconductor + * Copyright (c) 2008-2009 Marvell Semiconductor * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -19,7 +19,7 @@ static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg) { struct dsa_switch *ds = bus->priv; - if (ds->valid_port_mask & (1 << addr)) + if (ds->phys_port_mask & (1 << addr)) return ds->drv->phy_read(ds, addr, reg); return 0xffff; @@ -29,7 +29,7 @@ static int dsa_slave_phy_write(struct mii_bus *bus, int addr, int reg, u16 val) { struct dsa_switch *ds = bus->priv; - if (ds->valid_port_mask & (1 << addr)) + if (ds->phys_port_mask & (1 << addr)) return ds->drv->phy_write(ds, addr, reg, val); return 0; @@ -43,7 +43,7 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds) ds->slave_mii_bus->write = dsa_slave_phy_write; snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "%s:%.2x", ds->master_mii_bus->id, ds->pd->sw_addr); - ds->slave_mii_bus->parent = &(ds->master_mii_bus->dev); + ds->slave_mii_bus->parent = &ds->master_mii_bus->dev; } @@ -51,9 +51,8 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds) static int dsa_slave_init(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); - struct net_device *master = p->parent->master_netdev; - dev->iflink = master->ifindex; + dev->iflink = p->parent->dst->master_netdev->ifindex; return 0; } @@ -61,7 +60,7 @@ static int dsa_slave_init(struct net_device *dev) static int dsa_slave_open(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); - struct net_device *master = p->parent->master_netdev; + struct net_device *master = p->parent->dst->master_netdev; int err; if (!(master->flags & IFF_UP)) @@ -99,7 +98,7 @@ out: static int dsa_slave_close(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); - struct net_device *master = p->parent->master_netdev; + struct net_device *master = p->parent->dst->master_netdev; dev_mc_unsync(master, dev); dev_unicast_unsync(master, dev); @@ -117,7 +116,7 @@ static int dsa_slave_close(struct net_device *dev) static void dsa_slave_change_rx_flags(struct net_device *dev, int change) { struct dsa_slave_priv *p = netdev_priv(dev); - struct net_device *master = p->parent->master_netdev; + struct net_device *master = p->parent->dst->master_netdev; if (change & IFF_ALLMULTI) dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1); @@ -128,7 +127,7 @@ static void dsa_slave_change_rx_flags(struct net_device *dev, int change) static void dsa_slave_set_rx_mode(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); - struct net_device *master = p->parent->master_netdev; + struct net_device *master = p->parent->dst->master_netdev; dev_mc_sync(master, dev); dev_unicast_sync(master, dev); @@ -137,7 +136,7 @@ static void dsa_slave_set_rx_mode(struct net_device *dev) static int dsa_slave_set_mac_address(struct net_device *dev, void *a) { struct dsa_slave_priv *p = netdev_priv(dev); - struct net_device *master = p->parent->master_netdev; + struct net_device *master = p->parent->dst->master_netdev; struct sockaddr *addr = a; int err; @@ -341,7 +340,7 @@ struct net_device * dsa_slave_create(struct dsa_switch *ds, struct device *parent, int port, char *name) { - struct net_device *master = ds->master_netdev; + struct net_device *master = ds->dst->master_netdev; struct net_device *slave_dev; struct dsa_slave_priv *p; int ret; @@ -356,7 +355,7 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, memcpy(slave_dev->dev_addr, master->dev_addr, ETH_ALEN); slave_dev->tx_queue_len = 0; - switch (ds->tag_protocol) { + switch (ds->dst->tag_protocol) { #ifdef CONFIG_NET_DSA_TAG_DSA case htons(ETH_P_DSA): slave_dev->netdev_ops = &dsa_netdev_ops; diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index 0b8a91ddff44..8fa25bafe6ca 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -1,6 +1,6 @@ /* * net/dsa/tag_dsa.c - (Non-ethertype) DSA tagging - * Copyright (c) 2008 Marvell Semiconductor + * Copyright (c) 2008-2009 Marvell Semiconductor * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -36,7 +36,7 @@ int dsa_xmit(struct sk_buff *skb, struct net_device *dev) * Construct tagged FROM_CPU DSA tag from 802.1q tag. */ dsa_header = skb->data + 2 * ETH_ALEN; - dsa_header[0] = 0x60; + dsa_header[0] = 0x60 | p->parent->index; dsa_header[1] = p->port << 3; /* @@ -57,7 +57,7 @@ int dsa_xmit(struct sk_buff *skb, struct net_device *dev) * Construct untagged FROM_CPU DSA tag. */ dsa_header = skb->data + 2 * ETH_ALEN; - dsa_header[0] = 0x40; + dsa_header[0] = 0x40 | p->parent->index; dsa_header[1] = p->port << 3; dsa_header[2] = 0x00; dsa_header[3] = 0x00; @@ -65,7 +65,7 @@ int dsa_xmit(struct sk_buff *skb, struct net_device *dev) skb->protocol = htons(ETH_P_DSA); - skb->dev = p->parent->master_netdev; + skb->dev = p->parent->dst->master_netdev; dev_queue_xmit(skb); return NETDEV_TX_OK; @@ -78,11 +78,13 @@ out_free: static int dsa_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - struct dsa_switch *ds = dev->dsa_ptr; + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_switch *ds; u8 *dsa_header; + int source_device; int source_port; - if (unlikely(ds == NULL)) + if (unlikely(dst == NULL)) goto out_drop; skb = skb_unshare(skb, GFP_ATOMIC); @@ -98,16 +100,24 @@ static int dsa_rcv(struct sk_buff *skb, struct net_device *dev, dsa_header = skb->data - 2; /* - * Check that frame type is either TO_CPU or FORWARD, and - * that the source device is zero. + * Check that frame type is either TO_CPU or FORWARD. */ - if ((dsa_header[0] & 0xdf) != 0x00 && (dsa_header[0] & 0xdf) != 0xc0) + if ((dsa_header[0] & 0xc0) != 0x00 && (dsa_header[0] & 0xc0) != 0xc0) goto out_drop; /* - * Check that the source port is a registered DSA port. + * Determine source device and port. */ + source_device = dsa_header[0] & 0x1f; source_port = (dsa_header[1] >> 3) & 0x1f; + + /* + * Check that the source device exists and that the source + * port is a registered DSA port. + */ + if (source_device >= dst->pd->nr_chips) + goto out_drop; + ds = dst->ds[source_device]; if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL) goto out_drop; diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index 16fcb6d196d4..815607bd286f 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c @@ -1,6 +1,6 @@ /* * net/dsa/tag_edsa.c - Ethertype DSA tagging - * Copyright (c) 2008 Marvell Semiconductor + * Copyright (c) 2008-2009 Marvell Semiconductor * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -45,7 +45,7 @@ int edsa_xmit(struct sk_buff *skb, struct net_device *dev) edsa_header[1] = ETH_P_EDSA & 0xff; edsa_header[2] = 0x00; edsa_header[3] = 0x00; - edsa_header[4] = 0x60; + edsa_header[4] = 0x60 | p->parent->index; edsa_header[5] = p->port << 3; /* @@ -70,7 +70,7 @@ int edsa_xmit(struct sk_buff *skb, struct net_device *dev) edsa_header[1] = ETH_P_EDSA & 0xff; edsa_header[2] = 0x00; edsa_header[3] = 0x00; - edsa_header[4] = 0x40; + edsa_header[4] = 0x40 | p->parent->index; edsa_header[5] = p->port << 3; edsa_header[6] = 0x00; edsa_header[7] = 0x00; @@ -78,7 +78,7 @@ int edsa_xmit(struct sk_buff *skb, struct net_device *dev) skb->protocol = htons(ETH_P_EDSA); - skb->dev = p->parent->master_netdev; + skb->dev = p->parent->dst->master_netdev; dev_queue_xmit(skb); return NETDEV_TX_OK; @@ -91,11 +91,13 @@ out_free: static int edsa_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - struct dsa_switch *ds = dev->dsa_ptr; + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_switch *ds; u8 *edsa_header; + int source_device; int source_port; - if (unlikely(ds == NULL)) + if (unlikely(dst == NULL)) goto out_drop; skb = skb_unshare(skb, GFP_ATOMIC); @@ -111,16 +113,24 @@ static int edsa_rcv(struct sk_buff *skb, struct net_device *dev, edsa_header = skb->data + 2; /* - * Check that frame type is either TO_CPU or FORWARD, and - * that the source device is zero. + * Check that frame type is either TO_CPU or FORWARD. */ - if ((edsa_header[0] & 0xdf) != 0x00 && (edsa_header[0] & 0xdf) != 0xc0) + if ((edsa_header[0] & 0xc0) != 0x00 && (edsa_header[0] & 0xc0) != 0xc0) goto out_drop; /* - * Check that the source port is a registered DSA port. + * Determine source device and port. */ + source_device = edsa_header[0] & 0x1f; source_port = (edsa_header[1] >> 3) & 0x1f; + + /* + * Check that the source device exists and that the source + * port is a registered DSA port. + */ + if (source_device >= dst->pd->nr_chips) + goto out_drop; + ds = dst->ds[source_device]; if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL) goto out_drop; diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index a6d959da6784..1c3e30c38b86 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -1,6 +1,6 @@ /* * net/dsa/tag_trailer.c - Trailer tag format handling - * Copyright (c) 2008 Marvell Semiconductor + * Copyright (c) 2008-2009 Marvell Semiconductor * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -59,7 +59,7 @@ int trailer_xmit(struct sk_buff *skb, struct net_device *dev) nskb->protocol = htons(ETH_P_TRAILER); - nskb->dev = p->parent->master_netdev; + nskb->dev = p->parent->dst->master_netdev; dev_queue_xmit(nskb); return NETDEV_TX_OK; @@ -68,12 +68,14 @@ int trailer_xmit(struct sk_buff *skb, struct net_device *dev) static int trailer_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - struct dsa_switch *ds = dev->dsa_ptr; + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_switch *ds; u8 *trailer; int source_port; - if (unlikely(ds == NULL)) + if (unlikely(dst == NULL)) goto out_drop; + ds = dst->ds[0]; skb = skb_unshare(skb, GFP_ATOMIC); if (skb == NULL) -- cgit v1.2.3 From 788dee0a954745a182f9341539e5e0fe874b48fc Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 20 Mar 2009 19:35:28 +0000 Subject: atm: convert mpc device to using netdev_ops This converts the mpc device to using new netdevice_ops. Compile tested only, needs more than usual review since device was swaping pointers around etc. Signed-off-by: Stephen Hemminger Acked-by: Chas Williams Signed-off-by: David S. Miller --- net/atm/mpc.c | 32 ++++++++++++++------------------ net/atm/mpc.h | 5 ++++- 2 files changed, 18 insertions(+), 19 deletions(-) (limited to 'net') diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 039d5cc72c3d..e5bf11453a18 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -286,33 +286,32 @@ static void start_mpc(struct mpoa_client *mpc, struct net_device *dev) { dprintk("mpoa: (%s) start_mpc:\n", mpc->dev->name); - if (dev->hard_start_xmit == NULL) { - printk("mpoa: (%s) start_mpc: dev->hard_start_xmit == NULL, not starting\n", - dev->name); - return; + if (!dev->netdev_ops) + printk("mpoa: (%s) start_mpc not starting\n", dev->name); + else { + mpc->old_ops = dev->netdev_ops; + mpc->new_ops = *mpc->old_ops; + mpc->new_ops.ndo_start_xmit = mpc_send_packet; + dev->netdev_ops = &mpc->new_ops; } - mpc->old_hard_start_xmit = dev->hard_start_xmit; - dev->hard_start_xmit = mpc_send_packet; - - return; } static void stop_mpc(struct mpoa_client *mpc) { - + struct net_device *dev = mpc->dev; dprintk("mpoa: (%s) stop_mpc:", mpc->dev->name); /* Lets not nullify lec device's dev->hard_start_xmit */ - if (mpc->dev->hard_start_xmit != mpc_send_packet) { + if (dev->netdev_ops != &mpc->new_ops) { dprintk(" mpc already stopped, not fatal\n"); return; } dprintk("\n"); - mpc->dev->hard_start_xmit = mpc->old_hard_start_xmit; - mpc->old_hard_start_xmit = NULL; - /* close_shortcuts(mpc); ??? FIXME */ - return; + dev->netdev_ops = mpc->old_ops; + mpc->old_ops = NULL; + + /* close_shortcuts(mpc); ??? FIXME */ } static const char *mpoa_device_type_string(char type) __attribute__ ((unused)); @@ -531,7 +530,6 @@ static int send_via_shortcut(struct sk_buff *skb, struct mpoa_client *mpc) */ static int mpc_send_packet(struct sk_buff *skb, struct net_device *dev) { - int retval; struct mpoa_client *mpc; struct ethhdr *eth; int i = 0; @@ -561,9 +559,7 @@ static int mpc_send_packet(struct sk_buff *skb, struct net_device *dev) } non_ip: - retval = mpc->old_hard_start_xmit(skb,dev); - - return retval; + return mpc->old_ops->ndo_start_xmit(skb,dev); } static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg) diff --git a/net/atm/mpc.h b/net/atm/mpc.h index 24c386c35f57..0919a88bbc70 100644 --- a/net/atm/mpc.h +++ b/net/atm/mpc.h @@ -15,7 +15,7 @@ struct mpoa_client { struct mpoa_client *next; struct net_device *dev; /* lec in question */ int dev_num; /* e.g. 2 for lec2 */ - int (*old_hard_start_xmit)(struct sk_buff *skb, struct net_device *dev); + struct atm_vcc *mpoad_vcc; /* control channel to mpoad */ uint8_t mps_ctrl_addr[ATM_ESA_LEN]; /* MPS control ATM address */ uint8_t our_ctrl_addr[ATM_ESA_LEN]; /* MPC's control ATM address */ @@ -31,6 +31,9 @@ struct mpoa_client { uint8_t *mps_macs; /* array of MPS MAC addresses, >=1 */ int number_of_mps_macs; /* number of the above MAC addresses */ struct mpc_parameters parameters; /* parameters for this client */ + + const struct net_device_ops *old_ops; + struct net_device_ops new_ops; }; -- cgit v1.2.3 From dde09758557120cb71fb760cfeaed1b8e27209ef Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 20 Mar 2009 19:35:29 +0000 Subject: atm: convert clip driver to net_device_ops Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/atm/clip.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/atm/clip.c b/net/atm/clip.c index da42fd06b61f..3dc0a3a42a57 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -552,10 +552,13 @@ static int clip_setentry(struct atm_vcc *vcc, __be32 ip) return error; } +static const struct net_device_ops clip_netdev_ops = { + .ndo_start_xmit = clip_start_xmit, +}; + static void clip_setup(struct net_device *dev) { - dev->hard_start_xmit = clip_start_xmit; - /* sg_xmit ... */ + dev->netdev_ops = &clip_netdev_ops; dev->type = ARPHRD_ATM; dev->hard_header_len = RFC1483LLC_LEN; dev->mtu = RFC1626_MTU; @@ -615,7 +618,7 @@ static int clip_device_event(struct notifier_block *this, unsigned long event, } /* ignore non-CLIP devices */ - if (dev->type != ARPHRD_ATM || dev->hard_start_xmit != clip_start_xmit) + if (dev->type != ARPHRD_ATM || dev->netdev_ops != &clip_netdev_ops) return NOTIFY_DONE; switch (event) { -- cgit v1.2.3 From 92bcd4fe9a63e8785a4f6ba4262ee601c271a70b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 20 Mar 2009 19:35:33 +0000 Subject: irda: net_device_ops ioctl fix Need to reference net_device_ops not old pointer. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/irda/irda_device.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c index ea319e3ddc18..bf92e1473447 100644 --- a/net/irda/irda_device.c +++ b/net/irda/irda_device.c @@ -149,13 +149,14 @@ int irda_device_is_receiving(struct net_device *dev) IRDA_DEBUG(2, "%s()\n", __func__); - if (!dev->do_ioctl) { + if (!dev->netdev_ops->ndo_do_ioctl) { IRDA_ERROR("%s: do_ioctl not impl. by device driver\n", __func__); return -1; } - ret = dev->do_ioctl(dev, (struct ifreq *) &req, SIOCGRECEIVING); + ret = (dev->netdev_ops->ndo_do_ioctl)(dev, (struct ifreq *) &req, + SIOCGRECEIVING); if (ret < 0) return ret; -- cgit v1.2.3 From 9cc8ba783d56b36259b2d610e97bcda8a6fe3b02 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 20 Mar 2009 19:35:34 +0000 Subject: irlan: convert to net_device_ops Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/irda/irlan/irlan_eth.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index 05112be99569..724bcf951b80 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -45,6 +45,16 @@ static int irlan_eth_xmit(struct sk_buff *skb, struct net_device *dev); static void irlan_eth_set_multicast_list( struct net_device *dev); static struct net_device_stats *irlan_eth_get_stats(struct net_device *dev); +static const struct net_device_ops irlan_eth_netdev_ops = { + .ndo_open = irlan_eth_open, + .ndo_stop = irlan_eth_close, + .ndo_start_xmit = irlan_eth_xmit, + .ndo_get_stats = irlan_eth_get_stats, + .ndo_set_multicast_list = irlan_eth_set_multicast_list, + .ndo_change_mtu = eth_change_mtu, + .ndo_validate_addr = eth_validate_addr, +}; + /* * Function irlan_eth_setup (dev) * @@ -53,14 +63,11 @@ static struct net_device_stats *irlan_eth_get_stats(struct net_device *dev); */ static void irlan_eth_setup(struct net_device *dev) { - dev->open = irlan_eth_open; - dev->stop = irlan_eth_close; - dev->hard_start_xmit = irlan_eth_xmit; - dev->get_stats = irlan_eth_get_stats; - dev->set_multicast_list = irlan_eth_set_multicast_list; + ether_setup(dev); + + dev->netdev_ops = &irlan_eth_netdev_ops; dev->destructor = free_netdev; - ether_setup(dev); /* * Lets do all queueing in IrTTP instead of this device driver. -- cgit v1.2.3 From d44c3a2e0e5d2c75d22284462c66d166604b1f18 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 20 Mar 2009 19:36:44 +0000 Subject: netdev: expose net_device_ops compat as config option Now that most network device drivers in (all but one in x86_64 allmodconfig) support net_device_ops. Expose it as a configuration parameter. Still need to address even older 32 bit drivers, and other arch before compatiablity can be scheduled for removal in some future release. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/Kconfig | 3 --- 1 file changed, 3 deletions(-) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index c9fdcd7e71ea..93998a9c39c2 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -24,9 +24,6 @@ if NET menu "Networking options" -config COMPAT_NET_DEV_OPS - def_bool y - source "net/packet/Kconfig" source "net/unix/Kconfig" source "net/xfrm/Kconfig" -- cgit v1.2.3 From 96e0bf4b5193d0d97d139f99e2dd128763d55521 Mon Sep 17 00:00:00 2001 From: John Dykstra Date: Sun, 22 Mar 2009 21:49:57 -0700 Subject: tcp: Discard segments that ack data not yet sent Discard incoming packets whose ack field iincludes data not yet sent. This is consistent with RFC 793 Section 3.9. Change tcp_ack() to distinguish between too-small and too-large ack field values. Keep segments with too-large ack fields out of the fast path, and change slow path to discard them. Reported-by: Oliver Zheng Signed-off-by: John Dykstra Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8ac82b3703ab..2bc8e27a163d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3585,15 +3585,18 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) int prior_packets; int frto_cwnd = 0; - /* If the ack is newer than sent or older than previous acks + /* If the ack is older than previous acks * then we can probably ignore it. */ - if (after(ack, tp->snd_nxt)) - goto uninteresting_ack; - if (before(ack, prior_snd_una)) goto old_ack; + /* If the ack includes data we haven't sent yet, discard + * this segment (RFC793 Section 3.9). + */ + if (after(ack, tp->snd_nxt)) + goto invalid_ack; + if (after(ack, prior_snd_una)) flag |= FLAG_SND_UNA_ADVANCED; @@ -3683,6 +3686,10 @@ no_queue: tcp_ack_probe(sk); return 1; +invalid_ack: + SOCK_DEBUG(sk, "Ack %u after %u:%u\n", ack, tp->snd_una, tp->snd_nxt); + return -1; + old_ack: if (TCP_SKB_CB(skb)->sacked) { tcp_sacktag_write_queue(sk, skb, prior_snd_una); @@ -3690,8 +3697,7 @@ old_ack: tcp_try_keep_open(sk); } -uninteresting_ack: - SOCK_DEBUG(sk, "Ack %u out of %u:%u\n", ack, tp->snd_una, tp->snd_nxt); + SOCK_DEBUG(sk, "Ack %u before %u:%u\n", ack, tp->snd_una, tp->snd_nxt); return 0; } @@ -5141,7 +5147,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, */ if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags && - TCP_SKB_CB(skb)->seq == tp->rcv_nxt) { + TCP_SKB_CB(skb)->seq == tp->rcv_nxt && + !after(TCP_SKB_CB(skb)->ack_seq, tp->snd_nxt)) { int tcp_header_len = tp->tcp_header_len; /* Timestamp header prediction: tcp_header_len @@ -5294,8 +5301,8 @@ slow_path: return -res; step5: - if (th->ack) - tcp_ack(sk, skb, FLAG_SLOWPATH); + if (th->ack && tcp_ack(sk, skb, FLAG_SLOWPATH) < 0) + goto discard; tcp_rcv_rtt_measure_ts(sk, skb); @@ -5632,7 +5639,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, /* step 5: check the ACK field */ if (th->ack) { - int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH); + int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH) > 0; switch (sk->sk_state) { case TCP_SYN_RECV: -- cgit v1.2.3 From 176252746ebbc8db97e304345af1f2563c7dc139 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Mon, 23 Mar 2009 13:16:53 +0100 Subject: netfilter: sysctl support of logger choice This patchs adds support of modification of the used logger via sysctl. It can be used to change the logger to module that can not use the bind operation (ipt_LOG and ipt_ULOG). For this purpose, it creates a directory /proc/sys/net/netfilter/nf_log which contains a file per-protocol. The content of the file is the name current logger (NONE if not set) and a logger can be setup by simply echoing its name to the file. By echoing "NONE" to a /proc/sys/net/netfilter/nf_log/PROTO file, the logger corresponding to this PROTO is set to NULL. Signed-off-by: Eric Leblond Signed-off-by: Patrick McHardy --- net/netfilter/nf_log.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 4fcbcc71aa32..8bb998fe098b 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -14,6 +14,7 @@ LOG target modules */ #define NF_LOG_PREFIXLEN 128 +#define NFLOGGER_NAME_LEN 64 static const struct nf_logger *nf_loggers[NFPROTO_NUMPROTO] __read_mostly; static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly; @@ -207,18 +208,100 @@ static const struct file_operations nflog_file_ops = { .release = seq_release, }; + #endif /* PROC_FS */ +#ifdef CONFIG_SYSCTL +struct ctl_path nf_log_sysctl_path[] = { + { .procname = "net", .ctl_name = CTL_NET, }, + { .procname = "netfilter", .ctl_name = NET_NETFILTER, }, + { .procname = "nf_log", .ctl_name = CTL_UNNUMBERED, }, + { } +}; + +static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3]; +static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1]; +static struct ctl_table_header *nf_log_dir_header; -int __init netfilter_log_init(void) +static int nf_log_proc_dostring(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp, loff_t *ppos) +{ + const struct nf_logger *logger; + int r = 0; + int tindex = (unsigned long)table->extra1; + + if (write) { + if (!strcmp(buffer, "NONE")) { + nf_log_unbind_pf(tindex); + return 0; + } + mutex_lock(&nf_log_mutex); + logger = __find_logger(tindex, buffer); + if (logger == NULL) { + mutex_unlock(&nf_log_mutex); + return -ENOENT; + } + rcu_assign_pointer(nf_loggers[tindex], logger); + mutex_unlock(&nf_log_mutex); + } else { + rcu_read_lock(); + logger = rcu_dereference(nf_loggers[tindex]); + if (!logger) + table->data = "NONE"; + else + table->data = logger->name; + r = proc_dostring(table, write, filp, buffer, lenp, ppos); + rcu_read_unlock(); + } + + return r; +} + +static __init int netfilter_log_sysctl_init(void) { int i; + + for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) { + snprintf(nf_log_sysctl_fnames[i-NFPROTO_UNSPEC], 3, "%d", i); + nf_log_sysctl_table[i].ctl_name = CTL_UNNUMBERED; + nf_log_sysctl_table[i].procname = + nf_log_sysctl_fnames[i-NFPROTO_UNSPEC]; + nf_log_sysctl_table[i].data = NULL; + nf_log_sysctl_table[i].maxlen = + NFLOGGER_NAME_LEN * sizeof(char); + nf_log_sysctl_table[i].mode = 0644; + nf_log_sysctl_table[i].proc_handler = nf_log_proc_dostring; + nf_log_sysctl_table[i].extra1 = (void *)(unsigned long) i; + } + + nf_log_dir_header = register_sysctl_paths(nf_log_sysctl_path, + nf_log_sysctl_table); + if (!nf_log_dir_header) + return -ENOMEM; + + return 0; +} +#else +static __init int netfilter_log_sysctl_init(void) +{ + return 0; +} +#endif /* CONFIG_SYSCTL */ + +int __init netfilter_log_init(void) +{ + int i, r; #ifdef CONFIG_PROC_FS if (!proc_create("nf_log", S_IRUGO, proc_net_netfilter, &nflog_file_ops)) return -1; #endif + /* Errors will trigger panic, unroll on error is unnecessary. */ + r = netfilter_log_sysctl_init(); + if (r < 0) + return r; + for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) INIT_LIST_HEAD(&(nf_loggers_l[i])); -- cgit v1.2.3 From dd5b6ce6fd465eab90357711c8e8124dc3a31ff0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 23 Mar 2009 13:21:06 +0100 Subject: nefilter: nfnetlink: add nfnetlink_set_err and use it in ctnetlink This patch adds nfnetlink_set_err() to propagate the error to netlink broadcast listener in case of memory allocation errors in the message building. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_netlink.c | 2 ++ net/netfilter/nfnetlink.c | 6 ++++++ net/netlink/af_netlink.c | 1 + 3 files changed, 9 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index d1fe9d15ac5c..1b75c9efb0eb 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -518,6 +518,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, nla_put_failure: rcu_read_unlock(); nlmsg_failure: + nfnetlink_set_err(0, group, -ENOBUFS); kfree_skb(skb); return NOTIFY_DONE; } @@ -1514,6 +1515,7 @@ static int ctnetlink_expect_event(struct notifier_block *this, nla_put_failure: rcu_read_unlock(); nlmsg_failure: + nfnetlink_set_err(0, 0, -ENOBUFS); kfree_skb(skb); return NOTIFY_DONE; } diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 9c0ba17a1ddb..2785d66a7e38 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -113,6 +113,12 @@ int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) } EXPORT_SYMBOL_GPL(nfnetlink_send); +void nfnetlink_set_err(u32 pid, u32 group, int error) +{ + netlink_set_err(nfnl, pid, group, error); +} +EXPORT_SYMBOL_GPL(nfnetlink_set_err); + int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags) { return netlink_unicast(nfnl, skb, pid, flags); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 6ee69c27f806..5b33879c6422 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1106,6 +1106,7 @@ void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) read_unlock(&nl_table_lock); } +EXPORT_SYMBOL(netlink_set_err); /* must be called with netlink table grabbed */ static void netlink_update_socket_mc(struct netlink_sock *nlk, -- cgit v1.2.3 From 534f81a5068799799e264fd162e9488a129f98d4 Mon Sep 17 00:00:00 2001 From: "Mark H. Weaver" Date: Mon, 23 Mar 2009 13:46:12 +0100 Subject: netfilter: nf_conntrack_tcp: fix unaligned memory access in tcp_sack This patch fixes an unaligned memory access in tcp_sack while reading sequence numbers from TCP selective acknowledgement options. Prior to applying this patch, upstream linux-2.6.27.20 was occasionally generating messages like this on my sparc64 system: [54678.532071] Kernel unaligned access at TPC[6b17d4] tcp_packet+0xcd4/0xd00 Acked-by: David S. Miller Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_proto_tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index f3fd154d1ddd..56ac4ee77a1d 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -15,6 +15,7 @@ #include #include #include +#include #include @@ -466,7 +467,7 @@ static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, for (i = 0; i < (opsize - TCPOLEN_SACK_BASE); i += TCPOLEN_SACK_PERBLOCK) { - tmp = ntohl(*((__be32 *)(ptr+i)+1)); + tmp = get_unaligned_be32((__be32 *)(ptr+i)+1); if (after(tmp, *sack)) *sack = tmp; -- cgit v1.2.3 From 30842f2989aacfaba3ccb39829b3417be9313dbe Mon Sep 17 00:00:00 2001 From: Vitaly Mayatskikh Date: Mon, 23 Mar 2009 15:22:33 -0700 Subject: udp: Wrong locking code in udp seq_file infrastructure Reading zero bytes from /proc/net/udp or other similar files which use the same seq_file udp infrastructure panics kernel in that way: ===================================== [ BUG: bad unlock balance detected! ] ------------------------------------- read/1985 is trying to release lock (&table->hash[i].lock) at: [] udp_seq_stop+0x27/0x29 but there are no more locks to release! other info that might help us debug this: 1 lock held by read/1985: #0: (&p->lock){--..}, at: [] seq_read+0x38/0x348 stack backtrace: Pid: 1985, comm: read Not tainted 2.6.29-rc8 #9 Call Trace: [] ? udp_seq_stop+0x27/0x29 [] print_unlock_inbalance_bug+0xd6/0xe1 [] lock_release_non_nested+0x9e/0x1c6 [] ? seq_read+0xb2/0x348 [] ? mark_held_locks+0x68/0x86 [] ? udp_seq_stop+0x27/0x29 [] lock_release+0x15d/0x189 [] _spin_unlock_bh+0x1e/0x34 [] udp_seq_stop+0x27/0x29 [] seq_read+0x2bb/0x348 [] ? seq_read+0x0/0x348 [] proc_reg_read+0x90/0xaf [] vfs_read+0xa6/0x103 [] ? trace_hardirqs_on_caller+0x12f/0x153 [] sys_read+0x45/0x69 [] system_call_fastpath+0x16/0x1b BUG: scheduling while atomic: read/1985/0xffffff00 INFO: lockdep is turned off. Modules linked in: cpufreq_ondemand acpi_cpufreq freq_table dm_multipath kvm ppdev snd_hda_codec_analog snd_hda_intel snd_hda_codec snd_hwdep snd_seq_dummy snd_seq_oss snd_seq_midi_event arc4 snd_s eq ecb thinkpad_acpi snd_seq_device iwl3945 hwmon sdhci_pci snd_pcm_oss sdhci rfkill mmc_core snd_mixer_oss i2c_i801 mac80211 yenta_socket ricoh_mmc i2c_core iTCO_wdt snd_pcm iTCO_vendor_support rs rc_nonstatic snd_timer snd lib80211 cfg80211 soundcore snd_page_alloc video parport_pc output parport e1000e [last unloaded: scsi_wait_scan] Pid: 1985, comm: read Not tainted 2.6.29-rc8 #9 Call Trace: [] ? __debug_show_held_locks+0x1b/0x24 [] __schedule_bug+0x7e/0x83 [] schedule+0xce/0x838 [] ? fsnotify_access+0x5f/0x67 [] ? sysret_careful+0xb/0x37 [] ? trace_hardirqs_on_caller+0x1f/0x153 [] ? trace_hardirqs_on_thunk+0x3a/0x3f [] sysret_careful+0x31/0x37 read[1985]: segfault at 7fffc479bfe8 ip 0000003e7420a180 sp 00007fffc479bfa0 error 6 Kernel panic - not syncing: Aiee, killing interrupt handler! udp_seq_stop() tries to unlock not yet locked spinlock. The lock was lost during splitting global udp_hash_lock to subsequent spinlocks. Signed-off by: Vitaly Mayatskikh Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c47c989cb1fb..c8bee189a193 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1614,7 +1614,8 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) } while (sk && (!net_eq(sock_net(sk), net) || sk->sk_family != state->family)); if (!sk) { - spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); + if (state->bucket < UDP_HTABLE_SIZE) + spin_unlock_bh(&state->udp_table->hash[state->bucket].lock); return udp_get_first(seq, state->bucket + 1); } return sk; @@ -1632,6 +1633,9 @@ static struct sock *udp_get_idx(struct seq_file *seq, loff_t pos) static void *udp_seq_start(struct seq_file *seq, loff_t *pos) { + struct udp_iter_state *state = seq->private; + state->bucket = UDP_HTABLE_SIZE; + return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; } -- cgit v1.2.3 From 1d45209d89e647e9f27e4afa1f47338df73bc112 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 24 Mar 2009 14:26:50 +0100 Subject: netfilter: nf_conntrack: Reduce conntrack count in nf_conntrack_free() We use RCU to defer freeing of conntrack structures. In DOS situation, RCU might accumulate about 10.000 elements per CPU in its internal queues. To get accurate conntrack counts (at the expense of slightly more RAM used), we might consider conntrack counter not taking into account "about to be freed elements, waiting in RCU queues". We thus decrement it in nf_conntrack_free(), not in the RCU callback. Signed-off-by: Eric Dumazet Tested-by: Joakim Tjernlund Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index ebc275600125..55befe59e1c0 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -517,16 +517,17 @@ EXPORT_SYMBOL_GPL(nf_conntrack_alloc); static void nf_conntrack_free_rcu(struct rcu_head *head) { struct nf_conn *ct = container_of(head, struct nf_conn, rcu); - struct net *net = nf_ct_net(ct); nf_ct_ext_free(ct); kmem_cache_free(nf_conntrack_cachep, ct); - atomic_dec(&net->ct.count); } void nf_conntrack_free(struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); + nf_ct_ext_destroy(ct); + atomic_dec(&net->ct.count); call_rcu(&ct->rcu, nf_conntrack_free_rcu); } EXPORT_SYMBOL_GPL(nf_conntrack_free); -- cgit v1.2.3 From 8dd1d0471bcf634f2cd6a6cf4b6531bb61f0af47 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Tue, 24 Mar 2009 13:35:27 -0700 Subject: netfilter: trivial Kconfig spelling fixes Supplements commit 67c0d57930ff9a24c6c34abee1b01f7716a9b0e2. Signed-off-by: Jan Engelhardt Signed-off-by: David S. Miller --- net/ipv6/netfilter/Kconfig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 625353a5fe18..29d643bcafa4 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -101,7 +101,7 @@ config IP6_NF_MATCH_HL ---help--- This is a backwards-compat option for the user's convenience (e.g. when running oldconfig). It selects - COFNIG_NETFILTER_XT_MATCH_HL. + CONFIG_NETFILTER_XT_MATCH_HL. config IP6_NF_MATCH_IPV6HEADER tristate '"ipv6header" IPv6 Extension Headers Match' @@ -137,7 +137,7 @@ config IP6_NF_TARGET_HL ---help--- This is a backwards-compat option for the user's convenience (e.g. when running oldconfig). It selects - COFNIG_NETFILTER_XT_TARGET_HL. + CONFIG_NETFILTER_XT_TARGET_HL. config IP6_NF_TARGET_LOG tristate "LOG target support" -- cgit v1.2.3 From 35c7f6de7339f40a591a8aeccacdc429b1953674 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 24 Mar 2009 14:15:22 -0700 Subject: arp_tables: ifname_compare() can assume 16bit alignment Arches without efficient unaligned access can still perform a loop assuming 16bit alignment in ifname_compare() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/netfilter/arp_tables.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 64a7c6ce0b98..84b9c179df51 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -76,6 +76,7 @@ static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, /* * Unfortunatly, _b and _mask are not aligned to an int (or long int) * Some arches dont care, unrolling the loop is a win on them. + * For other arches, we only have a 16bit alignement. */ static unsigned long ifname_compare(const char *_a, const char *_b, const char *_mask) { @@ -95,10 +96,13 @@ static unsigned long ifname_compare(const char *_a, const char *_b, const char * BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long)); #else unsigned long ret = 0; + const u16 *a = (const u16 *)_a; + const u16 *b = (const u16 *)_b; + const u16 *mask = (const u16 *)_mask; int i; - for (i = 0; i < IFNAMSIZ; i++) - ret |= (_a[i] ^ _b[i]) & _mask[i]; + for (i = 0; i < IFNAMSIZ/sizeof(u16); i++) + ret |= (a[i] ^ b[i]) & mask[i]; #endif return ret; } -- cgit v1.2.3 From 38938bfe3489394e2eed5e40c9bb8f66a2ce1405 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 24 Mar 2009 16:37:55 -0700 Subject: netlink: add NETLINK_NO_ENOBUFS socket flag This patch adds the NETLINK_NO_ENOBUFS socket flag. This flag can be used by unicast and broadcast listeners to avoid receiving ENOBUFS errors. Generally speaking, ENOBUFS errors are useful to notify two things to the listener: a) You may increase the receiver buffer size via setsockopt(). b) You have lost messages, you may be out of sync. In some cases, ignoring ENOBUFS errors can be useful. For example: a) nfnetlink_queue: this subsystem does not have any sort of resync method and you can decide to ignore ENOBUFS once you have set a given buffer size. b) ctnetlink: you can use this together with the socket flag NETLINK_BROADCAST_SEND_ERROR to stop getting ENOBUFS errors as you do not need to resync (packets whose event are not delivered are drop to provide reliable logging and state-synchronization). Moreover, the use of NETLINK_NO_ENOBUFS also reduces a "go up, go down" effect in terms of performance which is due to the netlink congestion control when the listener cannot back off. The effect is the following: 1) throughput rate goes up and netlink messages are inserted in the receiver buffer. 2) Then, netlink buffer fills and overruns (set on nlk->state bit 0). 3) While the listener empties the receiver buffer, netlink keeps dropping messages. Thus, throughput goes dramatically down. 4) Then, once the listener has emptied the buffer (nlk->state bit 0 is set off), goto step 1. This effect is easy to trigger with netlink broadcast under heavy load, and it is more noticeable when using a big receiver buffer. You can find some results in [1] that show this problem. [1] http://1984.lsi.us.es/linux/netlink/ This patch also includes the use of sk_drop to account the number of netlink messages drop due to overrun. This value is shown in /proc/net/netlink. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 38 ++++++++++++++++++++++++++++++++------ 1 file changed, 32 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index b73d4e61c5ac..8b6bbb3032b0 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -86,6 +86,7 @@ struct netlink_sock { #define NETLINK_KERNEL_SOCKET 0x1 #define NETLINK_RECV_PKTINFO 0x2 #define NETLINK_BROADCAST_SEND_ERROR 0x4 +#define NETLINK_RECV_NO_ENOBUFS 0x8 static inline struct netlink_sock *nlk_sk(struct sock *sk) { @@ -717,10 +718,15 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, static void netlink_overrun(struct sock *sk) { - if (!test_and_set_bit(0, &nlk_sk(sk)->state)) { - sk->sk_err = ENOBUFS; - sk->sk_error_report(sk); + struct netlink_sock *nlk = nlk_sk(sk); + + if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) { + if (!test_and_set_bit(0, &nlk_sk(sk)->state)) { + sk->sk_err = ENOBUFS; + sk->sk_error_report(sk); + } } + atomic_inc(&sk->sk_drops); } static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) @@ -1182,6 +1188,15 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname, nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR; err = 0; break; + case NETLINK_NO_ENOBUFS: + if (val) { + nlk->flags |= NETLINK_RECV_NO_ENOBUFS; + clear_bit(0, &nlk->state); + wake_up_interruptible(&nlk->wait); + } else + nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS; + err = 0; + break; default: err = -ENOPROTOOPT; } @@ -1224,6 +1239,16 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname, return -EFAULT; err = 0; break; + case NETLINK_NO_ENOBUFS: + if (len < sizeof(int)) + return -EINVAL; + len = sizeof(int); + val = nlk->flags & NETLINK_RECV_NO_ENOBUFS ? 1 : 0; + if (put_user(len, optlen) || + put_user(val, optval)) + return -EFAULT; + err = 0; + break; default: err = -ENOPROTOOPT; } @@ -1879,12 +1904,12 @@ static int netlink_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) seq_puts(seq, "sk Eth Pid Groups " - "Rmem Wmem Dump Locks\n"); + "Rmem Wmem Dump Locks Drops\n"); else { struct sock *s = v; struct netlink_sock *nlk = nlk_sk(s); - seq_printf(seq, "%p %-3d %-6d %08x %-8d %-8d %p %d\n", + seq_printf(seq, "%p %-3d %-6d %08x %-8d %-8d %p %-8d %-8d\n", s, s->sk_protocol, nlk->pid, @@ -1892,7 +1917,8 @@ static int netlink_seq_show(struct seq_file *seq, void *v) atomic_read(&s->sk_rmem_alloc), atomic_read(&s->sk_wmem_alloc), nlk->cb, - atomic_read(&s->sk_refcnt) + atomic_read(&s->sk_refcnt), + atomic_read(&s->sk_drops) ); } -- cgit v1.2.3 From ffa6a7054d172a2f57248dff2de600ca795c5656 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Wed, 4 Mar 2009 12:44:00 +0100 Subject: Driver core: Fix device_move() vs. dpm list ordering, v2 dpm_list currently relies on the fact that child devices will be registered after their parents to get a correct suspend order. Using device_move() however destroys this assumption, as an already registered device may be moved under a newly registered one. This patch adds a new argument to device_move(), allowing callers to specify how dpm_list should be adapted. Signed-off-by: Cornelia Huck Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- net/bluetooth/hci_sysfs.c | 2 +- net/bluetooth/rfcomm/tty.c | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 1a1f916be44e..ed82796d4a0f 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -140,7 +140,7 @@ static void del_conn(struct work_struct *work) dev = device_find_child(&conn->dev, NULL, __match_tty); if (!dev) break; - device_move(dev, NULL); + device_move(dev, NULL, DPM_ORDER_DEV_LAST); put_device(dev); } diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index d030c69cb5a3..abdc703a11d2 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -731,7 +731,8 @@ static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp) remove_wait_queue(&dev->wait, &wait); if (err == 0) - device_move(dev->tty_dev, rfcomm_get_device(dev)); + device_move(dev->tty_dev, rfcomm_get_device(dev), + DPM_ORDER_DEV_AFTER_PARENT); rfcomm_tty_copy_pending(dev); @@ -751,7 +752,7 @@ static void rfcomm_tty_close(struct tty_struct *tty, struct file *filp) if (atomic_dec_and_test(&dev->opened)) { if (dev->tty_dev->parent) - device_move(dev->tty_dev, NULL); + device_move(dev->tty_dev, NULL, DPM_ORDER_DEV_LAST); /* Close DLC and dettach TTY */ rfcomm_dlc_close(dev->dlc, 0); -- cgit v1.2.3 From e9d376f0fa66bd630fe27403669c6ae6c22a868f Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Thu, 5 Feb 2009 11:51:38 -0500 Subject: dynamic debug: combine dprintk and dynamic printk This patch combines Greg Bank's dprintk() work with the existing dynamic printk patchset, we are now calling it 'dynamic debug'. The new feature of this patchset is a richer /debugfs control file interface, (an example output from my system is at the bottom), which allows fined grained control over the the debug output. The output can be controlled by function, file, module, format string, and line number. for example, enabled all debug messages in module 'nf_conntrack': echo -n 'module nf_conntrack +p' > /mnt/debugfs/dynamic_debug/control to disable them: echo -n 'module nf_conntrack -p' > /mnt/debugfs/dynamic_debug/control A further explanation can be found in the documentation patch. Signed-off-by: Greg Banks Signed-off-by: Jason Baron Signed-off-by: Greg Kroah-Hartman --- net/netfilter/nf_conntrack_pptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 9e169ef2e854..12bd09dbd36c 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -66,7 +66,7 @@ void struct nf_conntrack_expect *exp) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_expectfn); -#if defined(DEBUG) || defined(CONFIG_DYNAMIC_PRINTK_DEBUG) +#if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG) /* PptpControlMessageType names */ const char *const pptp_msg_name[] = { "UNKNOWN_MESSAGE", -- cgit v1.2.3 From 783ed5a78373253052bc61a3c5c8b9f17af4e3c6 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 24 Mar 2009 16:24:48 +0000 Subject: ipv6: Disallow binding to v4-mapped address on v6-only socket. A socket marked v6-only, can not receive or send traffic to v4-mapped addresses. Thus allowing binding to v4-mapped address on such a socket makes no sense. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index fbf533cc9dce..7f092fa912bd 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -276,6 +276,13 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* Check if the address belongs to the host. */ if (addr_type == IPV6_ADDR_MAPPED) { + /* Binding to v4-mapped address on a v6-only socket + * makes no sense + */ + if (np->ipv6only) { + err = -EINVAL; + goto out; + } v4addr = addr->sin6_addr.s6_addr32[3]; if (inet_addr_type(net, v4addr) != RTN_LOCAL) { err = -EADDRNOTAVAIL; -- cgit v1.2.3 From 0f8d3c7ac3693d7b6c731bf2159273a59bf70e12 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 24 Mar 2009 16:24:49 +0000 Subject: ipv6: Allow ipv4 wildcard binds after ipv6 address binds The IPv4 wildcard (0.0.0.0) address does not intersect in any way with explicit IPv6 addresses. These two should be permitted, but the IPv4 conflict code checks the ipv6only bit as part of the test. Since binding to an explicit IPv6 address restricts the socket to only that IPv6 address, the side-effect is that the socket behaves as v6-only. By explicitely setting ipv6only in this case, allows the 2 binds to succeed. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 7f092fa912bd..9b6a37d16fb0 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -346,8 +346,11 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; } - if (addr_type != IPV6_ADDR_ANY) + if (addr_type != IPV6_ADDR_ANY) { sk->sk_userlocks |= SOCK_BINDADDR_LOCK; + if (addr_type != IPV6_ADDR_MAPPED) + np->ipv6only = 1; + } if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK; inet->sport = htons(inet->num); -- cgit v1.2.3 From 63d9950b08184e6531adceb65f64b429909cc101 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 24 Mar 2009 16:24:50 +0000 Subject: ipv6: Make v4-mapped bindings consistent with IPv4 Binding to a v4-mapped address on an AF_INET6 socket should produce the same result as binding to an IPv4 address on AF_INET socket. The two are interchangable as v4-mapped address is really a portability aid. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/ipv6/af_inet6.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 9b6a37d16fb0..61f55386a236 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -276,6 +276,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) /* Check if the address belongs to the host. */ if (addr_type == IPV6_ADDR_MAPPED) { + int chk_addr_ret; + /* Binding to v4-mapped address on a v6-only socket * makes no sense */ @@ -283,11 +285,17 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) err = -EINVAL; goto out; } + + /* Reproduce AF_INET checks to make the bindings consitant */ v4addr = addr->sin6_addr.s6_addr32[3]; - if (inet_addr_type(net, v4addr) != RTN_LOCAL) { - err = -EADDRNOTAVAIL; + chk_addr_ret = inet_addr_type(net, v4addr); + if (!sysctl_ip_nonlocal_bind && + !(inet->freebind || inet->transparent) && + v4addr != htonl(INADDR_ANY) && + chk_addr_ret != RTN_LOCAL && + chk_addr_ret != RTN_MULTICAST && + chk_addr_ret != RTN_BROADCAST) goto out; - } } else { if (addr_type != IPV6_ADDR_ANY) { struct net_device *dev = NULL; -- cgit v1.2.3 From b2f5e7cd3dee2ed721bf0675e1a1ddebb849aee6 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 24 Mar 2009 16:24:51 +0000 Subject: ipv6: Fix conflict resolutions during ipv6 binding The ipv6 version of bind_conflict code calls ipv6_rcv_saddr_equal() which at times wrongly identified intersections between addresses. It particularly broke down under a few instances and caused erroneous bind conflicts. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/ipv4/udp.c | 3 ++- net/ipv6/addrconf.c | 34 ---------------------------------- net/ipv6/udp.c | 28 ++++++++++++++++++++++++++++ 3 files changed, 30 insertions(+), 35 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 05b7abb99f69..ace2ac8a42f7 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -222,7 +222,7 @@ fail: return error; } -static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) +int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) { struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); @@ -1819,6 +1819,7 @@ EXPORT_SYMBOL(udp_lib_getsockopt); EXPORT_SYMBOL(udp_lib_setsockopt); EXPORT_SYMBOL(udp_poll); EXPORT_SYMBOL(udp_lib_get_port); +EXPORT_SYMBOL(ipv4_rcv_saddr_equal); #ifdef CONFIG_PROC_FS EXPORT_SYMBOL(udp_proc_register); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 8499da9e76a2..a8218bc1806a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1370,40 +1370,6 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add return ifp; } -int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) -{ - const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; - const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); - __be32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; - __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); - int sk_ipv6only = ipv6_only_sock(sk); - int sk2_ipv6only = inet_v6_ipv6only(sk2); - int addr_type = ipv6_addr_type(sk_rcv_saddr6); - int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; - - if (!sk2_rcv_saddr && !sk_ipv6only) - return 1; - - if (addr_type2 == IPV6_ADDR_ANY && - !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) - return 1; - - if (addr_type == IPV6_ADDR_ANY && - !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) - return 1; - - if (sk2_rcv_saddr6 && - ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6)) - return 1; - - if (addr_type == IPV6_ADDR_MAPPED && - !sk2_ipv6only && - (!sk2_rcv_saddr || !sk_rcv_saddr || sk_rcv_saddr == sk2_rcv_saddr)) - return 1; - - return 0; -} - /* Gets referenced address, destroys ifaddr */ static void addrconf_dad_stop(struct inet6_ifaddr *ifp) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 84b1a296eecb..6842dd2edd5b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -49,6 +49,34 @@ #include #include "udp_impl.h" +int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) +{ + const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; + const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); + int sk_ipv6only = ipv6_only_sock(sk); + int sk2_ipv6only = inet_v6_ipv6only(sk2); + int addr_type = ipv6_addr_type(sk_rcv_saddr6); + int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; + + /* if both are mapped, treat as IPv4 */ + if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) + return ipv4_rcv_saddr_equal(sk, sk2); + + if (addr_type2 == IPV6_ADDR_ANY && + !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) + return 1; + + if (addr_type == IPV6_ADDR_ANY && + !(sk_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) + return 1; + + if (sk2_rcv_saddr6 && + ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6)) + return 1; + + return 0; +} + int udp_v6_get_port(struct sock *sk, unsigned short snum) { return udp_lib_get_port(sk, snum, ipv6_rcv_saddr_equal); -- cgit v1.2.3 From a9a9adfe2f99ddadfb574a098392a007970a1577 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 25 Mar 2009 17:21:34 +0100 Subject: netfilter: fix xt_LED build failure net/netfilter/xt_LED.c:40: error: field netfilter_led_trigger has incomplete type net/netfilter/xt_LED.c: In function led_timeout_callback: net/netfilter/xt_LED.c:78: warning: unused variable ledinternal net/netfilter/xt_LED.c: In function led_tg_check: net/netfilter/xt_LED.c:102: error: implicit declaration of function led_trigger_register net/netfilter/xt_LED.c: In function led_tg_destroy: net/netfilter/xt_LED.c:135: error: implicit declaration of function led_trigger_unregister Fix by adding a dependency on LED_TRIGGERS. Reported-by: Sachin Sant Tested-by: Subrata Modak Signed-off-by: Patrick McHardy --- net/netfilter/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 2562d05dbaf5..2c967e4f706c 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -374,7 +374,7 @@ config NETFILTER_XT_TARGET_HL config NETFILTER_XT_TARGET_LED tristate '"LED" target support' - depends on LEDS_CLASS + depends on LEDS_CLASS && LED_TRIGGERS depends on NETFILTER_ADVANCED help This option adds a `LED' target, which allows you to blink LEDs in -- cgit v1.2.3 From 78f3648601fdc7a8166748bbd6d0555a88efa24a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 25 Mar 2009 17:24:34 +0100 Subject: netfilter: nf_conntrack: use hlist_add_head_rcu() in nf_conntrack_set_hashsize() Using hlist_add_head() in nf_conntrack_set_hashsize() is quite dangerous. Without any barrier, one CPU could see a loop while doing its lookup. Its true new table cannot be seen by another cpu, but previous table is still readable. Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 55befe59e1c0..54e983f13898 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1121,7 +1121,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) struct nf_conntrack_tuple_hash, hnode); hlist_del_rcu(&h->hnode); bucket = __hash_conntrack(&h->tuple, hashsize, rnd); - hlist_add_head(&h->hnode, &hash[bucket]); + hlist_add_head_rcu(&h->hnode, &hash[bucket]); } } old_size = nf_conntrack_htable_size; -- cgit v1.2.3 From b8dfe498775de912116f275680ddb57c8799d9ef Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 25 Mar 2009 17:31:52 +0100 Subject: netfilter: factorize ifname_compare() We use same not trivial helper function in four places. We can factorize it. Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/arp_tables.c | 14 +------------- net/ipv4/netfilter/ip_tables.c | 23 ++--------------------- net/ipv6/netfilter/ip6_tables.c | 23 ++--------------------- net/netfilter/xt_physdev.c | 21 ++------------------- 4 files changed, 7 insertions(+), 74 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 64a7c6ce0b98..4b35dba7cf7d 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -80,19 +80,7 @@ static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, static unsigned long ifname_compare(const char *_a, const char *_b, const char *_mask) { #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - const unsigned long *a = (const unsigned long *)_a; - const unsigned long *b = (const unsigned long *)_b; - const unsigned long *mask = (const unsigned long *)_mask; - unsigned long ret; - - ret = (a[0] ^ b[0]) & mask[0]; - if (IFNAMSIZ > sizeof(unsigned long)) - ret |= (a[1] ^ b[1]) & mask[1]; - if (IFNAMSIZ > 2 * sizeof(unsigned long)) - ret |= (a[2] ^ b[2]) & mask[2]; - if (IFNAMSIZ > 3 * sizeof(unsigned long)) - ret |= (a[3] ^ b[3]) & mask[3]; - BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long)); + unsigned long ret = ifname_compare_aligned(_a, _b, _mask); #else unsigned long ret = 0; int i; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index e5294aec967d..41c59e391a6a 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -74,25 +74,6 @@ do { \ Hence the start of any table is given by get_table() below. */ -static unsigned long ifname_compare(const char *_a, const char *_b, - const unsigned char *_mask) -{ - const unsigned long *a = (const unsigned long *)_a; - const unsigned long *b = (const unsigned long *)_b; - const unsigned long *mask = (const unsigned long *)_mask; - unsigned long ret; - - ret = (a[0] ^ b[0]) & mask[0]; - if (IFNAMSIZ > sizeof(unsigned long)) - ret |= (a[1] ^ b[1]) & mask[1]; - if (IFNAMSIZ > 2 * sizeof(unsigned long)) - ret |= (a[2] ^ b[2]) & mask[2]; - if (IFNAMSIZ > 3 * sizeof(unsigned long)) - ret |= (a[3] ^ b[3]) & mask[3]; - BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long)); - return ret; -} - /* Returns whether matches rule or not. */ /* Performance critical - called for every packet */ static inline bool @@ -121,7 +102,7 @@ ip_packet_match(const struct iphdr *ip, return false; } - ret = ifname_compare(indev, ipinfo->iniface, ipinfo->iniface_mask); + ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask); if (FWINV(ret != 0, IPT_INV_VIA_IN)) { dprintf("VIA in mismatch (%s vs %s).%s\n", @@ -130,7 +111,7 @@ ip_packet_match(const struct iphdr *ip, return false; } - ret = ifname_compare(outdev, ipinfo->outiface, ipinfo->outiface_mask); + ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask); if (FWINV(ret != 0, IPT_INV_VIA_OUT)) { dprintf("VIA out mismatch (%s vs %s).%s\n", diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 34af7bb8df5f..e59662b3b5b9 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -89,25 +89,6 @@ ip6t_ext_hdr(u8 nexthdr) (nexthdr == IPPROTO_DSTOPTS) ); } -static unsigned long ifname_compare(const char *_a, const char *_b, - const unsigned char *_mask) -{ - const unsigned long *a = (const unsigned long *)_a; - const unsigned long *b = (const unsigned long *)_b; - const unsigned long *mask = (const unsigned long *)_mask; - unsigned long ret; - - ret = (a[0] ^ b[0]) & mask[0]; - if (IFNAMSIZ > sizeof(unsigned long)) - ret |= (a[1] ^ b[1]) & mask[1]; - if (IFNAMSIZ > 2 * sizeof(unsigned long)) - ret |= (a[2] ^ b[2]) & mask[2]; - if (IFNAMSIZ > 3 * sizeof(unsigned long)) - ret |= (a[3] ^ b[3]) & mask[3]; - BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long)); - return ret; -} - /* Returns whether matches rule or not. */ /* Performance critical - called for every packet */ static inline bool @@ -138,7 +119,7 @@ ip6_packet_match(const struct sk_buff *skb, return false; } - ret = ifname_compare(indev, ip6info->iniface, ip6info->iniface_mask); + ret = ifname_compare_aligned(indev, ip6info->iniface, ip6info->iniface_mask); if (FWINV(ret != 0, IP6T_INV_VIA_IN)) { dprintf("VIA in mismatch (%s vs %s).%s\n", @@ -147,7 +128,7 @@ ip6_packet_match(const struct sk_buff *skb, return false; } - ret = ifname_compare(outdev, ip6info->outiface, ip6info->outiface_mask); + ret = ifname_compare_aligned(outdev, ip6info->outiface, ip6info->outiface_mask); if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) { dprintf("VIA out mismatch (%s vs %s).%s\n", diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 44a234ef4439..8d28ca5848bc 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -20,23 +20,6 @@ MODULE_DESCRIPTION("Xtables: Bridge physical device match"); MODULE_ALIAS("ipt_physdev"); MODULE_ALIAS("ip6t_physdev"); -static unsigned long ifname_compare(const char *_a, const char *_b, const char *_mask) -{ - const unsigned long *a = (const unsigned long *)_a; - const unsigned long *b = (const unsigned long *)_b; - const unsigned long *mask = (const unsigned long *)_mask; - unsigned long ret; - - ret = (a[0] ^ b[0]) & mask[0]; - if (IFNAMSIZ > sizeof(unsigned long)) - ret |= (a[1] ^ b[1]) & mask[1]; - if (IFNAMSIZ > 2 * sizeof(unsigned long)) - ret |= (a[2] ^ b[2]) & mask[2]; - if (IFNAMSIZ > 3 * sizeof(unsigned long)) - ret |= (a[3] ^ b[3]) & mask[3]; - BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long)); - return ret; -} static bool physdev_mt(const struct sk_buff *skb, const struct xt_match_param *par) @@ -85,7 +68,7 @@ physdev_mt(const struct sk_buff *skb, const struct xt_match_param *par) if (!(info->bitmask & XT_PHYSDEV_OP_IN)) goto match_outdev; indev = nf_bridge->physindev ? nf_bridge->physindev->name : nulldevname; - ret = ifname_compare(indev, info->physindev, info->in_mask); + ret = ifname_compare_aligned(indev, info->physindev, info->in_mask); if (!ret ^ !(info->invert & XT_PHYSDEV_OP_IN)) return false; @@ -95,7 +78,7 @@ match_outdev: return true; outdev = nf_bridge->physoutdev ? nf_bridge->physoutdev->name : nulldevname; - ret = ifname_compare(outdev, info->physoutdev, info->out_mask); + ret = ifname_compare_aligned(outdev, info->physoutdev, info->out_mask); return (!!ret ^ !(info->invert & XT_PHYSDEV_OP_OUT)); } -- cgit v1.2.3 From d0dba7255b541f1651a88e75ebdb20dd45509c2f Mon Sep 17 00:00:00 2001 From: Holger Eitzenberger Date: Wed, 25 Mar 2009 18:24:48 +0100 Subject: netfilter: ctnetlink: add callbacks to the per-proto nlattrs There is added a single callback for the l3 proto helper. The two callbacks for the l4 protos are necessary because of the general structure of a ctnetlink event, which is in short: CTA_TUPLE_ORIG CTA_TUPLE_REPLY CTA_ID ... CTA_PROTOINFO CTA_TUPLE_MASTER Therefore the formular is size := sizeof(generic-nlas) + 3 * sizeof(tuple_nlas) + sizeof(protoinfo_nlas) Some of the NLAs are optional, e. g. CTA_TUPLE_MASTER, which is only set if it's an expected connection. But the number of optional NLAs is small enough to prevent netlink_trim() from reallocating if calculated properly. Signed-off-by: Holger Eitzenberger Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_proto.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 9a62b4efa0e1..1a4568bf7ea5 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -167,6 +167,9 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto) if (proto->l3proto >= AF_MAX) return -EBUSY; + if (proto->tuple_to_nlattr && !proto->nlattr_tuple_size) + return -EINVAL; + mutex_lock(&nf_ct_proto_mutex); if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_l3proto_generic) { ret = -EBUSY; @@ -177,6 +180,9 @@ int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto) if (ret < 0) goto out_unlock; + if (proto->nlattr_tuple_size) + proto->nla_size = 3 * proto->nlattr_tuple_size(); + rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto); out_unlock: @@ -263,6 +269,10 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto) if (l4proto->l3proto >= PF_MAX) return -EBUSY; + if ((l4proto->to_nlattr && !l4proto->nlattr_size) + || (l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size)) + return -EINVAL; + mutex_lock(&nf_ct_proto_mutex); if (!nf_ct_protos[l4proto->l3proto]) { /* l3proto may be loaded latter. */ @@ -290,6 +300,12 @@ int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *l4proto) if (ret < 0) goto out_unlock; + l4proto->nla_size = 0; + if (l4proto->nlattr_size) + l4proto->nla_size += l4proto->nlattr_size(); + if (l4proto->nlattr_tuple_size) + l4proto->nla_size += 3 * l4proto->nlattr_tuple_size(); + rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], l4proto); -- cgit v1.2.3 From e487eb99cf9381a4f8254fa01747a85818da612b Mon Sep 17 00:00:00 2001 From: Holger Eitzenberger Date: Wed, 25 Mar 2009 18:26:30 +0100 Subject: netlink: add nla_policy_len() It calculates the max. length of a Netlink policy, which is usefull for allocating Netlink buffers roughly the size of the actual message. Signed-off-by: Holger Eitzenberger Signed-off-by: Patrick McHardy --- net/netlink/attr.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'net') diff --git a/net/netlink/attr.c b/net/netlink/attr.c index 56c3ce7fe29a..ae32c573df00 100644 --- a/net/netlink/attr.c +++ b/net/netlink/attr.c @@ -132,6 +132,32 @@ errout: return err; } +/** + * nla_policy_len - Determin the max. length of a policy + * @policy: policy to use + * @n: number of policies + * + * Determines the max. length of the policy. It is currently used + * to allocated Netlink buffers roughly the size of the actual + * message. + * + * Returns 0 on success or a negative error code. + */ +int +nla_policy_len(const struct nla_policy *p, int n) +{ + int i, len = 0; + + for (i = 0; i < n; i++) { + if (p->len) + len += nla_total_size(p->len); + else if (nla_attr_minlen[p->type]) + len += nla_total_size(nla_attr_minlen[p->type]); + } + + return len; +} + /** * nla_parse - Parse a stream of attributes into a tb buffer * @tb: destination array with maxtype+1 elements @@ -456,6 +482,7 @@ int nla_append(struct sk_buff *skb, int attrlen, const void *data) } EXPORT_SYMBOL(nla_validate); +EXPORT_SYMBOL(nla_policy_len); EXPORT_SYMBOL(nla_parse); EXPORT_SYMBOL(nla_find); EXPORT_SYMBOL(nla_strlcpy); -- cgit v1.2.3 From af9d32ad6718b9a80fa89f557cc1fbb63a93ec15 Mon Sep 17 00:00:00 2001 From: Holger Eitzenberger Date: Wed, 25 Mar 2009 18:44:01 +0100 Subject: netfilter: limit the length of the helper name This is necessary in order to have an upper bound for Netlink message calculation, which is not a problem at all, as there are no helpers with a longer name. Signed-off-by: Holger Eitzenberger Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_helper.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index a51bdac9f3a0..805cfdd42303 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -142,6 +142,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) BUG_ON(me->expect_policy == NULL); BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES); + BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1); mutex_lock(&nf_ct_helper_mutex); hlist_add_head_rcu(&me->hnode, &nf_ct_helper_hash[h]); -- cgit v1.2.3 From 1f9352ae2253a97b07b34dcf16ffa3b4ca12c558 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 25 Mar 2009 19:26:35 +0100 Subject: netfilter: {ip,ip6,arp}_tables: fix incorrect loop detection Commit e1b4b9f ([NETFILTER]: {ip,ip6,arp}_tables: fix exponential worst-case search for loops) introduced a regression in the loop detection algorithm, causing sporadic incorrectly detected loops. When a chain has already been visited during the check, it is treated as having a standard target containing a RETURN verdict directly at the beginning in order to not check it again. The real target of the first rule is then incorrectly treated as STANDARD target and checked not to contain invalid verdicts. Fix by making sure the rule does actually contain a standard target. Based on patch by Francis Dupont Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/arp_tables.c | 4 +++- net/ipv4/netfilter/ip_tables.c | 4 +++- net/ipv6/netfilter/ip6_tables.c | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 4b35dba7cf7d..4f454ce9a602 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -388,7 +388,9 @@ static int mark_source_chains(struct xt_table_info *newinfo, && unconditional(&e->arp)) || visited) { unsigned int oldpos, size; - if (t->verdict < -NF_MAX_VERDICT - 1) { + if ((strcmp(t->target.u.user.name, + ARPT_STANDARD_TARGET) == 0) && + t->verdict < -NF_MAX_VERDICT - 1) { duprintf("mark_source_chains: bad " "negative verdict (%i)\n", t->verdict); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 41c59e391a6a..82ee7c9049ff 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -488,7 +488,9 @@ mark_source_chains(struct xt_table_info *newinfo, && unconditional(&e->ip)) || visited) { unsigned int oldpos, size; - if (t->verdict < -NF_MAX_VERDICT - 1) { + if ((strcmp(t->target.u.user.name, + IPT_STANDARD_TARGET) == 0) && + t->verdict < -NF_MAX_VERDICT - 1) { duprintf("mark_source_chains: bad " "negative verdict (%i)\n", t->verdict); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index e59662b3b5b9..e89cfa3a8f25 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -517,7 +517,9 @@ mark_source_chains(struct xt_table_info *newinfo, && unconditional(&e->ipv6)) || visited) { unsigned int oldpos, size; - if (t->verdict < -NF_MAX_VERDICT - 1) { + if ((strcmp(t->target.u.user.name, + IP6T_STANDARD_TARGET) == 0) && + t->verdict < -NF_MAX_VERDICT - 1) { duprintf("mark_source_chains: bad " "negative verdict (%i)\n", t->verdict); -- cgit v1.2.3 From ea781f197d6a835cbb93a0bf88ee1696296ed8aa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 25 Mar 2009 21:05:46 +0100 Subject: netfilter: nf_conntrack: use SLAB_DESTROY_BY_RCU and get rid of call_rcu() Use "hlist_nulls" infrastructure we added in 2.6.29 for RCUification of UDP & TCP. This permits an easy conversion from call_rcu() based hash lists to a SLAB_DESTROY_BY_RCU one. Avoiding call_rcu() delay at nf_conn freeing time has numerous gains. First, it doesnt fill RCU queues (up to 10000 elements per cpu). This reduces OOM possibility, if queued elements are not taken into account This reduces latency problems when RCU queue size hits hilimit and triggers emergency mode. - It allows fast reuse of just freed elements, permitting better use of CPU cache. - We delete rcu_head from "struct nf_conn", shrinking size of this structure by 8 or 16 bytes. This patch only takes care of "struct nf_conn". call_rcu() is still used for less critical conntrack parts, that may be converted later if necessary. Signed-off-by: Eric Dumazet Signed-off-by: Patrick McHardy --- .../netfilter/nf_conntrack_l3proto_ipv4_compat.c | 63 ++++++----- net/ipv4/netfilter/nf_nat_core.c | 2 +- net/netfilter/nf_conntrack_core.c | 123 ++++++++++++--------- net/netfilter/nf_conntrack_expect.c | 2 +- net/netfilter/nf_conntrack_helper.c | 7 +- net/netfilter/nf_conntrack_netlink.c | 20 ++-- net/netfilter/nf_conntrack_standalone.c | 57 ++++++---- net/netfilter/xt_connlimit.c | 6 +- 8 files changed, 160 insertions(+), 120 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 6ba5c557690c..8668a3defda6 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -25,40 +25,42 @@ struct ct_iter_state { unsigned int bucket; }; -static struct hlist_node *ct_get_first(struct seq_file *seq) +static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) { struct net *net = seq_file_net(seq); struct ct_iter_state *st = seq->private; - struct hlist_node *n; + struct hlist_nulls_node *n; for (st->bucket = 0; st->bucket < nf_conntrack_htable_size; st->bucket++) { n = rcu_dereference(net->ct.hash[st->bucket].first); - if (n) + if (!is_a_nulls(n)) return n; } return NULL; } -static struct hlist_node *ct_get_next(struct seq_file *seq, - struct hlist_node *head) +static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, + struct hlist_nulls_node *head) { struct net *net = seq_file_net(seq); struct ct_iter_state *st = seq->private; head = rcu_dereference(head->next); - while (head == NULL) { - if (++st->bucket >= nf_conntrack_htable_size) - return NULL; + while (is_a_nulls(head)) { + if (likely(get_nulls_value(head) == st->bucket)) { + if (++st->bucket >= nf_conntrack_htable_size) + return NULL; + } head = rcu_dereference(net->ct.hash[st->bucket].first); } return head; } -static struct hlist_node *ct_get_idx(struct seq_file *seq, loff_t pos) +static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos) { - struct hlist_node *head = ct_get_first(seq); + struct hlist_nulls_node *head = ct_get_first(seq); if (head) while (pos && (head = ct_get_next(seq, head))) @@ -87,69 +89,76 @@ static void ct_seq_stop(struct seq_file *s, void *v) static int ct_seq_show(struct seq_file *s, void *v) { - const struct nf_conntrack_tuple_hash *hash = v; - const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); + struct nf_conntrack_tuple_hash *hash = v; + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); const struct nf_conntrack_l3proto *l3proto; const struct nf_conntrack_l4proto *l4proto; + int ret = 0; NF_CT_ASSERT(ct); + if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) + return 0; + /* we only want to print DIR_ORIGINAL */ if (NF_CT_DIRECTION(hash)) - return 0; + goto release; if (nf_ct_l3num(ct) != AF_INET) - return 0; + goto release; l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); NF_CT_ASSERT(l3proto); l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); NF_CT_ASSERT(l4proto); + ret = -ENOSPC; if (seq_printf(s, "%-8s %u %ld ", l4proto->name, nf_ct_protonum(ct), timer_pending(&ct->timeout) ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0) - return -ENOSPC; + goto release; if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct)) - return -ENOSPC; + goto release; if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, l3proto, l4proto)) - return -ENOSPC; + goto release; if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL)) - return -ENOSPC; + goto release; if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) if (seq_printf(s, "[UNREPLIED] ")) - return -ENOSPC; + goto release; if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, l3proto, l4proto)) - return -ENOSPC; + goto release; if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) - return -ENOSPC; + goto release; if (test_bit(IPS_ASSURED_BIT, &ct->status)) if (seq_printf(s, "[ASSURED] ")) - return -ENOSPC; + goto release; #ifdef CONFIG_NF_CONNTRACK_MARK if (seq_printf(s, "mark=%u ", ct->mark)) - return -ENOSPC; + goto release; #endif #ifdef CONFIG_NF_CONNTRACK_SECMARK if (seq_printf(s, "secmark=%u ", ct->secmark)) - return -ENOSPC; + goto release; #endif if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) - return -ENOSPC; - - return 0; + goto release; + ret = 0; +release: + nf_ct_put(ct); + return ret; } static const struct seq_operations ct_seq_ops = { diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index a65cf692359f..fe65187810f0 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -679,7 +679,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, static int __net_init nf_nat_net_init(struct net *net) { net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, - &net->ipv4.nat_vmalloced); + &net->ipv4.nat_vmalloced, 0); if (!net->ipv4.nat_bysource) return -ENOMEM; return 0; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 54e983f13898..c55bbdc7d429 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -163,8 +164,8 @@ static void clean_from_lists(struct nf_conn *ct) { pr_debug("clean_from_lists(%p)\n", ct); - hlist_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode); - hlist_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnode); + hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); + hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode); /* Destroy all pending expectations */ nf_ct_remove_expectations(ct); @@ -204,8 +205,8 @@ destroy_conntrack(struct nf_conntrack *nfct) /* We overload first tuple to link into unconfirmed list. */ if (!nf_ct_is_confirmed(ct)) { - BUG_ON(hlist_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode)); - hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode); + BUG_ON(hlist_nulls_unhashed(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode)); + hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); } NF_CT_STAT_INC(net, delete); @@ -242,18 +243,26 @@ static void death_by_timeout(unsigned long ul_conntrack) nf_ct_put(ct); } +/* + * Warning : + * - Caller must take a reference on returned object + * and recheck nf_ct_tuple_equal(tuple, &h->tuple) + * OR + * - Caller must lock nf_conntrack_lock before calling this function + */ struct nf_conntrack_tuple_hash * __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; - struct hlist_node *n; + struct hlist_nulls_node *n; unsigned int hash = hash_conntrack(tuple); /* Disable BHs the entire time since we normally need to disable them * at least once for the stats anyway. */ local_bh_disable(); - hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) { +begin: + hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) { if (nf_ct_tuple_equal(tuple, &h->tuple)) { NF_CT_STAT_INC(net, found); local_bh_enable(); @@ -261,6 +270,13 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple) } NF_CT_STAT_INC(net, searched); } + /* + * if the nulls value we got at the end of this lookup is + * not the expected one, we must restart lookup. + * We probably met an item that was moved to another chain. + */ + if (get_nulls_value(n) != hash) + goto begin; local_bh_enable(); return NULL; @@ -275,11 +291,18 @@ nf_conntrack_find_get(struct net *net, const struct nf_conntrack_tuple *tuple) struct nf_conn *ct; rcu_read_lock(); +begin: h = __nf_conntrack_find(net, tuple); if (h) { ct = nf_ct_tuplehash_to_ctrack(h); if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) h = NULL; + else { + if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple))) { + nf_ct_put(ct); + goto begin; + } + } } rcu_read_unlock(); @@ -293,9 +316,9 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, { struct net *net = nf_ct_net(ct); - hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, + hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, &net->ct.hash[hash]); - hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnode, + hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode, &net->ct.hash[repl_hash]); } @@ -318,7 +341,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; struct nf_conn_help *help; - struct hlist_node *n; + struct hlist_nulls_node *n; enum ip_conntrack_info ctinfo; struct net *net; @@ -350,17 +373,17 @@ __nf_conntrack_confirm(struct sk_buff *skb) /* See if there's one in the list already, including reverse: NAT could have grabbed it without realizing, since we're not in the hash. If there is, we lost race. */ - hlist_for_each_entry(h, n, &net->ct.hash[hash], hnode) + hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, &h->tuple)) goto out; - hlist_for_each_entry(h, n, &net->ct.hash[repl_hash], hnode) + hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, &h->tuple)) goto out; /* Remove from unconfirmed list */ - hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode); + hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode); __nf_conntrack_hash_insert(ct, hash, repl_hash); /* Timer relative to confirmation time, not original @@ -399,14 +422,14 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, { struct net *net = nf_ct_net(ignored_conntrack); struct nf_conntrack_tuple_hash *h; - struct hlist_node *n; + struct hlist_nulls_node *n; unsigned int hash = hash_conntrack(tuple); /* Disable BHs the entire time since we need to disable them at * least once for the stats anyway. */ rcu_read_lock_bh(); - hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) { + hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) { if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack && nf_ct_tuple_equal(tuple, &h->tuple)) { NF_CT_STAT_INC(net, found); @@ -430,14 +453,14 @@ static noinline int early_drop(struct net *net, unsigned int hash) /* Use oldest entry, which is roughly LRU */ struct nf_conntrack_tuple_hash *h; struct nf_conn *ct = NULL, *tmp; - struct hlist_node *n; + struct hlist_nulls_node *n; unsigned int i, cnt = 0; int dropped = 0; rcu_read_lock(); for (i = 0; i < nf_conntrack_htable_size; i++) { - hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], - hnode) { + hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], + hnnode) { tmp = nf_ct_tuplehash_to_ctrack(h); if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) ct = tmp; @@ -508,27 +531,19 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, #ifdef CONFIG_NET_NS ct->ct_net = net; #endif - INIT_RCU_HEAD(&ct->rcu); return ct; } EXPORT_SYMBOL_GPL(nf_conntrack_alloc); -static void nf_conntrack_free_rcu(struct rcu_head *head) -{ - struct nf_conn *ct = container_of(head, struct nf_conn, rcu); - - nf_ct_ext_free(ct); - kmem_cache_free(nf_conntrack_cachep, ct); -} - void nf_conntrack_free(struct nf_conn *ct) { struct net *net = nf_ct_net(ct); nf_ct_ext_destroy(ct); atomic_dec(&net->ct.count); - call_rcu(&ct->rcu, nf_conntrack_free_rcu); + nf_ct_ext_free(ct); + kmem_cache_free(nf_conntrack_cachep, ct); } EXPORT_SYMBOL_GPL(nf_conntrack_free); @@ -594,7 +609,7 @@ init_conntrack(struct net *net, } /* Overload tuple linked list to put us in unconfirmed list. */ - hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, + hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, &net->ct.unconfirmed); spin_unlock_bh(&nf_conntrack_lock); @@ -934,17 +949,17 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), { struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; - struct hlist_node *n; + struct hlist_nulls_node *n; spin_lock_bh(&nf_conntrack_lock); for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { - hlist_for_each_entry(h, n, &net->ct.hash[*bucket], hnode) { + hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) { ct = nf_ct_tuplehash_to_ctrack(h); if (iter(ct, data)) goto found; } } - hlist_for_each_entry(h, n, &net->ct.unconfirmed, hnode) { + hlist_nulls_for_each_entry(h, n, &net->ct.unconfirmed, hnnode) { ct = nf_ct_tuplehash_to_ctrack(h); if (iter(ct, data)) set_bit(IPS_DYING_BIT, &ct->status); @@ -992,7 +1007,7 @@ static int kill_all(struct nf_conn *i, void *data) return 1; } -void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, unsigned int size) +void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size) { if (vmalloced) vfree(hash); @@ -1060,26 +1075,28 @@ void nf_conntrack_cleanup(struct net *net) } } -struct hlist_head *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced) +void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls) { - struct hlist_head *hash; - unsigned int size, i; + struct hlist_nulls_head *hash; + unsigned int nr_slots, i; + size_t sz; *vmalloced = 0; - size = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_head)); - hash = (void*)__get_free_pages(GFP_KERNEL|__GFP_NOWARN, - get_order(sizeof(struct hlist_head) - * size)); + BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head)); + nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head)); + sz = nr_slots * sizeof(struct hlist_nulls_head); + hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO, + get_order(sz)); if (!hash) { *vmalloced = 1; printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n"); - hash = vmalloc(sizeof(struct hlist_head) * size); + hash = __vmalloc(sz, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); } - if (hash) - for (i = 0; i < size; i++) - INIT_HLIST_HEAD(&hash[i]); + if (hash && nulls) + for (i = 0; i < nr_slots; i++) + INIT_HLIST_NULLS_HEAD(&hash[i], i); return hash; } @@ -1090,7 +1107,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) int i, bucket, vmalloced, old_vmalloced; unsigned int hashsize, old_size; int rnd; - struct hlist_head *hash, *old_hash; + struct hlist_nulls_head *hash, *old_hash; struct nf_conntrack_tuple_hash *h; /* On boot, we can set this without any fancy locking. */ @@ -1101,7 +1118,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) if (!hashsize) return -EINVAL; - hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced); + hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced, 1); if (!hash) return -ENOMEM; @@ -1116,12 +1133,12 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) */ spin_lock_bh(&nf_conntrack_lock); for (i = 0; i < nf_conntrack_htable_size; i++) { - while (!hlist_empty(&init_net.ct.hash[i])) { - h = hlist_entry(init_net.ct.hash[i].first, - struct nf_conntrack_tuple_hash, hnode); - hlist_del_rcu(&h->hnode); + while (!hlist_nulls_empty(&init_net.ct.hash[i])) { + h = hlist_nulls_entry(init_net.ct.hash[i].first, + struct nf_conntrack_tuple_hash, hnnode); + hlist_nulls_del_rcu(&h->hnnode); bucket = __hash_conntrack(&h->tuple, hashsize, rnd); - hlist_add_head_rcu(&h->hnode, &hash[bucket]); + hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]); } } old_size = nf_conntrack_htable_size; @@ -1172,7 +1189,7 @@ static int nf_conntrack_init_init_net(void) nf_conntrack_cachep = kmem_cache_create("nf_conntrack", sizeof(struct nf_conn), - 0, 0, NULL); + 0, SLAB_DESTROY_BY_RCU, NULL); if (!nf_conntrack_cachep) { printk(KERN_ERR "Unable to create nf_conn slab cache\n"); ret = -ENOMEM; @@ -1202,7 +1219,7 @@ static int nf_conntrack_init_net(struct net *net) int ret; atomic_set(&net->ct.count, 0); - INIT_HLIST_HEAD(&net->ct.unconfirmed); + INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, 0); net->ct.stat = alloc_percpu(struct ip_conntrack_stat); if (!net->ct.stat) { ret = -ENOMEM; @@ -1212,7 +1229,7 @@ static int nf_conntrack_init_net(struct net *net) if (ret < 0) goto err_ecache; net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, - &net->ct.hash_vmalloc); + &net->ct.hash_vmalloc, 1); if (!net->ct.hash) { ret = -ENOMEM; printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 357ba39d4c8d..3940f996a2e4 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -604,7 +604,7 @@ int nf_conntrack_expect_init(struct net *net) net->ct.expect_count = 0; net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, - &net->ct.expect_vmalloc); + &net->ct.expect_vmalloc, 0); if (net->ct.expect_hash == NULL) goto err1; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 805cfdd42303..30b8e9009f99 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -159,6 +159,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, struct nf_conntrack_tuple_hash *h; struct nf_conntrack_expect *exp; const struct hlist_node *n, *next; + const struct hlist_nulls_node *nn; unsigned int i; /* Get rid of expectations */ @@ -175,10 +176,10 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, } /* Get rid of expecteds, set helpers to NULL. */ - hlist_for_each_entry(h, n, &net->ct.unconfirmed, hnode) + hlist_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode) unhelp(h, me); for (i = 0; i < nf_conntrack_htable_size; i++) { - hlist_for_each_entry(h, n, &net->ct.hash[i], hnode) + hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) unhelp(h, me); } } @@ -218,7 +219,7 @@ int nf_conntrack_helper_init(void) nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, - &nf_ct_helper_vmalloc); + &nf_ct_helper_vmalloc, 0); if (!nf_ct_helper_hash) return -ENOMEM; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 1b75c9efb0eb..349bbefe5517 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -536,7 +537,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { struct nf_conn *ct, *last; struct nf_conntrack_tuple_hash *h; - struct hlist_node *n; + struct hlist_nulls_node *n; struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh); u_int8_t l3proto = nfmsg->nfgen_family; @@ -544,27 +545,27 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) last = (struct nf_conn *)cb->args[1]; for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) { restart: - hlist_for_each_entry_rcu(h, n, &init_net.ct.hash[cb->args[0]], - hnode) { + hlist_nulls_for_each_entry_rcu(h, n, &init_net.ct.hash[cb->args[0]], + hnnode) { if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; ct = nf_ct_tuplehash_to_ctrack(h); + if (!atomic_inc_not_zero(&ct->ct_general.use)) + continue; /* Dump entries of a given L3 protocol number. * If it is not specified, ie. l3proto == 0, * then dump everything. */ if (l3proto && nf_ct_l3num(ct) != l3proto) - continue; + goto releasect; if (cb->args[1]) { if (ct != last) - continue; + goto releasect; cb->args[1] = 0; } if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, IPCTNL_MSG_CT_NEW, 1, ct) < 0) { - if (!atomic_inc_not_zero(&ct->ct_general.use)) - continue; cb->args[1] = (unsigned long)ct; goto out; } @@ -577,6 +578,8 @@ restart: if (acct) memset(acct, 0, sizeof(struct nf_conn_counter[IP_CT_DIR_MAX])); } +releasect: + nf_ct_put(ct); } if (cb->args[1]) { cb->args[1] = 0; @@ -1242,13 +1245,12 @@ ctnetlink_create_conntrack(struct nlattr *cda[], if (err < 0) goto err2; - master_h = __nf_conntrack_find(&init_net, &master); + master_h = nf_conntrack_find_get(&init_net, &master); if (master_h == NULL) { err = -ENOENT; goto err2; } master_ct = nf_ct_tuplehash_to_ctrack(master_h); - nf_conntrack_get(&master_ct->ct_general); __set_bit(IPS_EXPECTED_BIT, &ct->status); ct->master = master_ct; } diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 4da54b0b9233..193515381970 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -44,40 +44,42 @@ struct ct_iter_state { unsigned int bucket; }; -static struct hlist_node *ct_get_first(struct seq_file *seq) +static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) { struct net *net = seq_file_net(seq); struct ct_iter_state *st = seq->private; - struct hlist_node *n; + struct hlist_nulls_node *n; for (st->bucket = 0; st->bucket < nf_conntrack_htable_size; st->bucket++) { n = rcu_dereference(net->ct.hash[st->bucket].first); - if (n) + if (!is_a_nulls(n)) return n; } return NULL; } -static struct hlist_node *ct_get_next(struct seq_file *seq, - struct hlist_node *head) +static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, + struct hlist_nulls_node *head) { struct net *net = seq_file_net(seq); struct ct_iter_state *st = seq->private; head = rcu_dereference(head->next); - while (head == NULL) { - if (++st->bucket >= nf_conntrack_htable_size) - return NULL; + while (is_a_nulls(head)) { + if (likely(get_nulls_value(head) == st->bucket)) { + if (++st->bucket >= nf_conntrack_htable_size) + return NULL; + } head = rcu_dereference(net->ct.hash[st->bucket].first); } return head; } -static struct hlist_node *ct_get_idx(struct seq_file *seq, loff_t pos) +static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos) { - struct hlist_node *head = ct_get_first(seq); + struct hlist_nulls_node *head = ct_get_first(seq); if (head) while (pos && (head = ct_get_next(seq, head))) @@ -107,67 +109,74 @@ static void ct_seq_stop(struct seq_file *s, void *v) /* return 0 on success, 1 in case of error */ static int ct_seq_show(struct seq_file *s, void *v) { - const struct nf_conntrack_tuple_hash *hash = v; - const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); + struct nf_conntrack_tuple_hash *hash = v; + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); const struct nf_conntrack_l3proto *l3proto; const struct nf_conntrack_l4proto *l4proto; + int ret = 0; NF_CT_ASSERT(ct); + if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) + return 0; /* we only want to print DIR_ORIGINAL */ if (NF_CT_DIRECTION(hash)) - return 0; + goto release; l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); NF_CT_ASSERT(l3proto); l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); NF_CT_ASSERT(l4proto); + ret = -ENOSPC; if (seq_printf(s, "%-8s %u %-8s %u %ld ", l3proto->name, nf_ct_l3num(ct), l4proto->name, nf_ct_protonum(ct), timer_pending(&ct->timeout) ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0) - return -ENOSPC; + goto release; if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct)) - return -ENOSPC; + goto release; if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, l3proto, l4proto)) - return -ENOSPC; + goto release; if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL)) - return -ENOSPC; + goto release; if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) if (seq_printf(s, "[UNREPLIED] ")) - return -ENOSPC; + goto release; if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, l3proto, l4proto)) - return -ENOSPC; + goto release; if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) - return -ENOSPC; + goto release; if (test_bit(IPS_ASSURED_BIT, &ct->status)) if (seq_printf(s, "[ASSURED] ")) - return -ENOSPC; + goto release; #if defined(CONFIG_NF_CONNTRACK_MARK) if (seq_printf(s, "mark=%u ", ct->mark)) - return -ENOSPC; + goto release; #endif #ifdef CONFIG_NF_CONNTRACK_SECMARK if (seq_printf(s, "secmark=%u ", ct->secmark)) - return -ENOSPC; + goto release; #endif if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) - return -ENOSPC; + goto release; + ret = 0; +release: + nf_ct_put(ct); return 0; } diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 7f404cc64c83..680980954395 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -108,7 +108,7 @@ static int count_them(struct xt_connlimit_data *data, const struct nf_conntrack_tuple_hash *found; struct xt_connlimit_conn *conn; struct xt_connlimit_conn *tmp; - const struct nf_conn *found_ct; + struct nf_conn *found_ct; struct list_head *hash; bool addit = true; int matches = 0; @@ -123,7 +123,7 @@ static int count_them(struct xt_connlimit_data *data, /* check the saved connections */ list_for_each_entry_safe(conn, tmp, hash, list) { - found = __nf_conntrack_find(&init_net, &conn->tuple); + found = nf_conntrack_find_get(&init_net, &conn->tuple); found_ct = NULL; if (found != NULL) @@ -151,6 +151,7 @@ static int count_them(struct xt_connlimit_data *data, * we do not care about connections which are * closed already -> ditch it */ + nf_ct_put(found_ct); list_del(&conn->list); kfree(conn); continue; @@ -160,6 +161,7 @@ static int count_them(struct xt_connlimit_data *data, match->family)) /* same source network -> be counted! */ ++matches; + nf_ct_put(found_ct); } rcu_read_unlock(); -- cgit v1.2.3 From 2732c4e45bb67006fdc9ae6669be866762711ab5 Mon Sep 17 00:00:00 2001 From: Holger Eitzenberger Date: Wed, 25 Mar 2009 21:50:59 +0100 Subject: netfilter: ctnetlink: allocate right-sized ctnetlink skb Try to allocate a Netlink skb roughly the size of the actual message, with the help from the l3 and l4 protocol helpers. This is all to prevent a reallocation in netlink_trim() later. The overhead of allocating the right-sized skb is rather small, with ctnetlink_alloc_skb() actually being inlined away on my x86_64 box. The size of the per-proto space is determined at registration time of the protocol helper. Signed-off-by: Holger Eitzenberger Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_netlink.c | 65 +++++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 349bbefe5517..03547c60f389 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -405,6 +405,69 @@ nla_put_failure: } #ifdef CONFIG_NF_CONNTRACK_EVENTS +/* + * The general structure of a ctnetlink event is + * + * CTA_TUPLE_ORIG + * + * CTA_TUPLE_REPLY + * + * CTA_ID + * ... + * CTA_PROTOINFO + * + * CTA_TUPLE_MASTER + * + * + * Therefore the formular is + * + * size = sizeof(headers) + sizeof(generic_nlas) + 3 * sizeof(tuple_nlas) + * + sizeof(protoinfo_nlas) + */ +static struct sk_buff * +ctnetlink_alloc_skb(const struct nf_conntrack_tuple *tuple, gfp_t gfp) +{ + struct nf_conntrack_l3proto *l3proto; + struct nf_conntrack_l4proto *l4proto; + int len; + +#define NLA_TYPE_SIZE(type) nla_total_size(sizeof(type)) + + /* proto independant part */ + len = NLMSG_SPACE(sizeof(struct nfgenmsg)) + + 3 * nla_total_size(0) /* CTA_TUPLE_ORIG|REPL|MASTER */ + + 3 * nla_total_size(0) /* CTA_TUPLE_IP */ + + 3 * nla_total_size(0) /* CTA_TUPLE_PROTO */ + + 3 * NLA_TYPE_SIZE(u_int8_t) /* CTA_PROTO_NUM */ + + NLA_TYPE_SIZE(u_int32_t) /* CTA_ID */ + + NLA_TYPE_SIZE(u_int32_t) /* CTA_STATUS */ + + 2 * nla_total_size(0) /* CTA_COUNTERS_ORIG|REPL */ + + 2 * NLA_TYPE_SIZE(uint64_t) /* CTA_COUNTERS_PACKETS */ + + 2 * NLA_TYPE_SIZE(uint64_t) /* CTA_COUNTERS_BYTES */ + + NLA_TYPE_SIZE(u_int32_t) /* CTA_TIMEOUT */ + + nla_total_size(0) /* CTA_PROTOINFO */ + + nla_total_size(0) /* CTA_HELP */ + + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */ + + NLA_TYPE_SIZE(u_int32_t) /* CTA_SECMARK */ + + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */ + + 2 * NLA_TYPE_SIZE(u_int32_t) /* CTA_NAT_SEQ_CORRECTION_POS */ + + 2 * NLA_TYPE_SIZE(u_int32_t) /* CTA_NAT_SEQ_CORRECTION_BEFORE */ + + 2 * NLA_TYPE_SIZE(u_int32_t) /* CTA_NAT_SEQ_CORRECTION_AFTER */ + + NLA_TYPE_SIZE(u_int32_t); /* CTA_MARK */ + +#undef NLA_TYPE_SIZE + + rcu_read_lock(); + l3proto = __nf_ct_l3proto_find(tuple->src.l3num); + len += l3proto->nla_size; + + l4proto = __nf_ct_l4proto_find(tuple->src.l3num, tuple->dst.protonum); + len += l4proto->nla_size; + rcu_read_unlock(); + + return alloc_skb(len, gfp); +} + static int ctnetlink_conntrack_event(struct notifier_block *this, unsigned long events, void *ptr) { @@ -438,7 +501,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, if (!item->report && !nfnetlink_has_listeners(group)) return NOTIFY_DONE; - skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); + skb = ctnetlink_alloc_skb(tuple(ct, IP_CT_DIR_ORIGINAL), GFP_ATOMIC); if (!skb) return NOTIFY_DONE; -- cgit v1.2.3 From 5c0de29d06318ec8f6e3ba0d17d62529dbbdc1e8 Mon Sep 17 00:00:00 2001 From: Holger Eitzenberger Date: Wed, 25 Mar 2009 21:52:17 +0100 Subject: netfilter: nf_conntrack: add generic function to get len of generic policy Usefull for all protocols which do not add additional data, such as GRE or UDPlite. Signed-off-by: Holger Eitzenberger Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_core.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index c55bbdc7d429..b182b30c7d8d 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -921,6 +921,12 @@ int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[], return 0; } EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple); + +int nf_ct_port_nlattr_tuple_size(void) +{ + return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1); +} +EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size); #endif /* Used by ipt_REJECT and ip6t_REJECT. */ -- cgit v1.2.3 From a400c30edb1958ceb53c4b8ce78989189b36df47 Mon Sep 17 00:00:00 2001 From: Holger Eitzenberger Date: Wed, 25 Mar 2009 21:53:39 +0100 Subject: netfilter: nf_conntrack: calculate per-protocol nlattr size Signed-off-by: Holger Eitzenberger Signed-off-by: Patrick McHardy --- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 6 ++++++ net/ipv4/netfilter/nf_conntrack_proto_icmp.c | 6 ++++++ net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 6 ++++++ net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c | 6 ++++++ net/netfilter/nf_conntrack_proto_dccp.c | 9 +++++++++ net/netfilter/nf_conntrack_proto_gre.c | 1 + net/netfilter/nf_conntrack_proto_sctp.c | 10 ++++++++++ net/netfilter/nf_conntrack_proto_tcp.c | 15 +++++++++++++++ net/netfilter/nf_conntrack_proto_udp.c | 2 ++ net/netfilter/nf_conntrack_proto_udplite.c | 1 + 10 files changed, 62 insertions(+) (limited to 'net') diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 8b681f24e271..7d2ead7228ac 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -328,6 +328,11 @@ static int ipv4_nlattr_to_tuple(struct nlattr *tb[], return 0; } + +static int ipv4_nlattr_tuple_size(void) +{ + return nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1); +} #endif static struct nf_sockopt_ops so_getorigdst = { @@ -347,6 +352,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { .get_l4proto = ipv4_get_l4proto, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .tuple_to_nlattr = ipv4_tuple_to_nlattr, + .nlattr_tuple_size = ipv4_nlattr_tuple_size, .nlattr_to_tuple = ipv4_nlattr_to_tuple, .nla_policy = ipv4_nla_policy, #endif diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 2a8bee26f43d..23b2c2ee869a 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -262,6 +262,11 @@ static int icmp_nlattr_to_tuple(struct nlattr *tb[], return 0; } + +static int icmp_nlattr_tuple_size(void) +{ + return nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1); +} #endif #ifdef CONFIG_SYSCTL @@ -309,6 +314,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = .me = NULL, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .tuple_to_nlattr = icmp_tuple_to_nlattr, + .nlattr_tuple_size = icmp_nlattr_tuple_size, .nlattr_to_tuple = icmp_nlattr_to_tuple, .nla_policy = icmp_nla_policy, #endif diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index e6852f617217..2a15c2d66c69 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -342,6 +342,11 @@ static int ipv6_nlattr_to_tuple(struct nlattr *tb[], return 0; } + +static int ipv6_nlattr_tuple_size(void) +{ + return nla_policy_len(ipv6_nla_policy, CTA_IP_MAX + 1); +} #endif struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { @@ -353,6 +358,7 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { .get_l4proto = ipv6_get_l4proto, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .tuple_to_nlattr = ipv6_tuple_to_nlattr, + .nlattr_tuple_size = ipv6_nlattr_tuple_size, .nlattr_to_tuple = ipv6_nlattr_to_tuple, .nla_policy = ipv6_nla_policy, #endif diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 165b256a6fa0..032fdf415000 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -268,6 +268,11 @@ static int icmpv6_nlattr_to_tuple(struct nlattr *tb[], return 0; } + +static int icmpv6_nlattr_tuple_size(void) +{ + return nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1); +} #endif #ifdef CONFIG_SYSCTL @@ -299,6 +304,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = .error = icmpv6_error, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .tuple_to_nlattr = icmpv6_tuple_to_nlattr, + .nlattr_tuple_size = icmpv6_nlattr_tuple_size, .nlattr_to_tuple = icmpv6_nlattr_to_tuple, .nla_policy = icmpv6_nla_policy, #endif diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index d3d5a7fd73ce..50dac8dbe7d8 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -669,6 +669,12 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) write_unlock_bh(&dccp_lock); return 0; } + +static int dccp_nlattr_size(void) +{ + return nla_total_size(0) /* CTA_PROTOINFO_DCCP */ + + nla_policy_len(dccp_nla_policy, CTA_PROTOINFO_DCCP_MAX + 1); +} #endif #ifdef CONFIG_SYSCTL @@ -749,8 +755,10 @@ static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = { .print_conntrack = dccp_print_conntrack, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .to_nlattr = dccp_to_nlattr, + .nlattr_size = dccp_nlattr_size, .from_nlattr = nlattr_to_dccp, .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, #endif @@ -771,6 +779,7 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = { .to_nlattr = dccp_to_nlattr, .from_nlattr = nlattr_to_dccp, .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, #endif diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 1b279f9d6bf3..117b80112fcb 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -293,6 +293,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = { .me = THIS_MODULE, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, #endif diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 74e037901199..101b4ad9e817 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -537,6 +537,12 @@ static int nlattr_to_sctp(struct nlattr *cda[], struct nf_conn *ct) return 0; } + +static int sctp_nlattr_size(void) +{ + return nla_total_size(0) /* CTA_PROTOINFO_SCTP */ + + nla_policy_len(sctp_nla_policy, CTA_PROTOINFO_SCTP_MAX + 1); +} #endif #ifdef CONFIG_SYSCTL @@ -668,8 +674,10 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { .me = THIS_MODULE, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .to_nlattr = sctp_to_nlattr, + .nlattr_size = sctp_nlattr_size, .from_nlattr = nlattr_to_sctp, .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, #endif @@ -696,8 +704,10 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { .me = THIS_MODULE, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .to_nlattr = sctp_to_nlattr, + .nlattr_size = sctp_nlattr_size, .from_nlattr = nlattr_to_sctp, .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, #endif diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 7d3944f02ea1..9b9e6718b2d3 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1183,6 +1183,17 @@ static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct) return 0; } + +static int tcp_nlattr_size(void) +{ + return nla_total_size(0) /* CTA_PROTOINFO_TCP */ + + nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1); +} + +static int tcp_nlattr_tuple_size(void) +{ + return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1); +} #endif #ifdef CONFIG_SYSCTL @@ -1398,9 +1409,11 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = .error = tcp_error, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .to_nlattr = tcp_to_nlattr, + .nlattr_size = tcp_nlattr_size, .from_nlattr = nlattr_to_tcp, .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, + .nlattr_tuple_size = tcp_nlattr_tuple_size, .nla_policy = nf_ct_port_nla_policy, #endif #ifdef CONFIG_SYSCTL @@ -1428,9 +1441,11 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = .error = tcp_error, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .to_nlattr = tcp_to_nlattr, + .nlattr_size = tcp_nlattr_size, .from_nlattr = nlattr_to_tcp, .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, + .nlattr_tuple_size = tcp_nlattr_tuple_size, .nla_policy = nf_ct_port_nla_policy, #endif #ifdef CONFIG_SYSCTL diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index d4021179e24e..70809d117b91 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -195,6 +195,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, + .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, .nla_policy = nf_ct_port_nla_policy, #endif #ifdef CONFIG_SYSCTL @@ -222,6 +223,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, + .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, .nla_policy = nf_ct_port_nla_policy, #endif #ifdef CONFIG_SYSCTL diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 4579d8de13b1..4614696c1b88 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -180,6 +180,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = .error = udplite_error, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, + .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, #endif -- cgit v1.2.3 From cda6d377ec6b2ee2e58d563d0bd7eb313e0165df Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 25 Mar 2009 21:01:47 -0700 Subject: bridge: bad error handling when adding invalid ether address This fixes an crash when empty bond device is added to a bridge. If an interface with invalid ethernet address (all zero) is added to a bridge, then bridge code detects it when setting up the forward databas entry. But the error unwind is broken, the bridge port object can get freed twice: once when ref count went to zeo, and once by kfree. Since object is never really accessible, just free it. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/bridge/br_if.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 727c5c510a60..8a96672e2c5c 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -426,7 +426,6 @@ err2: err1: kobject_del(&p->kobj); err0: - kobject_put(&p->kobj); dev_set_promiscuity(dev, -1); put_back: dev_put(dev); -- cgit v1.2.3 From 704b3ea3b9b4ea0e115208946abd5c8a64080113 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Thu, 26 Mar 2009 01:03:23 -0700 Subject: netfilter: fix warning about invalid const usage This patch fixes the declaration of the logger structure in ebt_log and ebt_ulog: I forgot to remove the const option from their declaration in the commit ca735b3aaa945626ba65a3e51145bfe4ecd9e222 ("netfilter: use a linked list of loggers"). Pointed-out-by: Stephen Rothwell Signed-off-by: Eric Leblond Signed-off-by: David S. Miller --- net/bridge/netfilter/ebt_log.c | 2 +- net/bridge/netfilter/ebt_ulog.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index d44cbf8c374a..a94f3cc377c0 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -214,7 +214,7 @@ static struct xt_target ebt_log_tg_reg __read_mostly = { .me = THIS_MODULE, }; -static const struct nf_logger ebt_log_logger = { +static struct nf_logger ebt_log_logger __read_mostly = { .name = "ebt_log", .logfn = &ebt_log_packet, .me = THIS_MODULE, diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 2c6d6823e703..80c78c5611b4 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -279,7 +279,7 @@ static struct xt_target ebt_ulog_tg_reg __read_mostly = { .me = THIS_MODULE, }; -static const struct nf_logger ebt_ulog_logger = { +static struct nf_logger ebt_ulog_logger __read_mostly = { .name = "ulog", .logfn = &ebt_log_packet, .me = THIS_MODULE, -- cgit v1.2.3 From 3b334d427cb9c866216820bfad0d8318869cc154 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Thu, 26 Mar 2009 01:04:02 -0700 Subject: netfilter: fix warning in ebt_ulog init function. The ebt_ulog module does not follow the fixed convention about function return. Loading the module is triggering the following message: sys_init_module: 'ebt_ulog'->init suspiciously returned 1, it should follow 0/-E convention sys_init_module: loading module anyway... Pid: 2334, comm: modprobe Not tainted 2.6.29-rc5edenwall0-00883-g199e57b #146 Call Trace: [] ? printk+0xf/0x16 [] sys_init_module+0x107/0x186 [] syscall_call+0x7/0xb The following patch fixes the return treatment in ebt_ulog_init() function. Signed-off-by: Eric Leblond Signed-off-by: David S. Miller --- net/bridge/netfilter/ebt_ulog.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 80c78c5611b4..ac6fa43c8ec9 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -287,13 +287,13 @@ static struct nf_logger ebt_ulog_logger __read_mostly = { static int __init ebt_ulog_init(void) { - bool ret = true; + int ret; int i; if (nlbufsiz >= 128*1024) { printk(KERN_NOTICE "ebt_ulog: Netlink buffer has to be <= 128kB," " please try a smaller nlbufsiz parameter.\n"); - return false; + return -EINVAL; } /* initialize ulog_buffers */ @@ -308,12 +308,12 @@ static int __init ebt_ulog_init(void) if (!ebtulognl) { printk(KERN_WARNING KBUILD_MODNAME ": out of memory trying to " "call netlink_kernel_create\n"); - ret = false; - } else if (xt_register_target(&ebt_ulog_tg_reg) != 0) { + ret = -ENOMEM; + } else if ((ret = xt_register_target(&ebt_ulog_tg_reg)) != 0) { netlink_kernel_release(ebtulognl); } - if (ret) + if (ret == 0) nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger); return ret; -- cgit v1.2.3 From 7249dee5bdbe96302b5ff0d9a7701cf3dc8cffe8 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Thu, 26 Mar 2009 01:04:28 -0700 Subject: netfilter: fix nf_logger name in ebt_ulog. This patch renames the ebt_ulog nf_logger from "ulog" to "ebt_ulog" to be in sync with other modules naming. As this name was currently only used for informational purpose, the renaming should be harmless. Signed-off-by: Eric Leblond Signed-off-by: David S. Miller --- net/bridge/netfilter/ebt_ulog.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index ac6fa43c8ec9..133eeae45a4f 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -280,7 +280,7 @@ static struct xt_target ebt_ulog_tg_reg __read_mostly = { }; static struct nf_logger ebt_ulog_logger __read_mostly = { - .name = "ulog", + .name = "ebt_ulog", .logfn = &ebt_log_packet, .me = THIS_MODULE, }; -- cgit v1.2.3 From ede5ad0e29b641c3d3a644272a9127bfd98dfcc8 Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Thu, 26 Mar 2009 01:11:48 -0700 Subject: net: core: remove unneeded include in net/core/utils.c. Signed-off-by: Rami Rosen Signed-off-by: David S. Miller --- net/core/utils.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net') diff --git a/net/core/utils.c b/net/core/utils.c index 72e0ebe964a0..83221aee7084 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From d271e8bd8c60ce059ee36d836ba063cfc61c3e21 Mon Sep 17 00:00:00 2001 From: Holger Eitzenberger Date: Thu, 26 Mar 2009 13:37:14 +0100 Subject: ctnetlink: compute generic part of event more acurately On a box with most of the optional Netfilter switches turned off some of the NLAs are never send, e. g. secmark, mark or the conntrack byte/packet counters. As a worst case scenario this may possibly still lead to ctnetlink skbs being reallocated in netlink_trim() later, loosing all the nice effects from the previous patches. I try to solve that (at least partly) by correctly #ifdef'ing the NLAs in the computation. Signed-off-by: Holger Eitzenberger Signed-off-by: Patrick McHardy --- net/netfilter/nf_conntrack_netlink.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 03547c60f389..2fb833b130c3 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -441,19 +441,28 @@ ctnetlink_alloc_skb(const struct nf_conntrack_tuple *tuple, gfp_t gfp) + 3 * NLA_TYPE_SIZE(u_int8_t) /* CTA_PROTO_NUM */ + NLA_TYPE_SIZE(u_int32_t) /* CTA_ID */ + NLA_TYPE_SIZE(u_int32_t) /* CTA_STATUS */ +#ifdef CONFIG_NF_CT_ACCT + 2 * nla_total_size(0) /* CTA_COUNTERS_ORIG|REPL */ + 2 * NLA_TYPE_SIZE(uint64_t) /* CTA_COUNTERS_PACKETS */ + 2 * NLA_TYPE_SIZE(uint64_t) /* CTA_COUNTERS_BYTES */ +#endif + NLA_TYPE_SIZE(u_int32_t) /* CTA_TIMEOUT */ + nla_total_size(0) /* CTA_PROTOINFO */ + nla_total_size(0) /* CTA_HELP */ + nla_total_size(NF_CT_HELPER_NAME_LEN) /* CTA_HELP_NAME */ +#ifdef CONFIG_NF_CONNTRACK_SECMARK + NLA_TYPE_SIZE(u_int32_t) /* CTA_SECMARK */ +#endif +#ifdef CONFIG_NF_NAT_NEEDED + 2 * nla_total_size(0) /* CTA_NAT_SEQ_ADJ_ORIG|REPL */ + 2 * NLA_TYPE_SIZE(u_int32_t) /* CTA_NAT_SEQ_CORRECTION_POS */ + 2 * NLA_TYPE_SIZE(u_int32_t) /* CTA_NAT_SEQ_CORRECTION_BEFORE */ + 2 * NLA_TYPE_SIZE(u_int32_t) /* CTA_NAT_SEQ_CORRECTION_AFTER */ - + NLA_TYPE_SIZE(u_int32_t); /* CTA_MARK */ +#endif +#ifdef CONFIG_NF_CONNTRACK_MARK + + NLA_TYPE_SIZE(u_int32_t) /* CTA_MARK */ +#endif + ; #undef NLA_TYPE_SIZE -- cgit v1.2.3 From 8f1ead2d1a626ed0c85b3d2c2046a49081d5933f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 26 Mar 2009 00:59:10 -0700 Subject: GRO: Disable GRO on legacy netif_rx path When I fixed the GRO crash in the legacy receive path I used napi_complete to replace __napi_complete. Unfortunately they're not the same when NETPOLL is enabled, which may result in us not calling __napi_complete at all. What's more, we really do need to keep the __napi_complete call within the IRQ-off section since in theory an IRQ can occur in between and fill up the backlog to the maximum, causing us to lock up. Since we can't seem to find a fix that works properly right now, this patch reverts all the GRO support from the netif_rx path. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 052dd478d3e1..63ec4bf89b29 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2627,18 +2627,15 @@ static int process_backlog(struct napi_struct *napi, int quota) local_irq_disable(); skb = __skb_dequeue(&queue->input_pkt_queue); if (!skb) { + __napi_complete(napi); local_irq_enable(); - napi_complete(napi); - goto out; + break; } local_irq_enable(); - napi_gro_receive(napi, skb); + netif_receive_skb(skb); } while (++work < quota && jiffies == start_time); - napi_gro_flush(napi); - -out: return work; } -- cgit v1.2.3 From 71f6f6dfdf7c7a67462386d9ea05c1095a89c555 Mon Sep 17 00:00:00 2001 From: Jesper Nilsson Date: Fri, 27 Mar 2009 00:17:45 -0700 Subject: ipv6: Plug sk_buff leak in ipv6_rcv (net/ipv6/ip6_input.c) Commit 778d80be52699596bf70e0eb0761cf5e1e46088d (ipv6: Add disable_ipv6 sysctl to disable IPv6 operaion on specific interface) seems to have introduced a leak of sk_buff's for ipv6 traffic, at least in some configurations where idev is NULL, or when ipv6 is disabled via sysctl. The problem is that if the first condition of the if-statement returns non-NULL, it returns an skb with only one reference, and when the other conditions apply, execution jumps to the "out" label, which does not call kfree_skb for it. To plug this leak, change to use the "drop" label instead. (this relies on it being ok to call kfree_skb on NULL) This also allows us to avoid calling rcu_read_unlock here, and removes the only user of the "out" label. Signed-off-by: Jesper Nilsson Signed-off-by: David S. Miller --- net/ipv6/ip6_input.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index f171e8dbac91..8f04bd9da274 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -75,8 +75,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL || !idev || unlikely(idev->cnf.disable_ipv6)) { IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDISCARDS); - rcu_read_unlock(); - goto out; + goto drop; } memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm)); @@ -147,7 +146,6 @@ err: drop: rcu_read_unlock(); kfree_skb(skb); -out: return 0; } -- cgit v1.2.3 From 7d0b591c655ca0d72ebcbd242cf659a20a8995c5 Mon Sep 17 00:00:00 2001 From: Chuck Ebbert Date: Fri, 27 Mar 2009 00:22:01 -0700 Subject: xfrm: spin_lock() should be spin_unlock() in xfrm_state.c spin_lock() should be spin_unlock() in xfrm_state_walk_done(). caused by: commit 12a169e7d8f4b1c95252d8b04ed0f1033ed7cfe2 "ipsec: Put dumpers on the dump list" Reported-by: Marc Milgram Signed-off-by: Chuck Ebbert Signed-off-by: David S. Miller --- net/xfrm/xfrm_state.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 62a5425cc6aa..82271720d970 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1615,7 +1615,7 @@ void xfrm_state_walk_done(struct xfrm_state_walk *walk) spin_lock_bh(&xfrm_state_lock); list_del(&walk->all); - spin_lock_bh(&xfrm_state_lock); + spin_unlock_bh(&xfrm_state_lock); } EXPORT_SYMBOL(xfrm_state_walk_done); -- cgit v1.2.3 From 03ba999117eb8688252f9068356b6e028c2c3a56 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 27 Mar 2009 00:27:18 -0700 Subject: appletalk: this warning can go I think Its past 2.2 ... Signed-off-by: Alan Cox Signed-off-by: David S. Miller --- net/appletalk/ddp.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'net') diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 3e0671df3a3f..d6a9243641af 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1571,14 +1571,10 @@ static int atalk_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr usat->sat_family != AF_APPLETALK) return -EINVAL; - /* netatalk doesn't implement this check */ + /* netatalk didn't implement this check */ if (usat->sat_addr.s_node == ATADDR_BCAST && !sock_flag(sk, SOCK_BROADCAST)) { - printk(KERN_INFO "SO_BROADCAST: Fix your netatalk as " - "it will break before 2.2\n"); -#if 0 return -EPERM; -#endif } } else { if (sk->sk_state != TCP_ESTABLISHED) -- cgit v1.2.3 From 83e0bbcbe2145f160fbaa109b0439dae7f4a38a9 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Fri, 27 Mar 2009 00:28:21 -0700 Subject: af_rose/x25: Sanity check the maximum user frame size Otherwise we can wrap the sizes and end up sending garbage. Closes #10423 Signed-off-by: Alan Cox Signed-off-by: David S. Miller --- net/netrom/af_netrom.c | 6 +++++- net/rose/af_rose.c | 4 ++++ net/x25/af_x25.c | 6 ++++++ 3 files changed, 15 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 6d9c58ec56ac..d1c16bbee932 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1086,7 +1086,11 @@ static int nr_sendmsg(struct kiocb *iocb, struct socket *sock, SOCK_DEBUG(sk, "NET/ROM: sendto: Addresses built.\n"); - /* Build a packet */ + /* Build a packet - the conventional user limit is 236 bytes. We can + do ludicrously large NetROM frames but must not overflow */ + if (len > 65536) + return -EMSGSIZE; + SOCK_DEBUG(sk, "NET/ROM: sendto: building packet.\n"); size = len + NR_NETWORK_LEN + NR_TRANSPORT_LEN; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 650139626581..0f36e8d59b29 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1124,6 +1124,10 @@ static int rose_sendmsg(struct kiocb *iocb, struct socket *sock, /* Build a packet */ SOCK_DEBUG(sk, "ROSE: sendto: building packet.\n"); + /* Sanity check the packet size */ + if (len > 65535) + return -EMSGSIZE; + size = len + AX25_BPQ_HEADER_LEN + AX25_MAX_HEADER_LEN + ROSE_MIN_LEN; if ((skb = sock_alloc_send_skb(sk, size, msg->msg_flags & MSG_DONTWAIT, &err)) == NULL) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 9ca17b1ce52e..ed80af8ca5fb 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1035,6 +1035,12 @@ static int x25_sendmsg(struct kiocb *iocb, struct socket *sock, sx25.sx25_addr = x25->dest_addr; } + /* Sanity check the packet size */ + if (len > 65535) { + rc = -EMSGSIZE; + goto out; + } + SOCK_DEBUG(sk, "x25_sendmsg: sendto: Addresses built.\n"); /* Build a packet */ -- cgit v1.2.3 From 3ba13d179e8c24c68eac32b93593a6b10fcd1572 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 20 Feb 2009 06:02:22 +0000 Subject: constify dentry_operations: rest Signed-off-by: Al Viro --- net/socket.c | 2 +- net/sunrpc/rpc_pipe.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index 35dd7371752a..2f895f60ca8a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -328,7 +328,7 @@ static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen) dentry->d_inode->i_ino); } -static struct dentry_operations sockfs_dentry_operations = { +static const struct dentry_operations sockfs_dentry_operations = { .d_delete = sockfs_delete_dentry, .d_dname = sockfs_dname, }; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 577385a4a5dc..9ced0628d69c 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -480,7 +480,7 @@ static int rpc_delete_dentry(struct dentry *dentry) return 1; } -static struct dentry_operations rpc_dentry_operations = { +static const struct dentry_operations rpc_dentry_operations = { .d_delete = rpc_delete_dentry, }; -- cgit v1.2.3 From fa56dddd6720c8d4b9fa4c942377d2a019cf3708 Mon Sep 17 00:00:00 2001 From: Alina Friedrichsen Date: Tue, 10 Mar 2009 00:49:46 +0100 Subject: mac80211: ieee80211_ibss_commit() cleanup Don't call ieee80211_sta_find_ibss() directly, like it's done in STA mode, so that the commit() call is more harmless respectively has less site-effects. Signed-off-by: Alina Friedrichsen Signed-off-by: John W. Linville --- net/mac80211/ibss.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index f4becc12904e..3201e1f96365 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -812,8 +812,9 @@ int ieee80211_ibss_commit(struct ieee80211_sub_if_data *sdata) ifibss->ibss_join_req = jiffies; ifibss->state = IEEE80211_IBSS_MLME_SEARCH; + set_bit(IEEE80211_IBSS_REQ_RUN, &ifibss->request); - return ieee80211_sta_find_ibss(sdata); + return 0; } int ieee80211_ibss_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len) -- cgit v1.2.3 From 633e24ed95b6c87b42f201ecb65c12a75a5a7eef Mon Sep 17 00:00:00 2001 From: Reinette Chatre Date: Thu, 12 Mar 2009 09:20:40 -0700 Subject: cfg80211/nl80211: remove usage of CONFIG_NL80211 The scan capability added to cfg80211/nl80211 introduced a dependency on nl80211 by cfg80211. We can thus no longer have just cfg80211 without nl80211. Specifically, cfg80211_scan_done() calls nl80211_send_scan_aborted() or nl80211_send_scan_done(). Now we remove the option for user to select nl80211. It will always be compiled if user selects cfg80211. Signed-off-by: Reinette Chatre Signed-off-by: John W. Linville --- net/wireless/Kconfig | 13 ------------- net/wireless/Makefile | 3 +-- net/wireless/nl80211.h | 26 -------------------------- 3 files changed, 1 insertion(+), 41 deletions(-) (limited to 'net') diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 092ae6faccca..d1d18f34d272 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -10,19 +10,6 @@ config CFG80211_REG_DEBUG If unsure, say N. -config NL80211 - bool "nl80211 new netlink interface support" - depends on CFG80211 - default y - ---help--- - This option turns on the new netlink interface - (nl80211) support in cfg80211. - - If =n, drivers using mac80211 will be configured via - wireless extension support provided by that subsystem. - - If unsure, say Y. - config WIRELESS_OLD_REGULATORY bool "Old wireless static regulatory definitions" default y diff --git a/net/wireless/Makefile b/net/wireless/Makefile index dad43c24f695..c157b4d8014b 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -5,8 +5,7 @@ obj-$(CONFIG_LIB80211_CRYPT_WEP) += lib80211_crypt_wep.o obj-$(CONFIG_LIB80211_CRYPT_CCMP) += lib80211_crypt_ccmp.o obj-$(CONFIG_LIB80211_CRYPT_TKIP) += lib80211_crypt_tkip.o -cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o +cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o cfg80211-$(CONFIG_WIRELESS_EXT) += wext-compat.o -cfg80211-$(CONFIG_NL80211) += nl80211.o ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index e65a3c38c52f..5b5fe1339de0 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -3,7 +3,6 @@ #include "core.h" -#ifdef CONFIG_NL80211 extern int nl80211_init(void); extern void nl80211_exit(void); extern void nl80211_notify_dev_rename(struct cfg80211_registered_device *rdev); @@ -12,30 +11,5 @@ extern void nl80211_send_scan_done(struct cfg80211_registered_device *rdev, extern void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev, struct net_device *netdev); extern void nl80211_send_reg_change_event(struct regulatory_request *request); -#else -static inline int nl80211_init(void) -{ - return 0; -} -static inline void nl80211_exit(void) -{ -} -static inline void nl80211_notify_dev_rename( - struct cfg80211_registered_device *rdev) -{ -} -static inline void -nl80211_send_scan_done(struct cfg80211_registered_device *rdev, - struct net_device *netdev) -{} -static inline void nl80211_send_scan_aborted( - struct cfg80211_registered_device *rdev, - struct net_device *netdev) -{} -static inline void -nl80211_send_reg_change_event(struct regulatory_request *request) -{ -} -#endif /* CONFIG_NL80211 */ #endif /* __NET_WIRELESS_NL80211_H */ -- cgit v1.2.3 From 176be728ee7d32cfd33702d82c0733e51f66ab5b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 12 Mar 2009 23:49:28 +0100 Subject: mac80211: remove ieee80211_num_regular_queues This inline is useless and actually makes the code _longer_ rather than shorter. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 2 +- net/mac80211/tx.c | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 841b8450b3de..aaf7793583a7 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1834,7 +1834,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) ifmgd->flags |= IEEE80211_STA_CREATE_IBSS | IEEE80211_STA_AUTO_BSSID_SEL | IEEE80211_STA_AUTO_CHANNEL_SEL; - if (ieee80211_num_regular_queues(&sdata->local->hw) >= 4) + if (sdata->local->hw.queues >= 4) ifmgd->flags |= IEEE80211_STA_WMM_ENABLED; } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 457238a2f3fc..038460b0a48a 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1666,8 +1666,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, } /* receiver and we are QoS enabled, use a QoS type frame */ - if (sta_flags & WLAN_STA_WME && - ieee80211_num_regular_queues(&local->hw) >= 4) { + if ((sta_flags & WLAN_STA_WME) && local->hw.queues >= 4) { fc |= cpu_to_le16(IEEE80211_STYPE_QOS_DATA); hdrlen += 2; } @@ -1802,7 +1801,7 @@ void ieee80211_clear_tx_pending(struct ieee80211_local *local) int i, j; struct ieee80211_tx_stored_packet *store; - for (i = 0; i < ieee80211_num_regular_queues(&local->hw); i++) { + for (i = 0; i < local->hw.queues; i++) { if (!test_bit(i, local->queues_pending)) continue; store = &local->pending_packet[i]; @@ -1827,7 +1826,7 @@ void ieee80211_tx_pending(unsigned long data) int i, ret; netif_tx_lock_bh(dev); - for (i = 0; i < ieee80211_num_regular_queues(&local->hw); i++) { + for (i = 0; i < local->hw.queues; i++) { /* Check that this queue is ok */ if (__netif_subqueue_stopped(local->mdev, i) && !test_bit(i, local->queues_pending_run)) -- cgit v1.2.3 From 11432379fd2a3854a3408424d8dcd99afd811573 Mon Sep 17 00:00:00 2001 From: Helmut Schaa Date: Thu, 12 Mar 2009 14:04:34 +0100 Subject: mac80211: start pending scan after probe/auth/assoc timed out If a scan is queued in STA mode while the interface is in state direct probe, authenticate or associate the scan is delayed until the interface enters disabled or associated state. But in case of direct probe-, authentication- or association- timeout sta_work will not be scheduled anymore (without external trigger) and thus the pending scan is not executed and prevents a new scan from being triggered (-EBUSY). Fix this by queueing the sta work again after direct probe-, authentication- and association- timeout. Signed-off-by: Helmut Schaa Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index aaf7793583a7..a55879663b3c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -682,6 +682,7 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, static void ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + struct ieee80211_local *local = sdata->local; ifmgd->direct_probe_tries++; if (ifmgd->direct_probe_tries > IEEE80211_AUTH_MAX_TRIES) { @@ -697,6 +698,13 @@ static void ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata) ieee80211_rx_bss_remove(sdata, ifmgd->bssid, sdata->local->hw.conf.channel->center_freq, ifmgd->ssid, ifmgd->ssid_len); + + /* + * We might have a pending scan which had no chance to run yet + * due to state == IEEE80211_STA_MLME_DIRECT_PROBE. + * Hence, queue the STAs work again + */ + queue_work(local->hw.workqueue, &ifmgd->work); return; } @@ -721,6 +729,7 @@ static void ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata) static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + struct ieee80211_local *local = sdata->local; ifmgd->auth_tries++; if (ifmgd->auth_tries > IEEE80211_AUTH_MAX_TRIES) { @@ -732,6 +741,13 @@ static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata) ieee80211_rx_bss_remove(sdata, ifmgd->bssid, sdata->local->hw.conf.channel->center_freq, ifmgd->ssid, ifmgd->ssid_len); + + /* + * We might have a pending scan which had no chance to run yet + * due to state == IEEE80211_STA_MLME_AUTHENTICATE. + * Hence, queue the STAs work again + */ + queue_work(local->hw.workqueue, &ifmgd->work); return; } @@ -878,6 +894,7 @@ static int ieee80211_privacy_mismatch(struct ieee80211_sub_if_data *sdata) static void ieee80211_associate(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + struct ieee80211_local *local = sdata->local; ifmgd->assoc_tries++; if (ifmgd->assoc_tries > IEEE80211_ASSOC_MAX_TRIES) { @@ -889,6 +906,12 @@ static void ieee80211_associate(struct ieee80211_sub_if_data *sdata) ieee80211_rx_bss_remove(sdata, ifmgd->bssid, sdata->local->hw.conf.channel->center_freq, ifmgd->ssid, ifmgd->ssid_len); + /* + * We might have a pending scan which had no chance to run yet + * due to state == IEEE80211_STA_MLME_ASSOCIATE. + * Hence, queue the STAs work again + */ + queue_work(local->hw.workqueue, &ifmgd->work); return; } -- cgit v1.2.3 From b5bde374f0f61f5d97114d400ade8fc96bf6f10d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 13 Mar 2009 11:19:45 +0100 Subject: mac80211: fix warnings in ieee80211_if_config The last warning can never trigger, and the explicit AP_VLAN check is pointless if we move the config_interface check down, in practice config_interface is required anyway. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/main.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/mac80211/main.c b/net/mac80211/main.c index f38db4d37e5d..dac68d476bff 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -161,12 +161,6 @@ int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed) if (WARN_ON(!netif_running(sdata->dev))) return 0; - if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) - return -EINVAL; - - if (!local->ops->config_interface) - return 0; - memset(&conf, 0, sizeof(conf)); if (sdata->vif.type == NL80211_IFTYPE_STATION) @@ -183,6 +177,9 @@ int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed) return -EINVAL; } + if (!local->ops->config_interface) + return 0; + switch (sdata->vif.type) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_ADHOC: @@ -224,9 +221,6 @@ int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed) } } - if (WARN_ON(!conf.bssid && (changed & IEEE80211_IFCC_BSSID))) - return -EINVAL; - conf.changed = changed; return local->ops->config_interface(local_to_hw(local), -- cgit v1.2.3 From 25420604c8967ff24f087dd7b9cd4b278567d39a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 13 Mar 2009 11:43:36 +0100 Subject: mac80211: stop queues across suspend/resume Even though userland probably cannot submit packets, there might still be some coming, and that's no good when the driver doesn't expect them. Stop the queues across suspend/resume. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 1 + net/mac80211/pm.c | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index fbb91f1aebb2..ad12c2a03a95 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -598,6 +598,7 @@ enum queue_stop_reason { IEEE80211_QUEUE_STOP_REASON_PS, IEEE80211_QUEUE_STOP_REASON_CSA, IEEE80211_QUEUE_STOP_REASON_AGGREGATION, + IEEE80211_QUEUE_STOP_REASON_SUSPEND, }; struct ieee80211_master_priv { diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 44525f517077..c923ceb089a3 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -11,6 +11,9 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) struct ieee80211_if_init_conf conf; struct sta_info *sta; + ieee80211_stop_queues_by_reason(hw, + IEEE80211_QUEUE_STOP_REASON_SUSPEND); + flush_workqueue(local->hw.workqueue); /* disable keys */ @@ -113,5 +116,8 @@ int __ieee80211_resume(struct ieee80211_hw *hw) ieee80211_configure_filter(local); netif_addr_unlock_bh(local->mdev); + ieee80211_wake_queues_by_reason(hw, + IEEE80211_QUEUE_STOP_REASON_SUSPEND); + return 0; } -- cgit v1.2.3 From aae89831df03e5282a8f5c0ee46432cfb677fc5c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 13 Mar 2009 12:52:10 +0100 Subject: wireless: radiotap updates Radiotap was updated to include a "bad PLCP" flag and standardise the "bad FCS" flag in the "flags" rather than "RX flags" field, this patch updates Linux to that standard. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/rx.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 66f7ecf51b92..fcc0a5995791 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -142,6 +142,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, /* IEEE80211_RADIOTAP_FLAGS */ if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) *pos |= IEEE80211_RADIOTAP_F_FCS; + if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC)) + *pos |= IEEE80211_RADIOTAP_F_BADFCS; if (status->flag & RX_FLAG_SHORTPRE) *pos |= IEEE80211_RADIOTAP_F_SHORTPRE; pos++; @@ -204,9 +206,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, /* ensure 2 byte alignment for the 2 byte field as required */ if ((pos - (unsigned char *)rthdr) & 1) pos++; - /* FIXME: when radiotap gets a 'bad PLCP' flag use it here */ - if (status->flag & (RX_FLAG_FAILED_FCS_CRC | RX_FLAG_FAILED_PLCP_CRC)) - *(__le16 *)pos |= cpu_to_le16(IEEE80211_RADIOTAP_F_RX_BADFCS); + if (status->flag & RX_FLAG_FAILED_PLCP_CRC) + *(__le16 *)pos |= cpu_to_le16(IEEE80211_RADIOTAP_F_RX_BADPLCP); pos += 2; } -- cgit v1.2.3 From ec30415f7935f0ff92f93a4ac87233ca3007a78a Mon Sep 17 00:00:00 2001 From: Vasanthakumar Thiagarajan Date: Fri, 13 Mar 2009 20:26:52 +0530 Subject: mac80211: Populate HT limitation with TKIP/WEP to the handler for SIOCSIWENCODE too Signed-off-by: Vasanthakumar Thiagarajan Signed-off-by: John W. Linville --- net/mac80211/wext.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 935c63ed3dfa..e55d2834764c 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -630,7 +630,7 @@ static int ieee80211_ioctl_siwencode(struct net_device *dev, struct ieee80211_sub_if_data *sdata; int idx, i, alg = ALG_WEP; u8 bcaddr[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; - int remove = 0; + int remove = 0, ret; sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -656,11 +656,20 @@ static int ieee80211_ioctl_siwencode(struct net_device *dev, return 0; } - return ieee80211_set_encryption( + ret = ieee80211_set_encryption( sdata, bcaddr, idx, alg, remove, !sdata->default_key, keybuf, erq->length); + + if (!ret) { + if (remove) + sdata->u.mgd.flags &= ~IEEE80211_STA_TKIP_WEP_USED; + else + sdata->u.mgd.flags |= IEEE80211_STA_TKIP_WEP_USED; + } + + return ret; } -- cgit v1.2.3 From 8fdc621dc743b87879ccf0177969864b09388d9a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 14 Mar 2009 09:34:01 +0100 Subject: nl80211: export supported commands This makes nl80211 export the supported commands (command groups) per wiphy so userspace has an idea what it can do -- this will be required reading for userspace when we introduce auth/assoc /or/ connect for older hardware that cannot separate auth and assoc. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ab9d8f14e151..58ee1b1aff89 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -131,6 +131,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct nlattr *nl_freqs, *nl_freq; struct nlattr *nl_rates, *nl_rate; struct nlattr *nl_modes; + struct nlattr *nl_cmds; enum ieee80211_band band; struct ieee80211_channel *chan; struct ieee80211_rate *rate; @@ -242,6 +243,32 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, } nla_nest_end(msg, nl_bands); + nl_cmds = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_COMMANDS); + if (!nl_cmds) + goto nla_put_failure; + + i = 0; +#define CMD(op, n) \ + do { \ + if (dev->ops->op) { \ + i++; \ + NLA_PUT_U32(msg, i, NL80211_CMD_ ## n); \ + } \ + } while (0) + + CMD(add_virtual_intf, NEW_INTERFACE); + CMD(change_virtual_intf, SET_INTERFACE); + CMD(add_key, NEW_KEY); + CMD(add_beacon, NEW_BEACON); + CMD(add_station, NEW_STATION); + CMD(add_mpath, NEW_MPATH); + CMD(set_mesh_params, SET_MESH_PARAMS); + CMD(change_bss, SET_BSS); + CMD(set_mgmt_extra_ie, SET_MGMT_EXTRA_IE); + +#undef CMD + nla_nest_end(msg, nl_cmds); + return genlmsg_end(msg, hdr); nla_put_failure: -- cgit v1.2.3 From 7f0216a49bea717b9606b81c60f2f0b6152123eb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 14 Mar 2009 09:42:49 +0100 Subject: mac80211: acquire sta_lock for station suspend/resume To avoid concurrent manipulations of the sta list (which shouldn't be possible at this point, but anyway) we need to hold the sta_lock around iterating the list. At the same time, we do not need to iterate the list at all if the driver doesn't want to be notified. Signed-off-by: Johannes Berg Acked-by: Bob Copeland Signed-off-by: John W. Linville --- net/mac80211/pm.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index c923ceb089a3..ef7be1ce2c87 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -10,6 +10,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) struct ieee80211_sub_if_data *sdata; struct ieee80211_if_init_conf conf; struct sta_info *sta; + unsigned long flags; ieee80211_stop_queues_by_reason(hw, IEEE80211_QUEUE_STOP_REASON_SUSPEND); @@ -21,9 +22,9 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) ieee80211_disable_keys(sdata); /* remove STAs */ - list_for_each_entry(sta, &local->sta_list, list) { - - if (local->ops->sta_notify) { + if (local->ops->sta_notify) { + spin_lock_irqsave(&local->sta_lock, flags); + list_for_each_entry(sta, &local->sta_list, list) { if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) sdata = container_of(sdata->bss, struct ieee80211_sub_if_data, @@ -32,11 +33,11 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) local->ops->sta_notify(hw, &sdata->vif, STA_NOTIFY_REMOVE, &sta->sta); } + spin_unlock_irqrestore(&local->sta_lock, flags); } /* remove all interfaces */ list_for_each_entry(sdata, &local->interfaces, list) { - if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN && sdata->vif.type != NL80211_IFTYPE_MONITOR && netif_running(sdata->dev)) { @@ -64,6 +65,7 @@ int __ieee80211_resume(struct ieee80211_hw *hw) struct ieee80211_sub_if_data *sdata; struct ieee80211_if_init_conf conf; struct sta_info *sta; + unsigned long flags; int res; /* restart hardware */ @@ -75,7 +77,6 @@ int __ieee80211_resume(struct ieee80211_hw *hw) /* add interfaces */ list_for_each_entry(sdata, &local->interfaces, list) { - if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN && sdata->vif.type != NL80211_IFTYPE_MONITOR && netif_running(sdata->dev)) { @@ -87,9 +88,9 @@ int __ieee80211_resume(struct ieee80211_hw *hw) } /* add STAs back */ - list_for_each_entry(sta, &local->sta_list, list) { - - if (local->ops->sta_notify) { + if (local->ops->sta_notify) { + spin_lock_irqsave(&local->sta_lock, flags); + list_for_each_entry(sta, &local->sta_list, list) { if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) sdata = container_of(sdata->bss, struct ieee80211_sub_if_data, @@ -98,6 +99,7 @@ int __ieee80211_resume(struct ieee80211_hw *hw) local->ops->sta_notify(hw, &sdata->vif, STA_NOTIFY_ADD, &sta->sta); } + spin_unlock_irqrestore(&local->sta_lock, flags); } /* add back keys */ -- cgit v1.2.3 From 722f069a6dc95d7c6c2cdfbe3413899a3b768f9c Mon Sep 17 00:00:00 2001 From: Sujith Date: Tue, 17 Mar 2009 08:50:06 +0530 Subject: mac80211: Tear down aggregation sessions for suspend/resume When the driver has been notified with a STA_REMOVE, it tears down the internal ADDBA state. On resume, trying to initiate aggregation would fail because mac80211 has not cleared the operational state for that . This can be fixed by tearing down the existing sessions on a suspend. Also, the driver can initiate a new BA session when suspend is in progress. This is fixed by marking the station as being in suspend state and denying ADDBA requests for such STAs. Signed-off-by: Sujith Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-rx.c | 8 ++++++++ net/mac80211/agg-tx.c | 9 +++++++++ net/mac80211/pm.c | 25 +++++++++++++++++++++++++ net/mac80211/sta_info.h | 3 +++ 4 files changed, 45 insertions(+) (limited to 'net') diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index a95affc94629..07656d830bc4 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -197,6 +197,14 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, status = WLAN_STATUS_REQUEST_DECLINED; + if (test_sta_flags(sta, WLAN_STA_SUSPEND)) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Suspend in progress. " + "Denying ADDBA request\n"); +#endif + goto end_no_lock; + } + /* sanity check for incoming parameters: * check if configuration can support the BA policy * and if buffer size does not exceeds max value */ diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 1df116d4d6e7..e5776ef1717a 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -257,6 +257,15 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) goto unlock; } + if (test_sta_flags(sta, WLAN_STA_SUSPEND)) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Suspend in progress. " + "Denying BA session request\n"); +#endif + ret = -EINVAL; + goto unlock; + } + spin_lock_bh(&sta->lock); sdata = sta->sdata; diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index ef7be1ce2c87..1e6152ac6778 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -21,6 +21,19 @@ int __ieee80211_suspend(struct ieee80211_hw *hw) list_for_each_entry(sdata, &local->interfaces, list) ieee80211_disable_keys(sdata); + /* Tear down aggregation sessions */ + + rcu_read_lock(); + + if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { + list_for_each_entry_rcu(sta, &local->sta_list, list) { + set_sta_flags(sta, WLAN_STA_SUSPEND); + ieee80211_sta_tear_down_BA_sessions(sta); + } + } + + rcu_read_unlock(); + /* remove STAs */ if (local->ops->sta_notify) { spin_lock_irqsave(&local->sta_lock, flags); @@ -102,6 +115,18 @@ int __ieee80211_resume(struct ieee80211_hw *hw) spin_unlock_irqrestore(&local->sta_lock, flags); } + /* Clear Suspend state so that ADDBA requests can be processed */ + + rcu_read_lock(); + + if (hw->flags & IEEE80211_HW_AMPDU_AGGREGATION) { + list_for_each_entry_rcu(sta, &local->sta_list, list) { + clear_sta_flags(sta, WLAN_STA_SUSPEND); + } + } + + rcu_read_unlock(); + /* add back keys */ list_for_each_entry(sdata, &local->interfaces, list) if (netif_running(sdata->dev)) diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 1f45573c580c..5b223b216e5a 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -35,6 +35,8 @@ * IEEE80211_TX_CTL_CLEAR_PS_FILT control flag) when the next * frame to this station is transmitted. * @WLAN_STA_MFP: Management frame protection is used with this STA. + * @WLAN_STA_SUSPEND: Set/cleared during a suspend/resume cycle. + * Used to deny ADDBA requests (both TX and RX). */ enum ieee80211_sta_info_flags { WLAN_STA_AUTH = 1<<0, @@ -48,6 +50,7 @@ enum ieee80211_sta_info_flags { WLAN_STA_PSPOLL = 1<<8, WLAN_STA_CLEAR_PS_FILT = 1<<9, WLAN_STA_MFP = 1<<10, + WLAN_STA_SUSPEND = 1<<11 }; #define STA_TID_NUM 16 -- cgit v1.2.3 From 3b85875a252dbbd95c2e04d73639719a0a79634e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 12 Mar 2009 09:55:09 +0100 Subject: nl80211: rework locking When I added scanning to cfg80211, we got a lock dependency like this: rtnl --> cfg80211_mtx nl80211, on the other hand, has the reverse lock dependency: cfg80211_mtx --> rtnl which clearly is a bad idea. This patch reworks nl80211 to take these two locks in the other order to fix the possible, and easily triggerable, deadlock. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 271 ++++++++++++++++++++++++++++++------------------- 1 file changed, 166 insertions(+), 105 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 58ee1b1aff89..a3ecf8d73898 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -602,9 +602,12 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) memset(¶ms, 0, sizeof(params)); + rtnl_lock(); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) - return err; + goto unlock_rtnl; + ifindex = dev->ifindex; type = dev->ieee80211_ptr->iftype; dev_put(dev); @@ -641,17 +644,17 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) if (!err) flags = &_flags; } - rtnl_lock(); + err = drv->ops->change_virtual_intf(&drv->wiphy, ifindex, type, flags, ¶ms); dev = __dev_get_by_index(&init_net, ifindex); WARN_ON(!dev || (!err && dev->ieee80211_ptr->iftype != type)); - rtnl_unlock(); - unlock: cfg80211_put_dev(drv); + unlock_rtnl: + rtnl_unlock(); return err; } @@ -674,9 +677,13 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } + rtnl_lock(); + drv = cfg80211_get_dev_from_info(info); - if (IS_ERR(drv)) - return PTR_ERR(drv); + if (IS_ERR(drv)) { + err = PTR_ERR(drv); + goto unlock_rtnl; + } if (!drv->ops->add_virtual_intf || !(drv->wiphy.interface_modes & (1 << type))) { @@ -690,18 +697,17 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) params.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); } - rtnl_lock(); err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ? info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL, &flags); err = drv->ops->add_virtual_intf(&drv->wiphy, nla_data(info->attrs[NL80211_ATTR_IFNAME]), type, err ? NULL : &flags, ¶ms); - rtnl_unlock(); - unlock: cfg80211_put_dev(drv); + unlock_rtnl: + rtnl_unlock(); return err; } @@ -711,9 +717,11 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) int ifindex, err; struct net_device *dev; + rtnl_lock(); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) - return err; + goto unlock_rtnl; ifindex = dev->ifindex; dev_put(dev); @@ -722,12 +730,12 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) goto out; } - rtnl_lock(); err = drv->ops->del_virtual_intf(&drv->wiphy, ifindex); - rtnl_unlock(); out: cfg80211_put_dev(drv); + unlock_rtnl: + rtnl_unlock(); return err; } @@ -779,9 +787,11 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); + rtnl_lock(); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) - return err; + goto unlock_rtnl; if (!drv->ops->get_key) { err = -EOPNOTSUPP; @@ -809,10 +819,8 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) if (mac_addr) NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr); - rtnl_lock(); err = drv->ops->get_key(&drv->wiphy, dev, key_idx, mac_addr, &cookie, get_key_callback); - rtnl_unlock(); if (err) goto out; @@ -830,6 +838,9 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) out: cfg80211_put_dev(drv); dev_put(dev); + unlock_rtnl: + rtnl_unlock(); + return err; } @@ -858,9 +869,11 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) !info->attrs[NL80211_ATTR_KEY_DEFAULT_MGMT]) return -EINVAL; + rtnl_lock(); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) - return err; + goto unlock_rtnl; if (info->attrs[NL80211_ATTR_KEY_DEFAULT]) func = drv->ops->set_default_key; @@ -872,13 +885,15 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) goto out; } - rtnl_lock(); err = func(&drv->wiphy, dev, key_idx); - rtnl_unlock(); out: cfg80211_put_dev(drv); dev_put(dev); + + unlock_rtnl: + rtnl_unlock(); + return err; } @@ -948,22 +963,25 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } + rtnl_lock(); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) - return err; + goto unlock_rtnl; if (!drv->ops->add_key) { err = -EOPNOTSUPP; goto out; } - rtnl_lock(); err = drv->ops->add_key(&drv->wiphy, dev, key_idx, mac_addr, ¶ms); - rtnl_unlock(); out: cfg80211_put_dev(drv); dev_put(dev); + unlock_rtnl: + rtnl_unlock(); + return err; } @@ -984,22 +1002,26 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); + rtnl_lock(); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) - return err; + goto unlock_rtnl; if (!drv->ops->del_key) { err = -EOPNOTSUPP; goto out; } - rtnl_lock(); err = drv->ops->del_key(&drv->wiphy, dev, key_idx, mac_addr); - rtnl_unlock(); out: cfg80211_put_dev(drv); dev_put(dev); + + unlock_rtnl: + rtnl_unlock(); + return err; } @@ -1013,9 +1035,11 @@ static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info) struct beacon_parameters params; int haveinfo = 0; + rtnl_lock(); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) - return err; + goto unlock_rtnl; switch (info->genlhdr->cmd) { case NL80211_CMD_NEW_BEACON: @@ -1076,13 +1100,14 @@ static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info) goto out; } - rtnl_lock(); err = call(&drv->wiphy, dev, ¶ms); - rtnl_unlock(); out: cfg80211_put_dev(drv); dev_put(dev); + unlock_rtnl: + rtnl_unlock(); + return err; } @@ -1092,22 +1117,25 @@ static int nl80211_del_beacon(struct sk_buff *skb, struct genl_info *info) int err; struct net_device *dev; + rtnl_lock(); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) - return err; + goto unlock_rtnl; if (!drv->ops->del_beacon) { err = -EOPNOTSUPP; goto out; } - rtnl_lock(); err = drv->ops->del_beacon(&drv->wiphy, dev); - rtnl_unlock(); out: cfg80211_put_dev(drv); dev_put(dev); + unlock_rtnl: + rtnl_unlock(); + return err; } @@ -1273,14 +1301,18 @@ static int nl80211_dump_station(struct sk_buff *skb, return -EINVAL; } - netdev = dev_get_by_index(&init_net, ifidx); - if (!netdev) - return -ENODEV; + rtnl_lock(); + + netdev = __dev_get_by_index(&init_net, ifidx); + if (!netdev) { + err = -ENODEV; + goto out_rtnl; + } dev = cfg80211_get_dev_from_ifindex(ifidx); if (IS_ERR(dev)) { err = PTR_ERR(dev); - goto out_put_netdev; + goto out_rtnl; } if (!dev->ops->dump_station) { @@ -1288,15 +1320,13 @@ static int nl80211_dump_station(struct sk_buff *skb, goto out_err; } - rtnl_lock(); - while (1) { err = dev->ops->dump_station(&dev->wiphy, netdev, sta_idx, mac_addr, &sinfo); if (err == -ENOENT) break; if (err) - goto out_err_rtnl; + goto out_err; if (nl80211_send_station(skb, NETLINK_CB(cb->skb).pid, @@ -1312,12 +1342,10 @@ static int nl80211_dump_station(struct sk_buff *skb, out: cb->args[1] = sta_idx; err = skb->len; - out_err_rtnl: - rtnl_unlock(); out_err: cfg80211_put_dev(dev); - out_put_netdev: - dev_put(netdev); + out_rtnl: + rtnl_unlock(); return err; } @@ -1338,19 +1366,18 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); + rtnl_lock(); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) - return err; + goto out_rtnl; if (!drv->ops->get_station) { err = -EOPNOTSUPP; goto out; } - rtnl_lock(); err = drv->ops->get_station(&drv->wiphy, dev, mac_addr, &sinfo); - rtnl_unlock(); - if (err) goto out; @@ -1367,10 +1394,12 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) out_free: nlmsg_free(msg); - out: cfg80211_put_dev(drv); dev_put(dev); + out_rtnl: + rtnl_unlock(); + return err; } @@ -1438,9 +1467,11 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) params.plink_action = nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); + rtnl_lock(); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) - return err; + goto out_rtnl; err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], drv, ¶ms.vlan); if (err) @@ -1451,15 +1482,16 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) goto out; } - rtnl_lock(); err = drv->ops->change_station(&drv->wiphy, dev, mac_addr, ¶ms); - rtnl_unlock(); out: if (params.vlan) dev_put(params.vlan); cfg80211_put_dev(drv); dev_put(dev); + out_rtnl: + rtnl_unlock(); + return err; } @@ -1501,9 +1533,11 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) ¶ms.station_flags)) return -EINVAL; + rtnl_lock(); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) - return err; + goto out_rtnl; err = get_vlan(info->attrs[NL80211_ATTR_STA_VLAN], drv, ¶ms.vlan); if (err) @@ -1514,15 +1548,16 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) goto out; } - rtnl_lock(); err = drv->ops->add_station(&drv->wiphy, dev, mac_addr, ¶ms); - rtnl_unlock(); out: if (params.vlan) dev_put(params.vlan); cfg80211_put_dev(drv); dev_put(dev); + out_rtnl: + rtnl_unlock(); + return err; } @@ -1536,22 +1571,25 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); + rtnl_lock(); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) - return err; + goto out_rtnl; if (!drv->ops->del_station) { err = -EOPNOTSUPP; goto out; } - rtnl_lock(); err = drv->ops->del_station(&drv->wiphy, dev, mac_addr); - rtnl_unlock(); out: cfg80211_put_dev(drv); dev_put(dev); + out_rtnl: + rtnl_unlock(); + return err; } @@ -1632,14 +1670,18 @@ static int nl80211_dump_mpath(struct sk_buff *skb, return -EINVAL; } - netdev = dev_get_by_index(&init_net, ifidx); - if (!netdev) - return -ENODEV; + rtnl_lock(); + + netdev = __dev_get_by_index(&init_net, ifidx); + if (!netdev) { + err = -ENODEV; + goto out_rtnl; + } dev = cfg80211_get_dev_from_ifindex(ifidx); if (IS_ERR(dev)) { err = PTR_ERR(dev); - goto out_put_netdev; + goto out_rtnl; } if (!dev->ops->dump_mpath) { @@ -1647,15 +1689,13 @@ static int nl80211_dump_mpath(struct sk_buff *skb, goto out_err; } - rtnl_lock(); - while (1) { err = dev->ops->dump_mpath(&dev->wiphy, netdev, path_idx, dst, next_hop, &pinfo); if (err == -ENOENT) break; if (err) - goto out_err_rtnl; + goto out_err; if (nl80211_send_mpath(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, NLM_F_MULTI, @@ -1670,12 +1710,10 @@ static int nl80211_dump_mpath(struct sk_buff *skb, out: cb->args[1] = path_idx; err = skb->len; - out_err_rtnl: - rtnl_unlock(); out_err: cfg80211_put_dev(dev); - out_put_netdev: - dev_put(netdev); + out_rtnl: + rtnl_unlock(); return err; } @@ -1697,19 +1735,18 @@ static int nl80211_get_mpath(struct sk_buff *skb, struct genl_info *info) dst = nla_data(info->attrs[NL80211_ATTR_MAC]); + rtnl_lock(); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) - return err; + goto out_rtnl; if (!drv->ops->get_mpath) { err = -EOPNOTSUPP; goto out; } - rtnl_lock(); err = drv->ops->get_mpath(&drv->wiphy, dev, dst, next_hop, &pinfo); - rtnl_unlock(); - if (err) goto out; @@ -1726,10 +1763,12 @@ static int nl80211_get_mpath(struct sk_buff *skb, struct genl_info *info) out_free: nlmsg_free(msg); - out: cfg80211_put_dev(drv); dev_put(dev); + out_rtnl: + rtnl_unlock(); + return err; } @@ -1750,22 +1789,25 @@ static int nl80211_set_mpath(struct sk_buff *skb, struct genl_info *info) dst = nla_data(info->attrs[NL80211_ATTR_MAC]); next_hop = nla_data(info->attrs[NL80211_ATTR_MPATH_NEXT_HOP]); + rtnl_lock(); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) - return err; + goto out_rtnl; if (!drv->ops->change_mpath) { err = -EOPNOTSUPP; goto out; } - rtnl_lock(); err = drv->ops->change_mpath(&drv->wiphy, dev, dst, next_hop); - rtnl_unlock(); out: cfg80211_put_dev(drv); dev_put(dev); + out_rtnl: + rtnl_unlock(); + return err; } static int nl80211_new_mpath(struct sk_buff *skb, struct genl_info *info) @@ -1785,22 +1827,25 @@ static int nl80211_new_mpath(struct sk_buff *skb, struct genl_info *info) dst = nla_data(info->attrs[NL80211_ATTR_MAC]); next_hop = nla_data(info->attrs[NL80211_ATTR_MPATH_NEXT_HOP]); + rtnl_lock(); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) - return err; + goto out_rtnl; if (!drv->ops->add_mpath) { err = -EOPNOTSUPP; goto out; } - rtnl_lock(); err = drv->ops->add_mpath(&drv->wiphy, dev, dst, next_hop); - rtnl_unlock(); out: cfg80211_put_dev(drv); dev_put(dev); + out_rtnl: + rtnl_unlock(); + return err; } @@ -1814,22 +1859,25 @@ static int nl80211_del_mpath(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_MAC]) dst = nla_data(info->attrs[NL80211_ATTR_MAC]); + rtnl_lock(); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) - return err; + goto out_rtnl; if (!drv->ops->del_mpath) { err = -EOPNOTSUPP; goto out; } - rtnl_lock(); err = drv->ops->del_mpath(&drv->wiphy, dev, dst); - rtnl_unlock(); out: cfg80211_put_dev(drv); dev_put(dev); + out_rtnl: + rtnl_unlock(); + return err; } @@ -1862,22 +1910,25 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) nla_len(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]); } + rtnl_lock(); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) - return err; + goto out_rtnl; if (!drv->ops->change_bss) { err = -EOPNOTSUPP; goto out; } - rtnl_lock(); err = drv->ops->change_bss(&drv->wiphy, dev, ¶ms); - rtnl_unlock(); out: cfg80211_put_dev(drv); dev_put(dev); + out_rtnl: + rtnl_unlock(); + return err; } @@ -1972,10 +2023,12 @@ static int nl80211_get_mesh_params(struct sk_buff *skb, struct nlattr *pinfoattr; struct sk_buff *msg; + rtnl_lock(); + /* Look up our device */ err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) - return err; + goto out_rtnl; if (!drv->ops->get_mesh_params) { err = -EOPNOTSUPP; @@ -1983,9 +2036,7 @@ static int nl80211_get_mesh_params(struct sk_buff *skb, } /* Get the mesh params */ - rtnl_lock(); err = drv->ops->get_mesh_params(&drv->wiphy, dev, &cur_params); - rtnl_unlock(); if (err) goto out; @@ -2034,13 +2085,16 @@ static int nl80211_get_mesh_params(struct sk_buff *skb, err = genlmsg_unicast(msg, info->snd_pid); goto out; -nla_put_failure: + nla_put_failure: genlmsg_cancel(msg, hdr); err = -EMSGSIZE; -out: + out: /* Cleanup */ cfg80211_put_dev(drv); dev_put(dev); + out_rtnl: + rtnl_unlock(); + return err; } @@ -2087,9 +2141,11 @@ static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info) parent_attr, nl80211_meshconf_params_policy)) return -EINVAL; + rtnl_lock(); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) - return err; + goto out_rtnl; if (!drv->ops->set_mesh_params) { err = -EOPNOTSUPP; @@ -2136,14 +2192,15 @@ static int nl80211_set_mesh_params(struct sk_buff *skb, struct genl_info *info) nla_get_u16); /* Apply changes */ - rtnl_lock(); err = drv->ops->set_mesh_params(&drv->wiphy, dev, &cfg, mask); - rtnl_unlock(); out: /* cleanup */ cfg80211_put_dev(drv); dev_put(dev); + out_rtnl: + rtnl_unlock(); + return err; } @@ -2310,19 +2367,22 @@ static int nl80211_set_mgmt_extra_ie(struct sk_buff *skb, params.ies_len = nla_len(info->attrs[NL80211_ATTR_IE]); } + rtnl_lock(); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) - return err; + goto out_rtnl; - if (drv->ops->set_mgmt_extra_ie) { - rtnl_lock(); + if (drv->ops->set_mgmt_extra_ie) err = drv->ops->set_mgmt_extra_ie(&drv->wiphy, dev, ¶ms); - rtnl_unlock(); - } else + else err = -EOPNOTSUPP; cfg80211_put_dev(drv); dev_put(dev); + out_rtnl: + rtnl_unlock(); + return err; } @@ -2339,9 +2399,11 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) enum ieee80211_band band; size_t ie_len; + rtnl_lock(); + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) - return err; + goto out_rtnl; wiphy = &drv->wiphy; @@ -2350,11 +2412,9 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) goto out; } - rtnl_lock(); - if (drv->scan_req) { err = -EBUSY; - goto out_unlock; + goto out; } if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) { @@ -2362,7 +2422,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) n_channels++; if (!n_channels) { err = -EINVAL; - goto out_unlock; + goto out; } } else { for (band = 0; band < IEEE80211_NUM_BANDS; band++) @@ -2376,7 +2436,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) if (n_ssids > wiphy->max_scan_ssids) { err = -EINVAL; - goto out_unlock; + goto out; } if (info->attrs[NL80211_ATTR_IE]) @@ -2390,7 +2450,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) + ie_len, GFP_KERNEL); if (!request) { err = -ENOMEM; - goto out_unlock; + goto out; } request->channels = (void *)((char *)request + sizeof(*request)); @@ -2461,11 +2521,12 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) drv->scan_req = NULL; kfree(request); } - out_unlock: - rtnl_unlock(); out: cfg80211_put_dev(drv); dev_put(dev); + out_rtnl: + rtnl_unlock(); + return err; } -- cgit v1.2.3 From a9a6ffffd05f97e6acbdeafc595e269855829751 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Wed, 18 Mar 2009 14:06:44 +0200 Subject: mac80211: don't drop nullfunc frames during software scan ieee80211_tx_h_check_assoc() was dropping everything else than probe requests during software scan. So the nullfunc frame with the power save bit was dropped and AP never received it. This meant that AP never buffered any frames for the station during software scan. Fix this by allowing to transmit both probe request and nullfunc frames during software scan. Tested with stlc45xx. Signed-off-by: Kalle Valo Acked-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/scan.c | 13 +++++++++++++ net/mac80211/tx.c | 14 +++++++++++++- 2 files changed, 26 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 5030a3c87509..46f35dc6accb 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -409,6 +409,19 @@ int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata, return 0; } + /* + * Hardware/driver doesn't support hw_scan, so use software + * scanning instead. First send a nullfunc frame with power save + * bit on so that AP will buffer the frames for us while we are not + * listening, then send probe requests to each channel and wait for + * the responses. After all channels are scanned, tune back to the + * original channel and send a nullfunc frame with power save bit + * off to trigger the AP to send us all the buffered frames. + * + * Note that while local->sw_scanning is true everything else but + * nullfunc frames and probe requests will be dropped in + * ieee80211_tx_h_check_assoc(). + */ local->sw_scanning = true; if (local->ops->sw_scan_start) local->ops->sw_scan_start(local_to_hw(local)); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 038460b0a48a..f3f240c69018 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -193,7 +193,19 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) return TX_CONTINUE; if (unlikely(tx->local->sw_scanning) && - !ieee80211_is_probe_req(hdr->frame_control)) + !ieee80211_is_probe_req(hdr->frame_control) && + !ieee80211_is_nullfunc(hdr->frame_control)) + /* + * When software scanning only nullfunc frames (to notify + * the sleep state to the AP) and probe requests (for the + * active scan) are allowed, all other frames should not be + * sent and we should not get here, but if we do + * nonetheless, drop them to avoid sending them + * off-channel. See the link below and + * ieee80211_start_scan() for more. + * + * http://article.gmane.org/gmane.linux.kernel.wireless.general/30089 + */ return TX_DROP; if (tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT) -- cgit v1.2.3 From 4b4698c443c9db62b220c41a1793872d6ebe82e1 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 19 Mar 2009 13:39:19 +0200 Subject: mac80211: Fix a typo in assoc vs. reassoc check Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index a55879663b3c..4c753bb43ba9 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -103,7 +103,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) u32 rates = 0; size_t e_ies_len; - if (ifmgd->flags & IEEE80211_IBSS_PREV_BSSID_SET) { + if (ifmgd->flags & IEEE80211_STA_PREV_BSSID_SET) { e_ies = sdata->u.mgd.ie_reassocreq; e_ies_len = sdata->u.mgd.ie_reassocreq_len; } else { -- cgit v1.2.3 From a299542e97ec1939fdca7db6d3d82c0aa9bf8b9a Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 19 Mar 2009 13:39:20 +0200 Subject: mac80211: Fix reassociation by not clearing previous BSSID We must not clear the previous BSSID when roaming to another AP within the same ESS for reassociation to be used properly. It is fine to clear this when the SSID changes, so let's move the code into ieee80211_sta_set_ssid(). Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 4c753bb43ba9..1f49b63d8dd2 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1888,8 +1888,6 @@ int ieee80211_sta_commit(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - ifmgd->flags &= ~IEEE80211_STA_PREV_BSSID_SET; - if (ifmgd->ssid_len) ifmgd->flags |= IEEE80211_STA_SSID_SET; else @@ -1908,6 +1906,10 @@ int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size ifmgd = &sdata->u.mgd; if (ifmgd->ssid_len != len || memcmp(ifmgd->ssid, ssid, len) != 0) { + /* + * Do not use reassociation if SSID is changed (different ESS). + */ + ifmgd->flags &= ~IEEE80211_STA_PREV_BSSID_SET; memset(ifmgd->ssid, 0, sizeof(ifmgd->ssid)); memcpy(ifmgd->ssid, ssid, len); ifmgd->ssid_len = len; -- cgit v1.2.3 From 6039f6d23fe792d615da5449e9fa1c6b43caacf6 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 19 Mar 2009 13:39:21 +0200 Subject: nl80211: Event notifications for MLME events Add new nl80211 event notifications (and a new multicast group, "mlme") for informing user space about received and processed Authentication, (Re)Association Response, Deauthentication, and Disassociation frames in station and IBSS modes (i.e., MLME SAP interface primitives MLME-AUTHENTICATE.confirm, MLME-ASSOCIATE.confirm, MLME-REASSOCIATE.confirm, MLME-DEAUTHENTICATE.indicate, and MLME-DISASSOCIATE.indication). The event data is encapsulated as the 802.11 management frame since we already have the frame in that format and it includes all the needed information. This is the initial step in providing MLME SAP interface for authentication and association with nl80211. In other words, kernel code will act as the MLME and a user space application can control it as the SME. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- net/mac80211/mlme.c | 9 +++++-- net/wireless/Makefile | 2 +- net/wireless/mlme.c | 46 ++++++++++++++++++++++++++++++++ net/wireless/nl80211.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++ net/wireless/nl80211.h | 12 +++++++++ 5 files changed, 138 insertions(+), 3 deletions(-) create mode 100644 net/wireless/mlme.c (limited to 'net') diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 1f49b63d8dd2..6dc7a61bc18b 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1085,11 +1085,13 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, case WLAN_AUTH_OPEN: case WLAN_AUTH_LEAP: ieee80211_auth_completed(sdata); + cfg80211_send_rx_auth(sdata->dev, (u8 *) mgmt, len); break; case WLAN_AUTH_SHARED_KEY: - if (ifmgd->auth_transaction == 4) + if (ifmgd->auth_transaction == 4) { ieee80211_auth_completed(sdata); - else + cfg80211_send_rx_auth(sdata->dev, (u8 *) mgmt, len); + } else ieee80211_auth_challenge(sdata, mgmt, len); break; } @@ -1125,6 +1127,7 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_set_disassoc(sdata, true, false, 0); ifmgd->flags &= ~IEEE80211_STA_AUTHENTICATED; + cfg80211_send_rx_deauth(sdata->dev, (u8 *) mgmt, len); } @@ -1154,6 +1157,7 @@ static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, } ieee80211_set_disassoc(sdata, false, false, reason_code); + cfg80211_send_rx_disassoc(sdata->dev, (u8 *) mgmt, len); } @@ -1370,6 +1374,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, ieee80211_set_associated(sdata, changed); ieee80211_associated(sdata); + cfg80211_send_rx_assoc(sdata->dev, (u8 *) mgmt, len); } diff --git a/net/wireless/Makefile b/net/wireless/Makefile index c157b4d8014b..6d1e7b27b752 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_LIB80211_CRYPT_WEP) += lib80211_crypt_wep.o obj-$(CONFIG_LIB80211_CRYPT_CCMP) += lib80211_crypt_ccmp.o obj-$(CONFIG_LIB80211_CRYPT_TKIP) += lib80211_crypt_tkip.o -cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o +cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o mlme.o cfg80211-$(CONFIG_WIRELESS_EXT) += wext-compat.o ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c new file mode 100644 index 000000000000..bec5721b6f99 --- /dev/null +++ b/net/wireless/mlme.c @@ -0,0 +1,46 @@ +/* + * cfg80211 MLME SAP interface + * + * Copyright (c) 2009, Jouni Malinen + */ + +#include +#include +#include +#include +#include +#include "core.h" +#include "nl80211.h" + +void cfg80211_send_rx_auth(struct net_device *dev, const u8 *buf, size_t len) +{ + struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + nl80211_send_rx_auth(rdev, dev, buf, len); +} +EXPORT_SYMBOL(cfg80211_send_rx_auth); + +void cfg80211_send_rx_assoc(struct net_device *dev, const u8 *buf, size_t len) +{ + struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + nl80211_send_rx_assoc(rdev, dev, buf, len); +} +EXPORT_SYMBOL(cfg80211_send_rx_assoc); + +void cfg80211_send_rx_deauth(struct net_device *dev, const u8 *buf, size_t len) +{ + struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + nl80211_send_rx_deauth(rdev, dev, buf, len); +} +EXPORT_SYMBOL(cfg80211_send_rx_deauth); + +void cfg80211_send_rx_disassoc(struct net_device *dev, const u8 *buf, + size_t len) +{ + struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); + nl80211_send_rx_disassoc(rdev, dev, buf, len); +} +EXPORT_SYMBOL(cfg80211_send_rx_disassoc); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a3ecf8d73898..c034c2418cb3 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2830,6 +2830,9 @@ static struct genl_ops nl80211_ops[] = { .dumpit = nl80211_dump_scan, }, }; +static struct genl_multicast_group nl80211_mlme_mcgrp = { + .name = "mlme", +}; /* multicast groups */ static struct genl_multicast_group nl80211_config_mcgrp = { @@ -2975,6 +2978,71 @@ nla_put_failure: nlmsg_free(msg); } +static void nl80211_send_mlme_event(struct cfg80211_registered_device *rdev, + struct net_device *netdev, + const u8 *buf, size_t len, + enum nl80211_commands cmd) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, cmd); + if (!hdr) { + nlmsg_free(msg); + return; + } + + NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx); + NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex); + NLA_PUT(msg, NL80211_ATTR_FRAME, len, buf); + + if (genlmsg_end(msg, hdr) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, GFP_KERNEL); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + +void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *buf, size_t len) +{ + nl80211_send_mlme_event(rdev, netdev, buf, len, + NL80211_CMD_AUTHENTICATE); +} + +void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *buf, + size_t len) +{ + nl80211_send_mlme_event(rdev, netdev, buf, len, NL80211_CMD_ASSOCIATE); +} + +void nl80211_send_rx_deauth(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *buf, + size_t len) +{ + nl80211_send_mlme_event(rdev, netdev, buf, len, + NL80211_CMD_DEAUTHENTICATE); +} + +void nl80211_send_rx_disassoc(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *buf, + size_t len) +{ + nl80211_send_mlme_event(rdev, netdev, buf, len, + NL80211_CMD_DISASSOCIATE); +} + /* initialisation/exit functions */ int nl80211_init(void) @@ -3003,6 +3071,10 @@ int nl80211_init(void) if (err) goto err_out; + err = genl_register_mc_group(&nl80211_fam, &nl80211_mlme_mcgrp); + if (err) + goto err_out; + return 0; err_out: genl_unregister_family(&nl80211_fam); diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 5b5fe1339de0..b77af4ab80be 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -11,5 +11,17 @@ extern void nl80211_send_scan_done(struct cfg80211_registered_device *rdev, extern void nl80211_send_scan_aborted(struct cfg80211_registered_device *rdev, struct net_device *netdev); extern void nl80211_send_reg_change_event(struct regulatory_request *request); +extern void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev, + struct net_device *netdev, + const u8 *buf, size_t len); +extern void nl80211_send_rx_assoc(struct cfg80211_registered_device *rdev, + struct net_device *netdev, + const u8 *buf, size_t len); +extern void nl80211_send_rx_deauth(struct cfg80211_registered_device *rdev, + struct net_device *netdev, + const u8 *buf, size_t len); +extern void nl80211_send_rx_disassoc(struct cfg80211_registered_device *rdev, + struct net_device *netdev, + const u8 *buf, size_t len); #endif /* __NET_WIRELESS_NL80211_H */ -- cgit v1.2.3 From 636a5d3625993c5ca59abc81794b9ded93cdb740 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Thu, 19 Mar 2009 13:39:22 +0200 Subject: nl80211: Add MLME primitives to support external SME This patch adds new nl80211 commands to allow user space to request authentication and association (and also deauthentication and disassociation). The commands are structured to allow separate authentication and association steps, i.e., the interface between kernel and user space is similar to the MLME SAP interface in IEEE 802.11 standard and an user space application takes the role of the SME. The patch introduces MLME-AUTHENTICATE.request, MLME-{,RE}ASSOCIATE.request, MLME-DEAUTHENTICATE.request, and MLME-DISASSOCIATE.request primitives. The authentication and association commands request the actual operations in two steps (assuming the driver supports this; if not, separate authentication step is skipped; this could end up being a separate "connect" command). The initial implementation for mac80211 uses the current net/mac80211/mlme.c for actual sending and processing of management frames and the new nl80211 commands will just stop the current state machine from moving automatically from authentication to association. Future cleanup may move more of the MLME operations into cfg80211. The goal of this design is to provide more control of authentication and association process to user space without having to move the full MLME implementation. This should be enough to allow IEEE 802.11r FT protocol and 802.11s SAE authentication to be implemented. Obviously, this will also bring the extra benefit of not having to use WEXT for association requests with mac80211. An example implementation of a user space SME using the new nl80211 commands is available for wpa_supplicant. This patch is enough to get IEEE 802.11r FT protocol working with over-the-air mechanism (over-the-DS will need additional MLME primitives for handling the FT Action frames). Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 140 +++++++++++++++++++++++++ net/mac80211/ieee80211_i.h | 7 +- net/mac80211/mlme.c | 45 ++++++-- net/mac80211/wext.c | 3 + net/wireless/nl80211.c | 255 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 439 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 58693e52d458..223e536e8426 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1300,6 +1300,142 @@ static int ieee80211_scan(struct wiphy *wiphy, return ieee80211_request_scan(sdata, req); } +static int ieee80211_auth(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_auth_request *req) +{ + struct ieee80211_sub_if_data *sdata; + + if (!netif_running(dev)) + return -ENETDOWN; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return -EOPNOTSUPP; + + switch (req->auth_type) { + case NL80211_AUTHTYPE_OPEN_SYSTEM: + sdata->u.mgd.auth_algs = IEEE80211_AUTH_ALG_OPEN; + break; + case NL80211_AUTHTYPE_SHARED_KEY: + sdata->u.mgd.auth_algs = IEEE80211_AUTH_ALG_SHARED_KEY; + break; + case NL80211_AUTHTYPE_FT: + sdata->u.mgd.auth_algs = IEEE80211_AUTH_ALG_FT; + break; + case NL80211_AUTHTYPE_NETWORK_EAP: + sdata->u.mgd.auth_algs = IEEE80211_AUTH_ALG_LEAP; + break; + default: + return -EOPNOTSUPP; + } + + memcpy(sdata->u.mgd.bssid, req->peer_addr, ETH_ALEN); + sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL; + sdata->u.mgd.flags |= IEEE80211_STA_BSSID_SET; + + /* TODO: req->chan */ + sdata->u.mgd.flags |= IEEE80211_STA_AUTO_CHANNEL_SEL; + + if (req->ssid) { + sdata->u.mgd.flags |= IEEE80211_STA_SSID_SET; + memcpy(sdata->u.mgd.ssid, req->ssid, req->ssid_len); + sdata->u.mgd.ssid_len = req->ssid_len; + sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_SSID_SEL; + } + + kfree(sdata->u.mgd.sme_auth_ie); + sdata->u.mgd.sme_auth_ie = NULL; + sdata->u.mgd.sme_auth_ie_len = 0; + if (req->ie) { + sdata->u.mgd.sme_auth_ie = kmalloc(req->ie_len, GFP_KERNEL); + if (sdata->u.mgd.sme_auth_ie == NULL) + return -ENOMEM; + memcpy(sdata->u.mgd.sme_auth_ie, req->ie, req->ie_len); + sdata->u.mgd.sme_auth_ie_len = req->ie_len; + } + + sdata->u.mgd.flags |= IEEE80211_STA_EXT_SME; + sdata->u.mgd.state = IEEE80211_STA_MLME_DIRECT_PROBE; + ieee80211_sta_req_auth(sdata); + return 0; +} + +static int ieee80211_assoc(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_assoc_request *req) +{ + struct ieee80211_sub_if_data *sdata; + int ret; + + if (!netif_running(dev)) + return -ENETDOWN; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return -EOPNOTSUPP; + + if (memcmp(sdata->u.mgd.bssid, req->peer_addr, ETH_ALEN) != 0 || + !(sdata->u.mgd.flags & IEEE80211_STA_AUTHENTICATED)) + return -ENOLINK; /* not authenticated */ + + sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL; + sdata->u.mgd.flags |= IEEE80211_STA_BSSID_SET; + + /* TODO: req->chan */ + sdata->u.mgd.flags |= IEEE80211_STA_AUTO_CHANNEL_SEL; + + if (req->ssid) { + sdata->u.mgd.flags |= IEEE80211_STA_SSID_SET; + memcpy(sdata->u.mgd.ssid, req->ssid, req->ssid_len); + sdata->u.mgd.ssid_len = req->ssid_len; + sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_SSID_SEL; + } else + sdata->u.mgd.flags |= IEEE80211_STA_AUTO_SSID_SEL; + + ret = ieee80211_sta_set_extra_ie(sdata, req->ie, req->ie_len); + if (ret) + return ret; + + sdata->u.mgd.flags |= IEEE80211_STA_EXT_SME; + sdata->u.mgd.state = IEEE80211_STA_MLME_ASSOCIATE; + ieee80211_sta_req_auth(sdata); + return 0; +} + +static int ieee80211_deauth(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_deauth_request *req) +{ + struct ieee80211_sub_if_data *sdata; + + if (!netif_running(dev)) + return -ENETDOWN; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return -EOPNOTSUPP; + + /* TODO: req->ie */ + return ieee80211_sta_deauthenticate(sdata, req->reason_code); +} + +static int ieee80211_disassoc(struct wiphy *wiphy, struct net_device *dev, + struct cfg80211_disassoc_request *req) +{ + struct ieee80211_sub_if_data *sdata; + + if (!netif_running(dev)) + return -ENETDOWN; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return -EOPNOTSUPP; + + /* TODO: req->ie */ + return ieee80211_sta_disassociate(sdata, req->reason_code); +} + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -1333,4 +1469,8 @@ struct cfg80211_ops mac80211_config_ops = { .suspend = ieee80211_suspend, .resume = ieee80211_resume, .scan = ieee80211_scan, + .auth = ieee80211_auth, + .assoc = ieee80211_assoc, + .deauth = ieee80211_deauth, + .disassoc = ieee80211_disassoc, }; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index ad12c2a03a95..7b96d95f48b1 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -256,6 +256,7 @@ struct mesh_preq_queue { #define IEEE80211_STA_TKIP_WEP_USED BIT(14) #define IEEE80211_STA_CSA_RECEIVED BIT(15) #define IEEE80211_STA_MFP_ENABLED BIT(16) +#define IEEE80211_STA_EXT_SME BIT(17) /* flags for MLME request */ #define IEEE80211_STA_REQ_SCAN 0 #define IEEE80211_STA_REQ_DIRECT_PROBE 1 @@ -266,6 +267,7 @@ struct mesh_preq_queue { #define IEEE80211_AUTH_ALG_OPEN BIT(0) #define IEEE80211_AUTH_ALG_SHARED_KEY BIT(1) #define IEEE80211_AUTH_ALG_LEAP BIT(2) +#define IEEE80211_AUTH_ALG_FT BIT(3) struct ieee80211_if_managed { struct timer_list timer; @@ -335,6 +337,9 @@ struct ieee80211_if_managed { size_t ie_deauth_len; u8 *ie_disassoc; size_t ie_disassoc_len; + + u8 *sme_auth_ie; + size_t sme_auth_ie_len; }; enum ieee80211_ibss_flags { @@ -970,7 +975,7 @@ ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, struct ieee80211_rx_status *rx_status); int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, - char *ie, size_t len); + const char *ie, size_t len); void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local); void ieee80211_scan_failed(struct ieee80211_local *local); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 6dc7a61bc18b..d1bcc8438772 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -730,6 +730,8 @@ static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; + u8 *ies; + size_t ies_len; ifmgd->auth_tries++; if (ifmgd->auth_tries > IEEE80211_AUTH_MAX_TRIES) { @@ -755,7 +757,14 @@ static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata) printk(KERN_DEBUG "%s: authenticate with AP %pM\n", sdata->dev->name, ifmgd->bssid); - ieee80211_send_auth(sdata, 1, ifmgd->auth_alg, NULL, 0, + if (ifmgd->flags & IEEE80211_STA_EXT_SME) { + ies = ifmgd->sme_auth_ie; + ies_len = ifmgd->sme_auth_ie_len; + } else { + ies = NULL; + ies_len = 0; + } + ieee80211_send_auth(sdata, 1, ifmgd->auth_alg, ies, ies_len, ifmgd->bssid, 0); ifmgd->auth_transaction = 2; @@ -870,7 +879,8 @@ static int ieee80211_privacy_mismatch(struct ieee80211_sub_if_data *sdata) int wep_privacy; int privacy_invoked; - if (!ifmgd || (ifmgd->flags & IEEE80211_STA_MIXED_CELL)) + if (!ifmgd || (ifmgd->flags & (IEEE80211_STA_MIXED_CELL | + IEEE80211_STA_EXT_SME))) return 0; bss = ieee80211_rx_bss_get(local, ifmgd->bssid, @@ -998,7 +1008,11 @@ static void ieee80211_auth_completed(struct ieee80211_sub_if_data *sdata) printk(KERN_DEBUG "%s: authenticated\n", sdata->dev->name); ifmgd->flags |= IEEE80211_STA_AUTHENTICATED; - ieee80211_associate(sdata); + if (ifmgd->flags & IEEE80211_STA_EXT_SME) { + /* Wait for SME to request association */ + ifmgd->state = IEEE80211_STA_MLME_DISABLED; + } else + ieee80211_associate(sdata); } @@ -1084,6 +1098,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, switch (ifmgd->auth_alg) { case WLAN_AUTH_OPEN: case WLAN_AUTH_LEAP: + case WLAN_AUTH_FT: ieee80211_auth_completed(sdata); cfg80211_send_rx_auth(sdata->dev, (u8 *) mgmt, len); break; @@ -1117,9 +1132,10 @@ static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, printk(KERN_DEBUG "%s: deauthenticated (Reason: %u)\n", sdata->dev->name, reason_code); - if (ifmgd->state == IEEE80211_STA_MLME_AUTHENTICATE || - ifmgd->state == IEEE80211_STA_MLME_ASSOCIATE || - ifmgd->state == IEEE80211_STA_MLME_ASSOCIATED) { + if (!(ifmgd->flags & IEEE80211_STA_EXT_SME) && + (ifmgd->state == IEEE80211_STA_MLME_AUTHENTICATE || + ifmgd->state == IEEE80211_STA_MLME_ASSOCIATE || + ifmgd->state == IEEE80211_STA_MLME_ASSOCIATED)) { ifmgd->state = IEEE80211_STA_MLME_DIRECT_PROBE; mod_timer(&ifmgd->timer, jiffies + IEEE80211_RETRY_AUTH_INTERVAL); @@ -1150,7 +1166,8 @@ static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, printk(KERN_DEBUG "%s: disassociated (Reason: %u)\n", sdata->dev->name, reason_code); - if (ifmgd->state == IEEE80211_STA_MLME_ASSOCIATED) { + if (!(ifmgd->flags & IEEE80211_STA_EXT_SME) && + ifmgd->state == IEEE80211_STA_MLME_ASSOCIATED) { ifmgd->state = IEEE80211_STA_MLME_ASSOCIATE; mod_timer(&ifmgd->timer, jiffies + IEEE80211_RETRY_AUTH_INTERVAL); @@ -1664,6 +1681,8 @@ static void ieee80211_sta_reset_auth(struct ieee80211_sub_if_data *sdata) ifmgd->auth_alg = WLAN_AUTH_SHARED_KEY; else if (ifmgd->auth_algs & IEEE80211_AUTH_ALG_LEAP) ifmgd->auth_alg = WLAN_AUTH_LEAP; + else if (ifmgd->auth_algs & IEEE80211_AUTH_ALG_FT) + ifmgd->auth_alg = WLAN_AUTH_FT; else ifmgd->auth_alg = WLAN_AUTH_OPEN; ifmgd->auth_transaction = -1; @@ -1687,7 +1706,8 @@ static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata) u16 capa_val = WLAN_CAPABILITY_ESS; struct ieee80211_channel *chan = local->oper_channel; - if (ifmgd->flags & (IEEE80211_STA_AUTO_SSID_SEL | + if (!(ifmgd->flags & IEEE80211_STA_EXT_SME) && + ifmgd->flags & (IEEE80211_STA_AUTO_SSID_SEL | IEEE80211_STA_AUTO_BSSID_SEL | IEEE80211_STA_AUTO_CHANNEL_SEL)) { capa_mask |= WLAN_CAPABILITY_PRIVACY; @@ -1884,7 +1904,11 @@ void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata) ieee80211_set_disassoc(sdata, true, true, WLAN_REASON_DEAUTH_LEAVING); - set_bit(IEEE80211_STA_REQ_AUTH, &ifmgd->request); + if (!(ifmgd->flags & IEEE80211_STA_EXT_SME) || + ifmgd->state != IEEE80211_STA_MLME_ASSOCIATE) + set_bit(IEEE80211_STA_REQ_AUTH, &ifmgd->request); + else if (ifmgd->flags & IEEE80211_STA_EXT_SME) + set_bit(IEEE80211_STA_REQ_RUN, &ifmgd->request); queue_work(local->hw.workqueue, &ifmgd->work); } } @@ -1953,7 +1977,8 @@ int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid) return ieee80211_sta_commit(sdata); } -int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, char *ie, size_t len) +int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, + const char *ie, size_t len) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index e55d2834764c..ce21d66b1023 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -137,6 +137,7 @@ static int ieee80211_ioctl_siwgenie(struct net_device *dev, if (ret) return ret; sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL; + sdata->u.mgd.flags &= ~IEEE80211_STA_EXT_SME; ieee80211_sta_req_auth(sdata); return 0; } @@ -224,6 +225,7 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev, if (ret) return ret; + sdata->u.mgd.flags &= ~IEEE80211_STA_EXT_SME; ieee80211_sta_req_auth(sdata); return 0; } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) @@ -287,6 +289,7 @@ static int ieee80211_ioctl_siwap(struct net_device *dev, ret = ieee80211_sta_set_bssid(sdata, (u8 *) &ap_addr->sa_data); if (ret) return ret; + sdata->u.mgd.flags &= ~IEEE80211_STA_EXT_SME; ieee80211_sta_req_auth(sdata); return 0; } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c034c2418cb3..9e1318d1d4bb 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -111,6 +111,11 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { .len = IEEE80211_MAX_DATA_LEN }, [NL80211_ATTR_SCAN_FREQUENCIES] = { .type = NLA_NESTED }, [NL80211_ATTR_SCAN_SSIDS] = { .type = NLA_NESTED }, + + [NL80211_ATTR_SSID] = { .type = NLA_BINARY, + .len = IEEE80211_MAX_SSID_LEN }, + [NL80211_ATTR_AUTH_TYPE] = { .type = NLA_U32 }, + [NL80211_ATTR_REASON_CODE] = { .type = NLA_U16 }, }; /* message building helper */ @@ -265,6 +270,10 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(set_mesh_params, SET_MESH_PARAMS); CMD(change_bss, SET_BSS); CMD(set_mgmt_extra_ie, SET_MGMT_EXTRA_IE); + CMD(auth, AUTHENTICATE); + CMD(assoc, ASSOCIATE); + CMD(deauth, DEAUTHENTICATE); + CMD(disassoc, DISASSOCIATE); #undef CMD nla_nest_end(msg, nl_cmds); @@ -2646,6 +2655,228 @@ static int nl80211_dump_scan(struct sk_buff *skb, return err; } +static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *drv; + struct net_device *dev; + struct cfg80211_auth_request req; + struct wiphy *wiphy; + int err; + + rtnl_lock(); + + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); + if (err) + goto unlock_rtnl; + + if (!drv->ops->auth) { + err = -EOPNOTSUPP; + goto out; + } + + if (!info->attrs[NL80211_ATTR_MAC]) { + err = -EINVAL; + goto out; + } + + wiphy = &drv->wiphy; + memset(&req, 0, sizeof(req)); + + req.peer_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); + + if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { + req.chan = ieee80211_get_channel( + wiphy, + nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ])); + if (!req.chan) { + err = -EINVAL; + goto out; + } + } + + if (info->attrs[NL80211_ATTR_SSID]) { + req.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); + req.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); + } + + if (info->attrs[NL80211_ATTR_IE]) { + req.ie = nla_data(info->attrs[NL80211_ATTR_IE]); + req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + } + + if (info->attrs[NL80211_ATTR_AUTH_TYPE]) { + req.auth_type = + nla_get_u32(info->attrs[NL80211_ATTR_AUTH_TYPE]); + } + + err = drv->ops->auth(&drv->wiphy, dev, &req); + +out: + cfg80211_put_dev(drv); + dev_put(dev); +unlock_rtnl: + rtnl_unlock(); + return err; +} + +static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *drv; + struct net_device *dev; + struct cfg80211_assoc_request req; + struct wiphy *wiphy; + int err; + + rtnl_lock(); + + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); + if (err) + goto unlock_rtnl; + + if (!drv->ops->assoc) { + err = -EOPNOTSUPP; + goto out; + } + + if (!info->attrs[NL80211_ATTR_MAC] || + !info->attrs[NL80211_ATTR_SSID]) { + err = -EINVAL; + goto out; + } + + wiphy = &drv->wiphy; + memset(&req, 0, sizeof(req)); + + req.peer_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); + + if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { + req.chan = ieee80211_get_channel( + wiphy, + nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ])); + if (!req.chan) { + err = -EINVAL; + goto out; + } + } + + if (nla_len(info->attrs[NL80211_ATTR_SSID]) > IEEE80211_MAX_SSID_LEN) { + err = -EINVAL; + goto out; + } + req.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); + req.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); + + if (info->attrs[NL80211_ATTR_IE]) { + req.ie = nla_data(info->attrs[NL80211_ATTR_IE]); + req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + } + + err = drv->ops->assoc(&drv->wiphy, dev, &req); + +out: + cfg80211_put_dev(drv); + dev_put(dev); +unlock_rtnl: + rtnl_unlock(); + return err; +} + +static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *drv; + struct net_device *dev; + struct cfg80211_deauth_request req; + struct wiphy *wiphy; + int err; + + rtnl_lock(); + + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); + if (err) + goto unlock_rtnl; + + if (!drv->ops->deauth) { + err = -EOPNOTSUPP; + goto out; + } + + if (!info->attrs[NL80211_ATTR_MAC]) { + err = -EINVAL; + goto out; + } + + wiphy = &drv->wiphy; + memset(&req, 0, sizeof(req)); + + req.peer_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); + + if (info->attrs[NL80211_ATTR_REASON_CODE]) + req.reason_code = + nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]); + + if (info->attrs[NL80211_ATTR_IE]) { + req.ie = nla_data(info->attrs[NL80211_ATTR_IE]); + req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + } + + err = drv->ops->deauth(&drv->wiphy, dev, &req); + +out: + cfg80211_put_dev(drv); + dev_put(dev); +unlock_rtnl: + rtnl_unlock(); + return err; +} + +static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *drv; + struct net_device *dev; + struct cfg80211_disassoc_request req; + struct wiphy *wiphy; + int err; + + rtnl_lock(); + + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); + if (err) + goto unlock_rtnl; + + if (!drv->ops->disassoc) { + err = -EOPNOTSUPP; + goto out; + } + + if (!info->attrs[NL80211_ATTR_MAC]) { + err = -EINVAL; + goto out; + } + + wiphy = &drv->wiphy; + memset(&req, 0, sizeof(req)); + + req.peer_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); + + if (info->attrs[NL80211_ATTR_REASON_CODE]) + req.reason_code = + nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]); + + if (info->attrs[NL80211_ATTR_IE]) { + req.ie = nla_data(info->attrs[NL80211_ATTR_IE]); + req.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); + } + + err = drv->ops->disassoc(&drv->wiphy, dev, &req); + +out: + cfg80211_put_dev(drv); + dev_put(dev); +unlock_rtnl: + rtnl_unlock(); + return err; +} + static struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -2829,6 +3060,30 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .dumpit = nl80211_dump_scan, }, + { + .cmd = NL80211_CMD_AUTHENTICATE, + .doit = nl80211_authenticate, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_ASSOCIATE, + .doit = nl80211_associate, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_DEAUTHENTICATE, + .doit = nl80211_deauthenticate, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_DISASSOCIATE, + .doit = nl80211_disassociate, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { .name = "mlme", -- cgit v1.2.3 From 827b1fb44b7e41377a5498b9d070a11dfae2c283 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 13 Mar 2009 11:44:18 +0100 Subject: mac80211: resume properly, add suspend/resume test When mac80211 resumes, it currently doesn't reconfigure the interfaces entirely and also doesn't reconfigure BSS information -- fix this. Also, to be able to test this, add a debugfs file that just calls the suspend/resume code to see what happens when we go through that, without needing the time-consuming suspend/resume cycle. (Original version broke the build for CONFIG_PM=n. Define alternative functions for that situation. -- JWL) Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/debugfs.c | 24 ++++++++++++++++++++++++ net/mac80211/ieee80211_i.h | 12 ++++++++++++ net/mac80211/pm.c | 29 +++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+) (limited to 'net') diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index e37f557de3f3..210b9b6fecd2 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -40,6 +40,10 @@ static const struct file_operations name## _ops = { \ local->debugfs.name = debugfs_create_file(#name, 0400, phyd, \ local, &name## _ops); +#define DEBUGFS_ADD_MODE(name, mode) \ + local->debugfs.name = debugfs_create_file(#name, mode, phyd, \ + local, &name## _ops); + #define DEBUGFS_DEL(name) \ debugfs_remove(local->debugfs.name); \ local->debugfs.name = NULL; @@ -113,6 +117,24 @@ static const struct file_operations tsf_ops = { .open = mac80211_open_file_generic }; +static ssize_t reset_write(struct file *file, const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct ieee80211_local *local = file->private_data; + + rtnl_lock(); + __ieee80211_suspend(&local->hw); + __ieee80211_resume(&local->hw); + rtnl_unlock(); + + return count; +} + +static const struct file_operations reset_ops = { + .write = reset_write, + .open = mac80211_open_file_generic, +}; + /* statistics stuff */ #define DEBUGFS_STATS_FILE(name, buflen, fmt, value...) \ @@ -254,6 +276,7 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_ADD(total_ps_buffered); DEBUGFS_ADD(wep_iv); DEBUGFS_ADD(tsf); + DEBUGFS_ADD_MODE(reset, 0200); statsd = debugfs_create_dir("statistics", phyd); local->debugfs.statistics = statsd; @@ -308,6 +331,7 @@ void debugfs_hw_del(struct ieee80211_local *local) DEBUGFS_DEL(total_ps_buffered); DEBUGFS_DEL(wep_iv); DEBUGFS_DEL(tsf); + DEBUGFS_DEL(reset); DEBUGFS_STATS_DEL(transmitted_fragment_count); DEBUGFS_STATS_DEL(multicast_transmitted_frame_count); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 7b96d95f48b1..547cfac218ee 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -780,6 +780,7 @@ struct ieee80211_local { struct dentry *total_ps_buffered; struct dentry *wep_iv; struct dentry *tsf; + struct dentry *reset; struct dentry *statistics; struct local_debugfsdentries_statsdentries { struct dentry *transmitted_fragment_count; @@ -1059,8 +1060,19 @@ void ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, u8 pwr_constr_elem_len); /* Suspend/resume */ +#ifdef CONFIG_PM int __ieee80211_suspend(struct ieee80211_hw *hw); int __ieee80211_resume(struct ieee80211_hw *hw); +#else +static inline int __ieee80211_suspend(struct ieee80211_hw *hw) +{ + return 0; +} +static inline int __ieee80211_resume(struct ieee80211_hw *hw) +{ + return 0; +} +#endif /* utility functions/constants */ extern void *mac80211_wiphy_privid; /* for wiphy privid */ diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 1e6152ac6778..027302326498 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -143,6 +143,35 @@ int __ieee80211_resume(struct ieee80211_hw *hw) ieee80211_configure_filter(local); netif_addr_unlock_bh(local->mdev); + /* Finally also reconfigure all the BSS information */ + list_for_each_entry(sdata, &local->interfaces, list) { + u32 changed = ~0; + if (!netif_running(sdata->dev)) + continue; + switch (sdata->vif.type) { + case NL80211_IFTYPE_STATION: + /* disable beacon change bits */ + changed &= ~IEEE80211_IFCC_BEACON; + /* fall through */ + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_MESH_POINT: + WARN_ON(ieee80211_if_config(sdata, changed)); + ieee80211_bss_info_change_notify(sdata, ~0); + break; + case NL80211_IFTYPE_WDS: + break; + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_MONITOR: + /* ignore virtual */ + break; + case NL80211_IFTYPE_UNSPECIFIED: + case __NL80211_IFTYPE_AFTER_LAST: + WARN_ON(1); + break; + } + } + ieee80211_wake_queues_by_reason(hw, IEEE80211_QUEUE_STOP_REASON_SUSPEND); -- cgit v1.2.3 From d7873cb9abb5d8b4b9f7f5749af06e4e03798733 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Fri, 20 Mar 2009 15:53:16 +0200 Subject: mac80211: Fix memleak in nl80211 authentication on deinit This file was forgotten from the quilt patch that added MLME primitives, so the kfree on interface removal is missing. Fix this potential memleak by freeing the temporary Authentication frame IEs from SME when the interface is being removed. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- net/mac80211/iface.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index f9f27b9cadbe..6b56dc2208e7 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -660,6 +660,7 @@ static void ieee80211_teardown_sdata(struct net_device *dev) kfree(sdata->u.mgd.ie_reassocreq); kfree(sdata->u.mgd.ie_deauth); kfree(sdata->u.mgd.ie_disassoc); + kfree(sdata->u.mgd.sme_auth_ie); break; case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_AP_VLAN: -- cgit v1.2.3 From 65fc73ac4a310945dfeceac961726c2765ad2ec0 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Fri, 20 Mar 2009 21:21:16 +0200 Subject: nl80211: Remove NL80211_CMD_SET_MGMT_EXTRA_IE The functionality that NL80211_CMD_SET_MGMT_EXTRA_IE provided can now be achieved with cleaner design by adding IE(s) into NL80211_CMD_TRIGGER_SCAN, NL80211_CMD_AUTHENTICATE, NL80211_CMD_ASSOCIATE, NL80211_CMD_DEAUTHENTICATE, and NL80211_CMD_DISASSOCIATE. Since this is a very recently added command and there are no known (or known planned) applications using NL80211_CMD_SET_MGMT_EXTRA_IE and taken into account how much extra complexity it adds to the IE processing we have now (and need to add in the future to fix IE order in couple of frames), it looks like the best option is to just remove the implementation of this command for now. The enum values themselves are left to avoid changing the nl80211 command or attribute numbers. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 86 ---------------------------------------------- net/mac80211/ieee80211_i.h | 15 -------- net/mac80211/iface.c | 7 ---- net/mac80211/mlme.c | 36 ++----------------- net/mac80211/util.c | 29 +++------------- net/wireless/nl80211.c | 47 ------------------------- 6 files changed, 7 insertions(+), 213 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 223e536e8426..f5c15c9a00ce 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1181,91 +1181,6 @@ static int ieee80211_set_channel(struct wiphy *wiphy, return ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_CHANNEL); } -static int set_mgmt_extra_ie_sta(struct ieee80211_sub_if_data *sdata, - u8 subtype, u8 *ies, size_t ies_len) -{ - struct ieee80211_local *local = sdata->local; - struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - - switch (subtype) { - case IEEE80211_STYPE_PROBE_REQ >> 4: - if (local->ops->hw_scan) - break; - kfree(ifmgd->ie_probereq); - ifmgd->ie_probereq = ies; - ifmgd->ie_probereq_len = ies_len; - return 0; - case IEEE80211_STYPE_PROBE_RESP >> 4: - kfree(ifmgd->ie_proberesp); - ifmgd->ie_proberesp = ies; - ifmgd->ie_proberesp_len = ies_len; - return 0; - case IEEE80211_STYPE_AUTH >> 4: - kfree(ifmgd->ie_auth); - ifmgd->ie_auth = ies; - ifmgd->ie_auth_len = ies_len; - return 0; - case IEEE80211_STYPE_ASSOC_REQ >> 4: - kfree(ifmgd->ie_assocreq); - ifmgd->ie_assocreq = ies; - ifmgd->ie_assocreq_len = ies_len; - return 0; - case IEEE80211_STYPE_REASSOC_REQ >> 4: - kfree(ifmgd->ie_reassocreq); - ifmgd->ie_reassocreq = ies; - ifmgd->ie_reassocreq_len = ies_len; - return 0; - case IEEE80211_STYPE_DEAUTH >> 4: - kfree(ifmgd->ie_deauth); - ifmgd->ie_deauth = ies; - ifmgd->ie_deauth_len = ies_len; - return 0; - case IEEE80211_STYPE_DISASSOC >> 4: - kfree(ifmgd->ie_disassoc); - ifmgd->ie_disassoc = ies; - ifmgd->ie_disassoc_len = ies_len; - return 0; - } - - return -EOPNOTSUPP; -} - -static int ieee80211_set_mgmt_extra_ie(struct wiphy *wiphy, - struct net_device *dev, - struct mgmt_extra_ie_params *params) -{ - struct ieee80211_sub_if_data *sdata; - u8 *ies; - size_t ies_len; - int ret = -EOPNOTSUPP; - - if (params->ies) { - ies = kmemdup(params->ies, params->ies_len, GFP_KERNEL); - if (ies == NULL) - return -ENOMEM; - ies_len = params->ies_len; - } else { - ies = NULL; - ies_len = 0; - } - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - switch (sdata->vif.type) { - case NL80211_IFTYPE_STATION: - ret = set_mgmt_extra_ie_sta(sdata, params->subtype, - ies, ies_len); - break; - default: - ret = -EOPNOTSUPP; - break; - } - - if (ret) - kfree(ies); - return ret; -} - #ifdef CONFIG_PM static int ieee80211_suspend(struct wiphy *wiphy) { @@ -1465,7 +1380,6 @@ struct cfg80211_ops mac80211_config_ops = { .change_bss = ieee80211_change_bss, .set_txq_params = ieee80211_set_txq_params, .set_channel = ieee80211_set_channel, - .set_mgmt_extra_ie = ieee80211_set_mgmt_extra_ie, .suspend = ieee80211_suspend, .resume = ieee80211_resume, .scan = ieee80211_scan, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 547cfac218ee..f69e84ab9617 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -323,21 +323,6 @@ struct ieee80211_if_managed { int wmm_last_param_set; /* Extra IE data for management frames */ - u8 *ie_probereq; - size_t ie_probereq_len; - u8 *ie_proberesp; - size_t ie_proberesp_len; - u8 *ie_auth; - size_t ie_auth_len; - u8 *ie_assocreq; - size_t ie_assocreq_len; - u8 *ie_reassocreq; - size_t ie_reassocreq_len; - u8 *ie_deauth; - size_t ie_deauth_len; - u8 *ie_disassoc; - size_t ie_disassoc_len; - u8 *sme_auth_ie; size_t sme_auth_ie_len; }; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 6b56dc2208e7..34f4798a98f7 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -653,13 +653,6 @@ static void ieee80211_teardown_sdata(struct net_device *dev) kfree(sdata->u.mgd.extra_ie); kfree(sdata->u.mgd.assocreq_ies); kfree(sdata->u.mgd.assocresp_ies); - kfree(sdata->u.mgd.ie_probereq); - kfree(sdata->u.mgd.ie_proberesp); - kfree(sdata->u.mgd.ie_auth); - kfree(sdata->u.mgd.ie_assocreq); - kfree(sdata->u.mgd.ie_reassocreq); - kfree(sdata->u.mgd.ie_deauth); - kfree(sdata->u.mgd.ie_disassoc); kfree(sdata->u.mgd.sme_auth_ie); break; case NL80211_IFTYPE_WDS: diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index d1bcc8438772..b0808efcedf6 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -82,38 +82,23 @@ static int ieee80211_compatible_rates(struct ieee80211_bss *bss, /* frame sending functions */ -static void add_extra_ies(struct sk_buff *skb, u8 *ies, size_t ies_len) -{ - if (ies) - memcpy(skb_put(skb, ies_len), ies, ies_len); -} - static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; - u8 *pos, *ies, *ht_ie, *e_ies; + u8 *pos, *ies, *ht_ie; int i, len, count, rates_len, supp_rates_len; u16 capab; struct ieee80211_bss *bss; int wmm = 0; struct ieee80211_supported_band *sband; u32 rates = 0; - size_t e_ies_len; - - if (ifmgd->flags & IEEE80211_STA_PREV_BSSID_SET) { - e_ies = sdata->u.mgd.ie_reassocreq; - e_ies_len = sdata->u.mgd.ie_reassocreq_len; - } else { - e_ies = sdata->u.mgd.ie_assocreq; - e_ies_len = sdata->u.mgd.ie_assocreq_len; - } skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200 + ifmgd->extra_ie_len + - ifmgd->ssid_len + e_ies_len); + ifmgd->ssid_len); if (!skb) { printk(KERN_DEBUG "%s: failed to allocate buffer for assoc " "frame\n", sdata->dev->name); @@ -304,8 +289,6 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) memcpy(pos, &sband->ht_cap.mcs, sizeof(sband->ht_cap.mcs)); } - add_extra_ies(skb, e_ies, e_ies_len); - kfree(ifmgd->assocreq_ies); ifmgd->assocreq_ies_len = (skb->data + skb->len) - ies; ifmgd->assocreq_ies = kmalloc(ifmgd->assocreq_ies_len, GFP_KERNEL); @@ -323,19 +306,8 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; - u8 *ies; - size_t ies_len; - if (stype == IEEE80211_STYPE_DEAUTH) { - ies = sdata->u.mgd.ie_deauth; - ies_len = sdata->u.mgd.ie_deauth_len; - } else { - ies = sdata->u.mgd.ie_disassoc; - ies_len = sdata->u.mgd.ie_disassoc_len; - } - - skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + - ies_len); + skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt)); if (!skb) { printk(KERN_DEBUG "%s: failed to allocate buffer for " "deauth/disassoc frame\n", sdata->dev->name); @@ -353,8 +325,6 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, /* u.deauth.reason_code == u.disassoc.reason_code */ mgmt->u.deauth.reason_code = cpu_to_le16(reason); - add_extra_ies(skb, ies, ies_len); - ieee80211_tx_skb(sdata, skb, ifmgd->flags & IEEE80211_STA_MFP_ENABLED); } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index e0431a1d218b..444bb14c95e1 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -846,16 +846,9 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; - const u8 *ie_auth = NULL; - int ie_auth_len = 0; - - if (sdata->vif.type == NL80211_IFTYPE_STATION) { - ie_auth_len = sdata->u.mgd.ie_auth_len; - ie_auth = sdata->u.mgd.ie_auth; - } skb = dev_alloc_skb(local->hw.extra_tx_headroom + - sizeof(*mgmt) + 6 + extra_len + ie_auth_len); + sizeof(*mgmt) + 6 + extra_len); if (!skb) { printk(KERN_DEBUG "%s: failed to allocate buffer for auth " "frame\n", sdata->dev->name); @@ -877,8 +870,6 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, mgmt->u.auth.status_code = cpu_to_le16(0); if (extra) memcpy(skb_put(skb, extra_len), extra, extra_len); - if (ie_auth) - memcpy(skb_put(skb, ie_auth_len), ie_auth, ie_auth_len); ieee80211_tx_skb(sdata, skb, encrypt); } @@ -891,20 +882,11 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, struct ieee80211_supported_band *sband; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; - u8 *pos, *supp_rates, *esupp_rates = NULL, *extra_preq_ie = NULL; - int i, extra_preq_ie_len = 0; - - switch (sdata->vif.type) { - case NL80211_IFTYPE_STATION: - extra_preq_ie_len = sdata->u.mgd.ie_probereq_len; - extra_preq_ie = sdata->u.mgd.ie_probereq; - break; - default: - break; - } + u8 *pos, *supp_rates, *esupp_rates = NULL; + int i; skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200 + - ie_len + extra_preq_ie_len); + ie_len); if (!skb) { printk(KERN_DEBUG "%s: failed to allocate buffer for probe " "request\n", sdata->dev->name); @@ -953,9 +935,6 @@ void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, if (ie) memcpy(skb_put(skb, ie_len), ie, ie_len); - if (extra_preq_ie) - memcpy(skb_put(skb, extra_preq_ie_len), extra_preq_ie, - extra_preq_ie_len); ieee80211_tx_skb(sdata, skb, 0); } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9e1318d1d4bb..44c79972be57 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -269,7 +269,6 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(add_mpath, NEW_MPATH); CMD(set_mesh_params, SET_MESH_PARAMS); CMD(change_bss, SET_BSS); - CMD(set_mgmt_extra_ie, SET_MGMT_EXTRA_IE); CMD(auth, AUTHENTICATE); CMD(assoc, ASSOCIATE); CMD(deauth, DEAUTHENTICATE); @@ -2355,46 +2354,6 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } -static int nl80211_set_mgmt_extra_ie(struct sk_buff *skb, - struct genl_info *info) -{ - struct cfg80211_registered_device *drv; - int err; - struct net_device *dev; - struct mgmt_extra_ie_params params; - - memset(¶ms, 0, sizeof(params)); - - if (!info->attrs[NL80211_ATTR_MGMT_SUBTYPE]) - return -EINVAL; - params.subtype = nla_get_u8(info->attrs[NL80211_ATTR_MGMT_SUBTYPE]); - if (params.subtype > 15) - return -EINVAL; /* FC Subtype field is 4 bits (0..15) */ - - if (info->attrs[NL80211_ATTR_IE]) { - params.ies = nla_data(info->attrs[NL80211_ATTR_IE]); - params.ies_len = nla_len(info->attrs[NL80211_ATTR_IE]); - } - - rtnl_lock(); - - err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); - if (err) - goto out_rtnl; - - if (drv->ops->set_mgmt_extra_ie) - err = drv->ops->set_mgmt_extra_ie(&drv->wiphy, dev, ¶ms); - else - err = -EOPNOTSUPP; - - cfg80211_put_dev(drv); - dev_put(dev); - out_rtnl: - rtnl_unlock(); - - return err; -} - static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *drv; @@ -3043,12 +3002,6 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, - { - .cmd = NL80211_CMD_SET_MGMT_EXTRA_IE, - .doit = nl80211_set_mgmt_extra_ie, - .policy = nl80211_policy, - .flags = GENL_ADMIN_PERM, - }, { .cmd = NL80211_CMD_TRIGGER_SCAN, .doit = nl80211_trigger_scan, -- cgit v1.2.3 From 255e737eab645ec6037baeca04a5e0a7c3b1f459 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Fri, 20 Mar 2009 21:21:17 +0200 Subject: nl80211: Add more through validation of MLME command parameters Check that the used authentication type and reason code are valid here so that drivers/mac80211 do not need to care about this. In addition, remove the unnecessary validation of SSID attribute length which is taken care of by netlink policy. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 44c79972be57..6f38ee7a3c92 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2614,6 +2614,14 @@ static int nl80211_dump_scan(struct sk_buff *skb, return err; } +static bool nl80211_valid_auth_type(enum nl80211_auth_type auth_type) +{ + return auth_type == NL80211_AUTHTYPE_OPEN_SYSTEM || + auth_type == NL80211_AUTHTYPE_SHARED_KEY || + auth_type == NL80211_AUTHTYPE_FT || + auth_type == NL80211_AUTHTYPE_NETWORK_EAP; +} + static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *drv; @@ -2666,6 +2674,10 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_AUTH_TYPE]) { req.auth_type = nla_get_u32(info->attrs[NL80211_ATTR_AUTH_TYPE]); + if (!nl80211_valid_auth_type(req.auth_type)) { + err = -EINVAL; + goto out; + } } err = drv->ops->auth(&drv->wiphy, dev, &req); @@ -2718,10 +2730,6 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) } } - if (nla_len(info->attrs[NL80211_ATTR_SSID]) > IEEE80211_MAX_SSID_LEN) { - err = -EINVAL; - goto out; - } req.ssid = nla_data(info->attrs[NL80211_ATTR_SSID]); req.ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]); @@ -2769,9 +2777,15 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info) req.peer_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); - if (info->attrs[NL80211_ATTR_REASON_CODE]) + if (info->attrs[NL80211_ATTR_REASON_CODE]) { req.reason_code = nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]); + if (req.reason_code == 0) { + /* Reason Code 0 is reserved */ + err = -EINVAL; + goto out; + } + } if (info->attrs[NL80211_ATTR_IE]) { req.ie = nla_data(info->attrs[NL80211_ATTR_IE]); @@ -2817,9 +2831,15 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) req.peer_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); - if (info->attrs[NL80211_ATTR_REASON_CODE]) + if (info->attrs[NL80211_ATTR_REASON_CODE]) { req.reason_code = nla_get_u16(info->attrs[NL80211_ATTR_REASON_CODE]); + if (req.reason_code == 0) { + /* Reason Code 0 is reserved */ + err = -EINVAL; + goto out; + } + } if (info->attrs[NL80211_ATTR_IE]) { req.ie = nla_data(info->attrs[NL80211_ATTR_IE]); -- cgit v1.2.3 From 35a8efe1a67ba5d7bb7492f67f52ed2aa4925892 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Fri, 20 Mar 2009 21:21:18 +0200 Subject: nl80211: Check that netif_runnin is true in cfg80211 code We do not want to require all the drivers using cfg80211 to need to do this or to be prepared to handle these commands when the interface is down. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 25 ------------------------- net/wireless/nl80211.c | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 25 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index f5c15c9a00ce..b5810b4c79ac 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -728,10 +728,6 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, int err; int layer2_update; - /* Prevent a race with changing the rate control algorithm */ - if (!netif_running(dev)) - return -ENETDOWN; - if (params->vlan) { sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); @@ -860,9 +856,6 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, struct sta_info *sta; int err; - if (!netif_running(dev)) - return -ENETDOWN; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT) @@ -913,9 +906,6 @@ static int ieee80211_change_mpath(struct wiphy *wiphy, struct mesh_path *mpath; struct sta_info *sta; - if (!netif_running(dev)) - return -ENETDOWN; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT) @@ -1202,9 +1192,6 @@ static int ieee80211_scan(struct wiphy *wiphy, { struct ieee80211_sub_if_data *sdata; - if (!netif_running(dev)) - return -ENETDOWN; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->vif.type != NL80211_IFTYPE_STATION && @@ -1220,9 +1207,6 @@ static int ieee80211_auth(struct wiphy *wiphy, struct net_device *dev, { struct ieee80211_sub_if_data *sdata; - if (!netif_running(dev)) - return -ENETDOWN; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->vif.type != NL80211_IFTYPE_STATION) @@ -1282,9 +1266,6 @@ static int ieee80211_assoc(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_sub_if_data *sdata; int ret; - if (!netif_running(dev)) - return -ENETDOWN; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->vif.type != NL80211_IFTYPE_STATION) @@ -1323,9 +1304,6 @@ static int ieee80211_deauth(struct wiphy *wiphy, struct net_device *dev, { struct ieee80211_sub_if_data *sdata; - if (!netif_running(dev)) - return -ENETDOWN; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; @@ -1339,9 +1317,6 @@ static int ieee80211_disassoc(struct wiphy *wiphy, struct net_device *dev, { struct ieee80211_sub_if_data *sdata; - if (!netif_running(dev)) - return -ENETDOWN; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->vif.type != NL80211_IFTYPE_STATION) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 6f38ee7a3c92..6bb73a3a3391 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1556,6 +1556,11 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) goto out; } + if (!netif_running(dev)) { + err = -ENETDOWN; + goto out; + } + err = drv->ops->add_station(&drv->wiphy, dev, mac_addr, ¶ms); out: @@ -1808,6 +1813,11 @@ static int nl80211_set_mpath(struct sk_buff *skb, struct genl_info *info) goto out; } + if (!netif_running(dev)) { + err = -ENETDOWN; + goto out; + } + err = drv->ops->change_mpath(&drv->wiphy, dev, dst, next_hop); out: @@ -1846,6 +1856,11 @@ static int nl80211_new_mpath(struct sk_buff *skb, struct genl_info *info) goto out; } + if (!netif_running(dev)) { + err = -ENETDOWN; + goto out; + } + err = drv->ops->add_mpath(&drv->wiphy, dev, dst, next_hop); out: @@ -2380,6 +2395,11 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) goto out; } + if (!netif_running(dev)) { + err = -ENETDOWN; + goto out; + } + if (drv->scan_req) { err = -EBUSY; goto out; @@ -2641,6 +2661,11 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) goto out; } + if (!netif_running(dev)) { + err = -ENETDOWN; + goto out; + } + if (!info->attrs[NL80211_ATTR_MAC]) { err = -EINVAL; goto out; @@ -2709,6 +2734,11 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) goto out; } + if (!netif_running(dev)) { + err = -ENETDOWN; + goto out; + } + if (!info->attrs[NL80211_ATTR_MAC] || !info->attrs[NL80211_ATTR_SSID]) { err = -EINVAL; @@ -2767,6 +2797,11 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info) goto out; } + if (!netif_running(dev)) { + err = -ENETDOWN; + goto out; + } + if (!info->attrs[NL80211_ATTR_MAC]) { err = -EINVAL; goto out; @@ -2821,6 +2856,11 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) goto out; } + if (!netif_running(dev)) { + err = -ENETDOWN; + goto out; + } + if (!info->attrs[NL80211_ATTR_MAC]) { err = -EINVAL; goto out; -- cgit v1.2.3 From eec60b037a875513d9715dcdb90b13ed81fc5f26 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Fri, 20 Mar 2009 21:21:19 +0200 Subject: nl80211: Check iftype in cfg80211 code We do not want to require all the drivers using cfg80211 to need to do this. In addition, make the error values consistent by using EOPNOTSUPP instead of semi-random assortment of errno values. Signed-off-by: Jouni Malinen Signed-off-by: John W. Linville --- net/mac80211/cfg.c | 40 ---------------------------------- net/wireless/nl80211.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 56 insertions(+), 42 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index b5810b4c79ac..e677b751d468 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -540,9 +540,6 @@ static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != NL80211_IFTYPE_AP) - return -EINVAL; - old = sdata->u.ap.beacon; if (old) @@ -559,9 +556,6 @@ static int ieee80211_set_beacon(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != NL80211_IFTYPE_AP) - return -EINVAL; - old = sdata->u.ap.beacon; if (!old) @@ -577,9 +571,6 @@ static int ieee80211_del_beacon(struct wiphy *wiphy, struct net_device *dev) sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != NL80211_IFTYPE_AP) - return -EINVAL; - old = sdata->u.ap.beacon; if (!old) @@ -858,9 +849,6 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT) - return -ENOTSUPP; - rcu_read_lock(); sta = sta_info_get(local, next_hop); if (!sta) { @@ -908,9 +896,6 @@ static int ieee80211_change_mpath(struct wiphy *wiphy, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT) - return -ENOTSUPP; - rcu_read_lock(); sta = sta_info_get(local, next_hop); @@ -979,9 +964,6 @@ static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT) - return -ENOTSUPP; - rcu_read_lock(); mpath = mesh_path_lookup(dst, sdata); if (!mpath) { @@ -1003,9 +985,6 @@ static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT) - return -ENOTSUPP; - rcu_read_lock(); mpath = mesh_path_lookup_by_idx(idx, sdata); if (!mpath) { @@ -1025,8 +1004,6 @@ static int ieee80211_get_mesh_params(struct wiphy *wiphy, struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT) - return -ENOTSUPP; memcpy(conf, &(sdata->u.mesh.mshcfg), sizeof(struct mesh_config)); return 0; } @@ -1044,9 +1021,6 @@ static int ieee80211_set_mesh_params(struct wiphy *wiphy, struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT) - return -ENOTSUPP; - /* Set the config options which we are interested in setting */ conf = &(sdata->u.mesh.mshcfg); if (_chg_mesh_attr(NL80211_MESHCONF_RETRY_TIMEOUT, mask)) @@ -1094,9 +1068,6 @@ static int ieee80211_change_bss(struct wiphy *wiphy, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != NL80211_IFTYPE_AP) - return -EINVAL; - if (params->use_cts_prot >= 0) { sdata->vif.bss_conf.use_cts_prot = params->use_cts_prot; changed |= BSS_CHANGED_ERP_CTS_PROT; @@ -1209,9 +1180,6 @@ static int ieee80211_auth(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != NL80211_IFTYPE_STATION) - return -EOPNOTSUPP; - switch (req->auth_type) { case NL80211_AUTHTYPE_OPEN_SYSTEM: sdata->u.mgd.auth_algs = IEEE80211_AUTH_ALG_OPEN; @@ -1268,9 +1236,6 @@ static int ieee80211_assoc(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != NL80211_IFTYPE_STATION) - return -EOPNOTSUPP; - if (memcmp(sdata->u.mgd.bssid, req->peer_addr, ETH_ALEN) != 0 || !(sdata->u.mgd.flags & IEEE80211_STA_AUTHENTICATED)) return -ENOLINK; /* not authenticated */ @@ -1305,8 +1270,6 @@ static int ieee80211_deauth(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != NL80211_IFTYPE_STATION) - return -EOPNOTSUPP; /* TODO: req->ie */ return ieee80211_sta_deauthenticate(sdata, req->reason_code); @@ -1319,9 +1282,6 @@ static int ieee80211_disassoc(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != NL80211_IFTYPE_STATION) - return -EOPNOTSUPP; - /* TODO: req->ie */ return ieee80211_sta_disassociate(sdata, req->reason_code); } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 6bb73a3a3391..a7d0b94f6b5e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1049,6 +1049,11 @@ static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info) if (err) goto unlock_rtnl; + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP) { + err = -EOPNOTSUPP; + goto out; + } + switch (info->genlhdr->cmd) { case NL80211_CMD_NEW_BEACON: /* these are required for NEW_BEACON */ @@ -1136,6 +1141,10 @@ static int nl80211_del_beacon(struct sk_buff *skb, struct genl_info *info) goto out; } + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP) { + err = -EOPNOTSUPP; + goto out; + } err = drv->ops->del_beacon(&drv->wiphy, dev); out: @@ -1324,7 +1333,7 @@ static int nl80211_dump_station(struct sk_buff *skb, } if (!dev->ops->dump_station) { - err = -ENOSYS; + err = -EOPNOTSUPP; goto out_err; } @@ -1698,10 +1707,15 @@ static int nl80211_dump_mpath(struct sk_buff *skb, } if (!dev->ops->dump_mpath) { - err = -ENOSYS; + err = -EOPNOTSUPP; goto out_err; } + if (netdev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) { + err = -EOPNOTSUPP; + goto out; + } + while (1) { err = dev->ops->dump_mpath(&dev->wiphy, netdev, path_idx, dst, next_hop, &pinfo); @@ -1759,6 +1773,11 @@ static int nl80211_get_mpath(struct sk_buff *skb, struct genl_info *info) goto out; } + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) { + err = -EOPNOTSUPP; + goto out; + } + err = drv->ops->get_mpath(&drv->wiphy, dev, dst, next_hop, &pinfo); if (err) goto out; @@ -1813,6 +1832,11 @@ static int nl80211_set_mpath(struct sk_buff *skb, struct genl_info *info) goto out; } + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) { + err = -EOPNOTSUPP; + goto out; + } + if (!netif_running(dev)) { err = -ENETDOWN; goto out; @@ -1856,6 +1880,11 @@ static int nl80211_new_mpath(struct sk_buff *skb, struct genl_info *info) goto out; } + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) { + err = -EOPNOTSUPP; + goto out; + } + if (!netif_running(dev)) { err = -ENETDOWN; goto out; @@ -1944,6 +1973,11 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) goto out; } + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP) { + err = -EOPNOTSUPP; + goto out; + } + err = drv->ops->change_bss(&drv->wiphy, dev, ¶ms); out: @@ -2661,6 +2695,11 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) goto out; } + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { + err = -EOPNOTSUPP; + goto out; + } + if (!netif_running(dev)) { err = -ENETDOWN; goto out; @@ -2734,6 +2773,11 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) goto out; } + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { + err = -EOPNOTSUPP; + goto out; + } + if (!netif_running(dev)) { err = -ENETDOWN; goto out; @@ -2797,6 +2841,11 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info) goto out; } + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { + err = -EOPNOTSUPP; + goto out; + } + if (!netif_running(dev)) { err = -ENETDOWN; goto out; @@ -2856,6 +2905,11 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info) goto out; } + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION) { + err = -EOPNOTSUPP; + goto out; + } + if (!netif_running(dev)) { err = -ENETDOWN; goto out; -- cgit v1.2.3 From 2e097dc65673ed421bbc2e49f52c125aa43a8ee6 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Fri, 20 Mar 2009 23:53:04 -0400 Subject: cfg80211: force last_request to be set for OLD_REG if regdom is EU Although EU is a bogus alpha2 we need to process the send request as our code depends on last_request being set. Cc: stable@kernel.org Reported-by: Quentin Armitage Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index eb8b8ed16155..ead9dccb5475 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2135,11 +2135,14 @@ int regulatory_init(void) /* * The old code still requests for a new regdomain and if * you have CRDA you get it updated, otherwise you get - * stuck with the static values. We ignore "EU" code as - * that is not a valid ISO / IEC 3166 alpha2 + * stuck with the static values. Since "EU" is not a valid + * ISO / IEC 3166 alpha2 code we can't expect userpace to + * give us a regulatory domain for it. We need last_request + * iniitalized though so lets just send a request which we + * know will be ignored... this crap will be removed once + * OLD_REG dies. */ - if (ieee80211_regdom[0] != 'E' || ieee80211_regdom[1] != 'U') - err = regulatory_hint_core(ieee80211_regdom); + err = regulatory_hint_core(ieee80211_regdom); #else cfg80211_regdomain = cfg80211_world_regdom; -- cgit v1.2.3 From cc0b6fe88e99096868bdbacbf486c97299533b5a Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Fri, 20 Mar 2009 23:53:05 -0400 Subject: cfg80211: fix incorrect assumption on last_request for 11d The incorrect assumption is the last regulatory request (last_request) is always a country IE when processing country IEs. Although this is true 99% of the time the first time this happens this could not be true. This fixes an oops in the branch check for the last_request when accessing drv_last_ie. The access was done under the assumption the struct won't be null. Note to stable: to port to 29 replace as follows, only 29 has country IE code: s|NL80211_REGDOM_SET_BY_COUNTRY_IE|REGDOM_SET_BY_COUNTRY_IE Cc: stable@kernel.org Reported-by: Quentin Armitage Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index ead9dccb5475..9afc9168748b 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1601,6 +1601,10 @@ static bool reg_same_country_ie_hint(struct wiphy *wiphy, assert_cfg80211_lock(); + if (unlikely(last_request->initiator != + NL80211_REGDOM_SET_BY_COUNTRY_IE)) + return false; + request_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); if (!request_wiphy) @@ -1663,7 +1667,9 @@ void regulatory_hint_11d(struct wiphy *wiphy, * we optimize an early check to exit out early if we don't have to * do anything */ - if (likely(wiphy_idx_valid(last_request->wiphy_idx))) { + if (likely(last_request->initiator == + NL80211_REGDOM_SET_BY_COUNTRY_IE && + wiphy_idx_valid(last_request->wiphy_idx))) { struct cfg80211_registered_device *drv_last_ie; drv_last_ie = -- cgit v1.2.3 From 6ee7d33056f6e6fc7437d980dcc741816deedd0f Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Fri, 20 Mar 2009 23:53:06 -0400 Subject: cfg80211: make regdom module parameter available oustide of OLD_REG It seems a few users are using this module parameter although its not recommended. People are finding it useful despite there being utilities for setting this in userspace. I'm not aware of any distribution using this though. Until userspace and distributions catch up with a default userspace automatic replacement (GeoClue integration would be nirvana) we copy the ieee80211_regdom module parameter from OLD_REG to the new reg code to help these users migrate. Users who are using the non-valid ISO / IEC 3166 alpha "EU" in their ieee80211_regdom module parameter and migrate to non-OLD_REG enabled system will world roam. This also schedules removal of this same ieee80211_regdom module parameter circa March 2010. Hope is by then nirvana is reached and users will abandoned the module parameter completely. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 9afc9168748b..ac048a158d85 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -122,9 +122,14 @@ static const struct ieee80211_regdomain *cfg80211_world_regdom = #ifdef CONFIG_WIRELESS_OLD_REGULATORY static char *ieee80211_regdom = "US"; +#else +static char *ieee80211_regdom = "00"; +#endif + module_param(ieee80211_regdom, charp, 0444); MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); +#ifdef CONFIG_WIRELESS_OLD_REGULATORY /* * We assume 40 MHz bandwidth for the old regulatory work. * We make emphasis we are using the exact same frequencies @@ -2152,7 +2157,7 @@ int regulatory_init(void) #else cfg80211_regdomain = cfg80211_world_regdom; - err = regulatory_hint_core("00"); + err = regulatory_hint_core(ieee80211_regdom); #endif if (err) { if (err == -ENOMEM) -- cgit v1.2.3 From 86f04680df4a136a4a90501572dc2f31f8426581 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Fri, 20 Mar 2009 23:53:07 -0400 Subject: cfg80211: remove code about country IE support with OLD_REG We had left in code to allow interested developers to add support for parsing country IEs when OLD_REG was enabled. This never happened and since we're going to remove OLD_REG lets just remove these comments and code for it. This code path was never being entered so this has no functional change. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/reg.c | 39 +++++++++++---------------------------- 1 file changed, 11 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index ac048a158d85..6327e1617acb 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1420,16 +1420,6 @@ new_request: return r; } - /* - * Note: When CONFIG_WIRELESS_OLD_REGULATORY is enabled - * AND if CRDA is NOT present nothing will happen, if someone - * wants to bother with 11d with OLD_REG you can add a timer. - * If after x amount of time nothing happens you can call: - * - * return set_regdom(country_ie_regdomain); - * - * to intersect with the static rd - */ return call_crda(last_request->alpha2); } @@ -2033,28 +2023,21 @@ static int __set_regdom(const struct ieee80211_regdomain *rd) */ BUG_ON(!country_ie_regdomain); + BUG_ON(rd == country_ie_regdomain); - if (rd != country_ie_regdomain) { - /* - * Intersect what CRDA returned and our what we - * had built from the Country IE received - */ + /* + * Intersect what CRDA returned and our what we + * had built from the Country IE received + */ - intersected_rd = regdom_intersect(rd, country_ie_regdomain); + intersected_rd = regdom_intersect(rd, country_ie_regdomain); - reg_country_ie_process_debug(rd, country_ie_regdomain, - intersected_rd); + reg_country_ie_process_debug(rd, + country_ie_regdomain, + intersected_rd); - kfree(country_ie_regdomain); - country_ie_regdomain = NULL; - } else { - /* - * This would happen when CRDA was not present and - * OLD_REGULATORY was enabled. We intersect our Country - * IE rd and what was set on cfg80211 originally - */ - intersected_rd = regdom_intersect(rd, cfg80211_regdomain); - } + kfree(country_ie_regdomain); + country_ie_regdomain = NULL; if (!intersected_rd) return -EINVAL; -- cgit v1.2.3 From ac7f9cfa2c3b810e0adfb889ad407a8c79a84dbe Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 21 Mar 2009 17:07:59 +0100 Subject: cfg80211: accept no-op interface mode changes When somebody tries to set the interface mode to the existing mode, don't ask the driver but silently accept the setting. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/nl80211.c | 28 +++++++++++++++++++++------- net/wireless/wext-compat.c | 11 +++++++++-- 2 files changed, 30 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a7d0b94f6b5e..8808431bd581 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -607,6 +607,7 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) enum nl80211_iftype type; struct net_device *dev; u32 _flags, *flags = NULL; + bool change = false; memset(¶ms, 0, sizeof(params)); @@ -620,11 +621,17 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) type = dev->ieee80211_ptr->iftype; dev_put(dev); - err = -EINVAL; if (info->attrs[NL80211_ATTR_IFTYPE]) { - type = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]); - if (type > NL80211_IFTYPE_MAX) + enum nl80211_iftype ntype; + + ntype = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]); + if (type != ntype) + change = true; + type = ntype; + if (type > NL80211_IFTYPE_MAX) { + err = -EINVAL; goto unlock; + } } if (!drv->ops->change_virtual_intf || @@ -640,6 +647,7 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) } params.mesh_id = nla_data(info->attrs[NL80211_ATTR_MESH_ID]); params.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); + change = true; } if (info->attrs[NL80211_ATTR_MNTR_FLAGS]) { @@ -649,12 +657,18 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) } err = parse_monitor_flags(info->attrs[NL80211_ATTR_MNTR_FLAGS], &_flags); - if (!err) - flags = &_flags; + if (err) + goto unlock; + + flags = &_flags; + change = true; } - err = drv->ops->change_virtual_intf(&drv->wiphy, ifindex, - type, flags, ¶ms); + if (change) + err = drv->ops->change_virtual_intf(&drv->wiphy, ifindex, + type, flags, ¶ms); + else + err = 0; dev = __dev_get_by_index(&init_net, ifindex); WARN_ON(!dev || (!err && dev->ieee80211_ptr->iftype != type)); diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index b84a9b4fe96a..0fd1db6e95bb 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -66,6 +66,7 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, struct cfg80211_registered_device *rdev; struct vif_params vifparams; enum nl80211_iftype type; + int ret; if (!wdev) return -EOPNOTSUPP; @@ -96,10 +97,16 @@ int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, return -EINVAL; } + if (type == wdev->iftype) + return 0; + memset(&vifparams, 0, sizeof(vifparams)); - return rdev->ops->change_virtual_intf(wdev->wiphy, dev->ifindex, type, - NULL, &vifparams); + ret = rdev->ops->change_virtual_intf(wdev->wiphy, dev->ifindex, type, + NULL, &vifparams); + WARN_ON(!ret && wdev->iftype != type); + + return ret; } EXPORT_SYMBOL(cfg80211_wext_siwmode); -- cgit v1.2.3 From 7986cf9581767d250ca0e5a554541bb276e08d21 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 21 Mar 2009 17:08:43 +0100 Subject: mac80211: remove mixed-cell and userspace MLME code Neither can currently be set from userspace, so there's no regression potential, and neither will be supported from userspace since the new userspace APIs allow the SME, which is in userspace, to control all we need. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 9 ++++----- net/mac80211/iface.c | 3 +-- net/mac80211/mlme.c | 3 +-- net/mac80211/rx.c | 13 ++++--------- net/mac80211/wext.c | 17 +---------------- 5 files changed, 11 insertions(+), 34 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index f69e84ab9617..564167fbb9aa 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -247,8 +247,9 @@ struct mesh_preq_queue { #define IEEE80211_STA_ASSOCIATED BIT(4) #define IEEE80211_STA_PROBEREQ_POLL BIT(5) #define IEEE80211_STA_CREATE_IBSS BIT(6) -#define IEEE80211_STA_MIXED_CELL BIT(7) +/* hole at 7, please re-use */ #define IEEE80211_STA_WMM_ENABLED BIT(8) +/* hole at 9, please re-use */ #define IEEE80211_STA_AUTO_SSID_SEL BIT(10) #define IEEE80211_STA_AUTO_BSSID_SEL BIT(11) #define IEEE80211_STA_AUTO_CHANNEL_SEL BIT(12) @@ -411,7 +412,6 @@ struct ieee80211_if_mesh { * * @IEEE80211_SDATA_ALLMULTI: interface wants all multicast packets * @IEEE80211_SDATA_PROMISC: interface is promisc - * @IEEE80211_SDATA_USERSPACE_MLME: userspace MLME is active * @IEEE80211_SDATA_OPERATING_GMODE: operating in G-only mode * @IEEE80211_SDATA_DONT_BRIDGE_PACKETS: bridge packets between * associated stations and deliver multicast frames both @@ -420,9 +420,8 @@ struct ieee80211_if_mesh { enum ieee80211_sub_if_data_flags { IEEE80211_SDATA_ALLMULTI = BIT(0), IEEE80211_SDATA_PROMISC = BIT(1), - IEEE80211_SDATA_USERSPACE_MLME = BIT(2), - IEEE80211_SDATA_OPERATING_GMODE = BIT(3), - IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(4), + IEEE80211_SDATA_OPERATING_GMODE = BIT(2), + IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(3), }; struct ieee80211_sub_if_data { diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 34f4798a98f7..dd2a276fa8ca 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -261,8 +261,7 @@ static int ieee80211_open(struct net_device *dev) ieee80211_bss_info_change_notify(sdata, changed); ieee80211_enable_keys(sdata); - if (sdata->vif.type == NL80211_IFTYPE_STATION && - !(sdata->flags & IEEE80211_SDATA_USERSPACE_MLME)) + if (sdata->vif.type == NL80211_IFTYPE_STATION) netif_carrier_off(dev); else netif_carrier_on(dev); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index b0808efcedf6..c05be09b9c6f 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -849,8 +849,7 @@ static int ieee80211_privacy_mismatch(struct ieee80211_sub_if_data *sdata) int wep_privacy; int privacy_invoked; - if (!ifmgd || (ifmgd->flags & (IEEE80211_STA_MIXED_CELL | - IEEE80211_STA_EXT_SME))) + if (!ifmgd || (ifmgd->flags & IEEE80211_STA_EXT_SME)) return 0; bss = ieee80211_rx_bss_get(local, ifmgd->bssid, diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index fcc0a5995791..47d395a51923 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1877,18 +1877,13 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) if (ieee80211_vif_is_mesh(&sdata->vif)) return ieee80211_mesh_rx_mgmt(sdata, rx->skb, rx->status); - if (sdata->vif.type != NL80211_IFTYPE_STATION && - sdata->vif.type != NL80211_IFTYPE_ADHOC) - return RX_DROP_MONITOR; - + if (sdata->vif.type != NL80211_IFTYPE_ADHOC) + return ieee80211_ibss_rx_mgmt(sdata, rx->skb, rx->status); - if (sdata->vif.type == NL80211_IFTYPE_STATION) { - if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) - return RX_DROP_MONITOR; + if (sdata->vif.type == NL80211_IFTYPE_STATION) return ieee80211_sta_rx_mgmt(sdata, rx->skb, rx->status); - } - return ieee80211_ibss_rx_mgmt(sdata, rx->skb, rx->status); + return RX_DROP_MONITOR; } static void ieee80211_rx_michael_mic_report(struct net_device *dev, diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index ce21d66b1023..deb4ecec122a 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -129,9 +129,6 @@ static int ieee80211_ioctl_siwgenie(struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) - return -EOPNOTSUPP; - if (sdata->vif.type == NL80211_IFTYPE_STATION) { int ret = ieee80211_sta_set_extra_ie(sdata, extra, data->length); if (ret) @@ -208,14 +205,6 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->vif.type == NL80211_IFTYPE_STATION) { - if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) { - if (len > IEEE80211_MAX_SSID_LEN) - return -EINVAL; - memcpy(sdata->u.mgd.ssid, ssid, len); - sdata->u.mgd.ssid_len = len; - return 0; - } - if (data->flags) sdata->u.mgd.flags &= ~IEEE80211_STA_AUTO_SSID_SEL; else @@ -274,11 +263,7 @@ static int ieee80211_ioctl_siwap(struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (sdata->vif.type == NL80211_IFTYPE_STATION) { int ret; - if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) { - memcpy(sdata->u.mgd.bssid, (u8 *) &ap_addr->sa_data, - ETH_ALEN); - return 0; - } + if (is_zero_ether_addr((u8 *) &ap_addr->sa_data)) sdata->u.mgd.flags |= IEEE80211_STA_AUTO_BSSID_SEL | IEEE80211_STA_AUTO_CHANNEL_SEL; -- cgit v1.2.3 From 3cf335d527ba6af80f4143f3c9e5136afdb143af Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Sun, 22 Mar 2009 21:57:06 +0200 Subject: mac80211: decrease execution of the associated timer Currently the timer is triggering every two seconds (IEEE80211_MONITORING_INTERVAL). Decrease the timer to only trigger during data idle periods to avoid waking up CPU unnecessary. The timer will still trigger during idle periods, that needs to be fixed later. There's also a functional change that probe requests are sent only when the data path is idle, earlier they were sent also while there was activity on the data path. This is also preparation for the beacon filtering support. Thanks to Johannes Berg for the idea. Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/mlme.c | 15 +++++++++++++++ net/mac80211/rx.c | 3 +++ 3 files changed, 20 insertions(+) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 564167fbb9aa..055bb776408c 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1083,6 +1083,8 @@ void ieee80211_dynamic_ps_timer(unsigned long data); void ieee80211_send_nullfunc(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, int powersave); +void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, + struct ieee80211_hdr *hdr); void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, enum queue_stop_reason reason); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index c05be09b9c6f..209abb073dfb 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -909,6 +909,21 @@ static void ieee80211_associate(struct ieee80211_sub_if_data *sdata) mod_timer(&ifmgd->timer, jiffies + IEEE80211_ASSOC_TIMEOUT); } +void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, + struct ieee80211_hdr *hdr) +{ + /* + * We can postpone the mgd.timer whenever receiving unicast frames + * from AP because we know that the connection is working both ways + * at that time. But multicast frames (and hence also beacons) must + * be ignored here, because we need to trigger the timer during + * data idle periods for sending the periodical probe request to + * the AP. + */ + if (!is_multicast_ether_addr(hdr->addr1)) + mod_timer(&sdata->u.mgd.timer, + jiffies + IEEE80211_MONITORING_INTERVAL); +} static void ieee80211_associated(struct ieee80211_sub_if_data *sdata) { diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 47d395a51923..dbfb28465354 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -856,6 +856,9 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) if (!(rx->flags & IEEE80211_RX_RA_MATCH)) return RX_CONTINUE; + if (rx->sdata->vif.type == NL80211_IFTYPE_STATION) + ieee80211_sta_rx_notify(rx->sdata, hdr); + sta->rx_fragments++; sta->rx_bytes += rx->skb->len; sta->last_signal = rx->status->signal; -- cgit v1.2.3 From 15b7b0629c8213905926394dc73d600e0ca250ce Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Sun, 22 Mar 2009 21:57:14 +0200 Subject: mac80211: track beacons separately from the rx path activity Separate beacon and rx path tracking in preparation for the beacon filtering support. At the same time change ieee80211_associated() to look a bit simpler. Probe requests are now sent only after IEEE80211_PROBE_IDLE_TIME, which is now set to 60 seconds. Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 1 + net/mac80211/mlme.c | 77 +++++++++++++++++++++++++++------------------- net/mac80211/rx.c | 6 +++- 3 files changed, 52 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 055bb776408c..8a617a7fc090 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -308,6 +308,7 @@ struct ieee80211_if_managed { unsigned long request; unsigned long last_probe; + unsigned long last_beacon; unsigned int flags; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 209abb073dfb..8f30f4d19da0 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -30,7 +30,7 @@ #define IEEE80211_ASSOC_TIMEOUT (HZ / 5) #define IEEE80211_ASSOC_MAX_TRIES 3 #define IEEE80211_MONITORING_INTERVAL (2 * HZ) -#define IEEE80211_PROBE_INTERVAL (60 * HZ) +#define IEEE80211_PROBE_IDLE_TIME (60 * HZ) #define IEEE80211_RETRY_AUTH_INTERVAL (1 * HZ) /* utils */ @@ -930,7 +930,7 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata) struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; struct sta_info *sta; - int disassoc; + bool disassoc = false; /* TODO: start monitoring current AP signal quality and number of * missed beacons. Scan other channels every now and then and search @@ -945,36 +945,39 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata) if (!sta) { printk(KERN_DEBUG "%s: No STA entry for own AP %pM\n", sdata->dev->name, ifmgd->bssid); - disassoc = 1; - } else { - disassoc = 0; - if (time_after(jiffies, - sta->last_rx + IEEE80211_MONITORING_INTERVAL)) { - if (ifmgd->flags & IEEE80211_STA_PROBEREQ_POLL) { - printk(KERN_DEBUG "%s: No ProbeResp from " - "current AP %pM - assume out of " - "range\n", - sdata->dev->name, ifmgd->bssid); - disassoc = 1; - } else - ieee80211_send_probe_req(sdata, ifmgd->bssid, - ifmgd->ssid, - ifmgd->ssid_len, - NULL, 0); - ifmgd->flags ^= IEEE80211_STA_PROBEREQ_POLL; - } else { - ifmgd->flags &= ~IEEE80211_STA_PROBEREQ_POLL; - if (time_after(jiffies, ifmgd->last_probe + - IEEE80211_PROBE_INTERVAL)) { - ifmgd->last_probe = jiffies; - ieee80211_send_probe_req(sdata, ifmgd->bssid, - ifmgd->ssid, - ifmgd->ssid_len, - NULL, 0); - } - } + disassoc = true; + goto unlock; + } + + if ((ifmgd->flags & IEEE80211_STA_PROBEREQ_POLL) && + time_after(jiffies, sta->last_rx + IEEE80211_MONITORING_INTERVAL)) { + printk(KERN_DEBUG "%s: no probe response from AP %pM " + "- disassociating\n", + sdata->dev->name, ifmgd->bssid); + disassoc = true; + ifmgd->flags &= ~IEEE80211_STA_PROBEREQ_POLL; + goto unlock; + } + + if (time_after(jiffies, + ifmgd->last_beacon + IEEE80211_MONITORING_INTERVAL)) { + printk(KERN_DEBUG "%s: beacon loss from AP %pM " + "- sending probe request\n", + sdata->dev->name, ifmgd->bssid); + ifmgd->flags |= IEEE80211_STA_PROBEREQ_POLL; + ieee80211_send_probe_req(sdata, ifmgd->bssid, ifmgd->ssid, + ifmgd->ssid_len, NULL, 0); + goto unlock; + + } + + if (time_after(jiffies, sta->last_rx + IEEE80211_PROBE_IDLE_TIME)) { + ifmgd->flags |= IEEE80211_STA_PROBEREQ_POLL; + ieee80211_send_probe_req(sdata, ifmgd->bssid, ifmgd->ssid, + ifmgd->ssid_len, NULL, 0); } + unlock: rcu_read_unlock(); if (disassoc) @@ -1374,6 +1377,12 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, bss_conf->assoc_capability = capab_info; ieee80211_set_associated(sdata, changed); + /* + * initialise the time of last beacon to be the association time, + * otherwise beacon loss check will trigger immediately + */ + ifmgd->last_beacon = jiffies; + ieee80211_associated(sdata); cfg80211_send_rx_assoc(sdata->dev, (u8 *) mgmt, len); } @@ -1422,9 +1431,12 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, size_t len, struct ieee80211_rx_status *rx_status) { + struct ieee80211_if_managed *ifmgd; size_t baselen; struct ieee802_11_elems elems; + ifmgd = &sdata->u.mgd; + if (memcmp(mgmt->da, sdata->dev->dev_addr, ETH_ALEN)) return; /* ignore ProbeResp to foreign address */ @@ -1439,11 +1451,14 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, /* direct probe may be part of the association flow */ if (test_and_clear_bit(IEEE80211_STA_REQ_DIRECT_PROBE, - &sdata->u.mgd.request)) { + &ifmgd->request)) { printk(KERN_DEBUG "%s direct probe responded\n", sdata->dev->name); ieee80211_authenticate(sdata); } + + if (ifmgd->flags & IEEE80211_STA_PROBEREQ_POLL) + ifmgd->flags &= ~IEEE80211_STA_PROBEREQ_POLL; } static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index dbfb28465354..eff59f36e8eb 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -850,7 +850,11 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) * Mesh beacons will update last_rx when if they are found to * match the current local configuration when processed. */ - sta->last_rx = jiffies; + if (rx->sdata->vif.type == NL80211_IFTYPE_STATION && + ieee80211_is_beacon(hdr->frame_control)) { + rx->sdata->u.mgd.last_beacon = jiffies; + } else + sta->last_rx = jiffies; } if (!(rx->flags & IEEE80211_RX_RA_MATCH)) -- cgit v1.2.3 From 9050bdd8589c373e01e41ddbd9a192de2ff01ef0 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Sun, 22 Mar 2009 21:57:21 +0200 Subject: mac80211: disable power save when scanning When software scanning we need to disable power save so that all possible probe responses and beacons are received. For hardware scanning assume that hardware will take care of that and document that assumption. Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- net/mac80211/scan.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 46f35dc6accb..3bf9839f5916 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -214,6 +214,66 @@ void ieee80211_scan_failed(struct ieee80211_local *local) local->scan_req = NULL; } +/* + * inform AP that we will go to sleep so that it will buffer the frames + * while we scan + */ +static void ieee80211_scan_ps_enable(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + bool ps = false; + + /* FIXME: what to do when local->pspolling is true? */ + + del_timer_sync(&local->dynamic_ps_timer); + cancel_work_sync(&local->dynamic_ps_enable_work); + + if (local->hw.conf.flags & IEEE80211_CONF_PS) { + ps = true; + local->hw.conf.flags &= ~IEEE80211_CONF_PS; + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); + } + + if (!ps || !(local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK)) + /* + * If power save was enabled, no need to send a nullfunc + * frame because AP knows that we are sleeping. But if the + * hardware is creating the nullfunc frame for power save + * status (ie. IEEE80211_HW_PS_NULLFUNC_STACK is not + * enabled) and power save was enabled, the firmware just + * sent a null frame with power save disabled. So we need + * to send a new nullfunc frame to inform the AP that we + * are again sleeping. + */ + ieee80211_send_nullfunc(local, sdata, 1); +} + +/* inform AP that we are awake again, unless power save is enabled */ +static void ieee80211_scan_ps_disable(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + + if (!local->powersave) + ieee80211_send_nullfunc(local, sdata, 0); + else { + /* + * In !IEEE80211_HW_PS_NULLFUNC_STACK case the hardware + * will send a nullfunc frame with the powersave bit set + * even though the AP already knows that we are sleeping. + * This could be avoided by sending a null frame with power + * save bit disabled before enabling the power save, but + * this doesn't gain anything. + * + * When IEEE80211_HW_PS_NULLFUNC_STACK is enabled, no need + * to send a nullfunc frame because AP already knows that + * we are sleeping, let's just enable power save mode in + * hardware. + */ + local->hw.conf.flags |= IEEE80211_CONF_PS; + ieee80211_hw_config(local, IEEE80211_CONF_CHANGE_PS); + } +} + void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) { struct ieee80211_local *local = hw_to_local(hw); @@ -268,7 +328,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) /* Tell AP we're back */ if (sdata->vif.type == NL80211_IFTYPE_STATION) { if (sdata->u.mgd.flags & IEEE80211_STA_ASSOCIATED) { - ieee80211_send_nullfunc(local, sdata, 0); + ieee80211_scan_ps_disable(sdata); netif_tx_wake_all_queues(sdata->dev); } } else @@ -441,7 +501,7 @@ int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata, if (sdata->vif.type == NL80211_IFTYPE_STATION) { if (sdata->u.mgd.flags & IEEE80211_STA_ASSOCIATED) { netif_tx_stop_all_queues(sdata->dev); - ieee80211_send_nullfunc(local, sdata, 1); + ieee80211_scan_ps_enable(sdata); } } else netif_tx_stop_all_queues(sdata->dev); -- cgit v1.2.3 From a08c1c1ac0c26229ca1ca45d554b209a56edc8be Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Sun, 22 Mar 2009 21:57:28 +0200 Subject: cfg80211: add feature to hold bss In beacon filtering there needs to be a way to not expire the BSS even when no beacons are received. Add an interface to cfg80211 to hold BSS and make sure that it's not expired. Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- net/wireless/core.h | 2 ++ net/wireless/scan.c | 27 ++++++++++++++++++++++++++- 2 files changed, 28 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/core.h b/net/wireless/core.h index 6acd483a61f8..97a6fd8b2b03 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -90,6 +90,8 @@ struct cfg80211_internal_bss { struct rb_node rbn; unsigned long ts; struct kref ref; + bool hold; + /* must be last because of priv member */ struct cfg80211_bss pub; }; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 280dbcd02c15..2a00e362f5fe 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -80,7 +80,8 @@ void cfg80211_bss_expire(struct cfg80211_registered_device *dev) bool expired = false; list_for_each_entry_safe(bss, tmp, &dev->bss_list, list) { - if (!time_after(jiffies, bss->ts + IEEE80211_SCAN_RESULT_EXPIRE)) + if (bss->hold || + !time_after(jiffies, bss->ts + IEEE80211_SCAN_RESULT_EXPIRE)) continue; list_del(&bss->list); rb_erase(&bss->rbn, &dev->bss_tree); @@ -471,6 +472,30 @@ void cfg80211_unlink_bss(struct wiphy *wiphy, struct cfg80211_bss *pub) } EXPORT_SYMBOL(cfg80211_unlink_bss); +void cfg80211_hold_bss(struct cfg80211_bss *pub) +{ + struct cfg80211_internal_bss *bss; + + if (!pub) + return; + + bss = container_of(pub, struct cfg80211_internal_bss, pub); + bss->hold = true; +} +EXPORT_SYMBOL(cfg80211_hold_bss); + +void cfg80211_unhold_bss(struct cfg80211_bss *pub) +{ + struct cfg80211_internal_bss *bss; + + if (!pub) + return; + + bss = container_of(pub, struct cfg80211_internal_bss, pub); + bss->hold = false; +} +EXPORT_SYMBOL(cfg80211_unhold_bss); + #ifdef CONFIG_WIRELESS_EXT int cfg80211_wext_siwscan(struct net_device *dev, struct iw_request_info *info, -- cgit v1.2.3 From 04de83815993714a7ba2618f637fa1092a5f664b Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Sun, 22 Mar 2009 21:57:35 +0200 Subject: mac80211: add beacon filtering support Add IEEE80211_HW_BEACON_FILTERING flag so that driver inform that it supports beacon filtering. Drivers need to call the new function ieee80211_beacon_loss() to notify about beacon loss. Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 2 ++ net/mac80211/iface.c | 3 +++ net/mac80211/mlme.c | 49 +++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 53 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 8a617a7fc090..acba78e1a5ca 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -275,6 +275,7 @@ struct ieee80211_if_managed { struct timer_list chswitch_timer; struct work_struct work; struct work_struct chswitch_work; + struct work_struct beacon_loss_work; u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; @@ -1086,6 +1087,7 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, int powersave); void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, struct ieee80211_hdr *hdr); +void ieee80211_beacon_loss_work(struct work_struct *work); void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, enum queue_stop_reason reason); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index dd2a276fa8ca..91e8e1bacaaa 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -477,6 +477,9 @@ static int ieee80211_stop(struct net_device *dev) */ cancel_work_sync(&sdata->u.mgd.work); cancel_work_sync(&sdata->u.mgd.chswitch_work); + + cancel_work_sync(&sdata->u.mgd.beacon_loss_work); + /* * When we get here, the interface is marked down. * Call synchronize_rcu() to wait for the RX path diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 8f30f4d19da0..7ecda9d59d8a 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -610,6 +610,8 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, bss_info_changed |= ieee80211_handle_bss_capability(sdata, bss->cbss.capability, bss->has_erp_value, bss->erp_value); + cfg80211_hold_bss(&bss->cbss); + ieee80211_rx_bss_put(local, bss); } @@ -751,6 +753,8 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct ieee80211_local *local = sdata->local; + struct ieee80211_conf *conf = &local_to_hw(local)->conf; + struct ieee80211_bss *bss; struct sta_info *sta; u32 changed = 0, config_changed = 0; @@ -774,6 +778,15 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, ieee80211_sta_tear_down_BA_sessions(sta); + bss = ieee80211_rx_bss_get(local, ifmgd->bssid, + conf->channel->center_freq, + ifmgd->ssid, ifmgd->ssid_len); + + if (bss) { + cfg80211_unhold_bss(&bss->cbss); + ieee80211_rx_bss_put(local, bss); + } + if (self_disconnected) { if (deauth) ieee80211_send_deauth_disassoc(sdata, @@ -925,6 +938,33 @@ void ieee80211_sta_rx_notify(struct ieee80211_sub_if_data *sdata, jiffies + IEEE80211_MONITORING_INTERVAL); } +void ieee80211_beacon_loss_work(struct work_struct *work) +{ + struct ieee80211_sub_if_data *sdata = + container_of(work, struct ieee80211_sub_if_data, + u.mgd.beacon_loss_work); + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; + + printk(KERN_DEBUG "%s: driver reports beacon loss from AP %pM " + "- sending probe request\n", sdata->dev->name, + sdata->u.mgd.bssid); + + ifmgd->flags |= IEEE80211_STA_PROBEREQ_POLL; + ieee80211_send_probe_req(sdata, ifmgd->bssid, ifmgd->ssid, + ifmgd->ssid_len, NULL, 0); + + mod_timer(&ifmgd->timer, jiffies + IEEE80211_MONITORING_INTERVAL); +} + +void ieee80211_beacon_loss(struct ieee80211_vif *vif) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + + queue_work(sdata->local->hw.workqueue, + &sdata->u.mgd.beacon_loss_work); +} +EXPORT_SYMBOL(ieee80211_beacon_loss); + static void ieee80211_associated(struct ieee80211_sub_if_data *sdata) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; @@ -959,7 +999,13 @@ static void ieee80211_associated(struct ieee80211_sub_if_data *sdata) goto unlock; } - if (time_after(jiffies, + /* + * Beacon filtering is only enabled with power save and then the + * stack should not check for beacon loss. + */ + if (!((local->hw.flags & IEEE80211_HW_BEACON_FILTER) && + (local->hw.conf.flags & IEEE80211_CONF_PS)) && + time_after(jiffies, ifmgd->last_beacon + IEEE80211_MONITORING_INTERVAL)) { printk(KERN_DEBUG "%s: beacon loss from AP %pM " "- sending probe request\n", @@ -1869,6 +1915,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) ifmgd = &sdata->u.mgd; INIT_WORK(&ifmgd->work, ieee80211_sta_work); INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work); + INIT_WORK(&ifmgd->beacon_loss_work, ieee80211_beacon_loss_work); setup_timer(&ifmgd->timer, ieee80211_sta_timer, (unsigned long) sdata); setup_timer(&ifmgd->chswitch_timer, ieee80211_chswitch_timer, -- cgit v1.2.3 From 2b874e83c970b45c328ab12239b066a43505454c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 23 Mar 2009 14:10:22 +0100 Subject: mac80211: rate control status only for controlled packets This patch changes mac80211 to not notify the rate control algorithm's tx_status() method when reporting status for a packet that didn't go through the rate control algorithm's get_rate() method. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/rate.c | 6 ++++-- net/mac80211/rate.h | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 3fa7ab285066..4641f00a1e5c 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -219,10 +219,12 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, info->control.rates[i].count = 1; } - if (sta && sdata->force_unicast_rateidx > -1) + if (sta && sdata->force_unicast_rateidx > -1) { info->control.rates[0].idx = sdata->force_unicast_rateidx; - else + } else { ref->ops->get_rate(ref->priv, ista, priv_sta, txrc); + info->flags |= IEEE80211_TX_INTFL_RCALGO; + } /* * try to enforce the maximum rate the user wanted diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index b9164c9a9563..2ab5ad9e71ce 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -44,8 +44,10 @@ static inline void rate_control_tx_status(struct ieee80211_local *local, struct rate_control_ref *ref = local->rate_ctrl; struct ieee80211_sta *ista = &sta->sta; void *priv_sta = sta->rate_ctrl_priv; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - ref->ops->tx_status(ref->priv, sband, ista, priv_sta, skb); + if (likely(info->flags & IEEE80211_TX_INTFL_RCALGO)) + ref->ops->tx_status(ref->priv, sband, ista, priv_sta, skb); } -- cgit v1.2.3 From 3832c287f11ba001bbe48e9be8c59cb9f71f6b43 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 24 Mar 2009 08:46:57 +0100 Subject: mac80211: fix RX path My previous patch ("mac80211: remove mixed-cell and userspace MLME code") was too obvious to me, so obvious that a stupid bug crept in. The IBSS RX function must be invoked for IBSS, of course, not anything != IBSS. Reported-by: Larry Finger Signed-off-by: Johannes Berg Tested-by: Larry Finger Signed-off-by: John W. Linville --- net/mac80211/rx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index eff59f36e8eb..64ebe664effc 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1884,7 +1884,7 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) if (ieee80211_vif_is_mesh(&sdata->vif)) return ieee80211_mesh_rx_mgmt(sdata, rx->skb, rx->status); - if (sdata->vif.type != NL80211_IFTYPE_ADHOC) + if (sdata->vif.type == NL80211_IFTYPE_ADHOC) return ieee80211_ibss_rx_mgmt(sdata, rx->skb, rx->status); if (sdata->vif.type == NL80211_IFTYPE_STATION) -- cgit v1.2.3 From 4bbf4d56583dd52c429d88f43cb614bdbe5deea6 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 24 Mar 2009 09:35:46 +0100 Subject: cfg80211: fix locking in nl80211_set_wiphy Luis reports that there's a circular locking dependency; this is because cfg80211_dev_rename() will acquire the cfg80211_mutex while the device mutex is held, while this normally is done the other way around. The solution is to open-code the device-getting in nl80211_set_wiphy and require holding the mutex around cfg80211_dev_rename rather than acquiring it within. Also fix a bug -- rtnl locking is expected by drivers so we need to provide it. Reported-by: Luis R. Rodriguez Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/wireless/core.c | 30 ++++++++++-------------------- net/wireless/core.h | 3 +++ net/wireless/nl80211.c | 28 ++++++++++++++++++++-------- 3 files changed, 33 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index 17fe39049740..d1f556535f6d 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -87,7 +87,7 @@ struct wiphy *wiphy_idx_to_wiphy(int wiphy_idx) } /* requires cfg80211_mutex to be held! */ -static struct cfg80211_registered_device * +struct cfg80211_registered_device * __cfg80211_drv_from_info(struct genl_info *info) { int ifindex; @@ -176,13 +176,14 @@ void cfg80211_put_dev(struct cfg80211_registered_device *drv) mutex_unlock(&drv->mtx); } +/* requires cfg80211_mutex to be held */ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, char *newname) { struct cfg80211_registered_device *drv; int wiphy_idx, taken = -1, result, digits; - mutex_lock(&cfg80211_mutex); + assert_cfg80211_lock(); /* prohibit calling the thing phy%d when %d is not its number */ sscanf(newname, PHY_NAME "%d%n", &wiphy_idx, &taken); @@ -195,30 +196,23 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, * deny the name if it is phy where is printed * without leading zeroes. taken == strlen(newname) here */ - result = -EINVAL; if (taken == strlen(PHY_NAME) + digits) - goto out_unlock; + return -EINVAL; } /* Ignore nop renames */ - result = 0; if (strcmp(newname, dev_name(&rdev->wiphy.dev)) == 0) - goto out_unlock; + return 0; /* Ensure another device does not already have this name. */ - list_for_each_entry(drv, &cfg80211_drv_list, list) { - result = -EINVAL; + list_for_each_entry(drv, &cfg80211_drv_list, list) if (strcmp(newname, dev_name(&drv->wiphy.dev)) == 0) - goto out_unlock; - } + return -EINVAL; - /* this will only check for collisions in sysfs - * which is not even always compiled in. - */ result = device_rename(&rdev->wiphy.dev, newname); if (result) - goto out_unlock; + return result; if (rdev->wiphy.debugfsdir && !debugfs_rename(rdev->wiphy.debugfsdir->d_parent, @@ -228,13 +222,9 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, printk(KERN_ERR "cfg80211: failed to rename debugfs dir to %s!\n", newname); - result = 0; -out_unlock: - mutex_unlock(&cfg80211_mutex); - if (result == 0) - nl80211_notify_dev_rename(rdev); + nl80211_notify_dev_rename(rdev); - return result; + return 0; } /* exported functions */ diff --git a/net/wireless/core.h b/net/wireless/core.h index 97a6fd8b2b03..d43daa236ef9 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -99,6 +99,9 @@ struct cfg80211_internal_bss { struct cfg80211_registered_device *cfg80211_drv_by_wiphy_idx(int wiphy_idx); int get_wiphy_idx(struct wiphy *wiphy); +struct cfg80211_registered_device * +__cfg80211_drv_from_info(struct genl_info *info); + /* * This function returns a pointer to the driver * that the genl_info item that is passed refers to. diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 8808431bd581..353e1a4ece83 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -366,16 +366,26 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) int result = 0, rem_txq_params = 0; struct nlattr *nl_txq_params; - rdev = cfg80211_get_dev_from_info(info); - if (IS_ERR(rdev)) - return PTR_ERR(rdev); + rtnl_lock(); + + mutex_lock(&cfg80211_mutex); + + rdev = __cfg80211_drv_from_info(info); + if (IS_ERR(rdev)) { + result = PTR_ERR(rdev); + goto unlock; + } - if (info->attrs[NL80211_ATTR_WIPHY_NAME]) { + mutex_lock(&rdev->mtx); + + if (info->attrs[NL80211_ATTR_WIPHY_NAME]) result = cfg80211_dev_rename( rdev, nla_data(info->attrs[NL80211_ATTR_WIPHY_NAME])); - if (result) - goto bad_res; - } + + mutex_unlock(&cfg80211_mutex); + + if (result) + goto bad_res; if (info->attrs[NL80211_ATTR_WIPHY_TXQ_PARAMS]) { struct ieee80211_txq_params txq_params; @@ -471,7 +481,9 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) bad_res: - cfg80211_put_dev(rdev); + mutex_unlock(&rdev->mtx); + unlock: + rtnl_unlock(); return result; } -- cgit v1.2.3 From 2de8e0d999b8790861cd3749bec2236ccc1c8110 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 23 Mar 2009 17:28:35 +0100 Subject: mac80211: rewrite fragmentation Fragmentation currently uses an allocated array to store the fragment skbs, and then keeps track of which have been sent and which are still pending etc. This is rather complicated; make it simpler by just chaining the fragments into skb->next and removing from that list when sent. Also simplifies all code that needs to touch fragments, since it now only needs to walk the skb->next list. This is a prerequisite for fixing the stored packet code, which I need to do for proper aggregation packet storing. Signed-off-by: Johannes Berg Reviewed-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 7 - net/mac80211/tx.c | 322 +++++++++++++++++++++------------------------ net/mac80211/util.c | 19 +-- net/mac80211/wep.c | 21 +-- net/mac80211/wpa.c | 28 ++-- 5 files changed, 173 insertions(+), 224 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index acba78e1a5ca..785f6363a6fc 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -149,11 +149,6 @@ struct ieee80211_tx_data { struct ieee80211_channel *channel; - /* Extra fragments (in addition to the first fragment - * in skb) */ - struct sk_buff **extra_frag; - int num_extra_frag; - u16 ethertype; unsigned int flags; }; @@ -191,8 +186,6 @@ struct ieee80211_rx_data { struct ieee80211_tx_stored_packet { struct sk_buff *skb; - struct sk_buff **extra_frag; - int num_extra_frag; }; struct beacon_data { diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index f3f240c69018..51bf49cc75bc 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -34,8 +34,7 @@ #define IEEE80211_TX_OK 0 #define IEEE80211_TX_AGAIN 1 -#define IEEE80211_TX_FRAG_AGAIN 2 -#define IEEE80211_TX_PENDING 3 +#define IEEE80211_TX_PENDING 2 /* misc utils */ @@ -702,17 +701,62 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) return TX_CONTINUE; } +static int ieee80211_fragment(struct ieee80211_local *local, + struct sk_buff *skb, int hdrlen, + int frag_threshold) +{ + struct sk_buff *tail = skb, *tmp; + int per_fragm = frag_threshold - hdrlen - FCS_LEN; + int pos = hdrlen + per_fragm; + int rem = skb->len - hdrlen - per_fragm; + + if (WARN_ON(rem < 0)) + return -EINVAL; + + while (rem) { + int fraglen = per_fragm; + + if (fraglen > rem) + fraglen = rem; + rem -= fraglen; + tmp = dev_alloc_skb(local->tx_headroom + + frag_threshold + + IEEE80211_ENCRYPT_HEADROOM + + IEEE80211_ENCRYPT_TAILROOM); + if (!tmp) + return -ENOMEM; + tail->next = tmp; + tail = tmp; + skb_reserve(tmp, local->tx_headroom + + IEEE80211_ENCRYPT_HEADROOM); + /* copy control information */ + memcpy(tmp->cb, skb->cb, sizeof(tmp->cb)); + skb_copy_queue_mapping(tmp, skb); + tmp->priority = skb->priority; + tmp->do_not_encrypt = skb->do_not_encrypt; + tmp->dev = skb->dev; + tmp->iif = skb->iif; + + /* copy header and data */ + memcpy(skb_put(tmp, hdrlen), skb->data, hdrlen); + memcpy(skb_put(tmp, fraglen), skb->data + pos, fraglen); + + pos += fraglen; + } + + skb->len = hdrlen + per_fragm; + return 0; +} + static ieee80211_tx_result debug_noinline ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; - size_t hdrlen, per_fragm, num_fragm, payload_len, left; - struct sk_buff **frags, *first, *frag; - int i; - u16 seq; - u8 *pos; + struct sk_buff *skb = tx->skb; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr = (void *)skb->data; int frag_threshold = tx->local->fragmentation_threshold; + int hdrlen; + int fragnum; if (!(tx->flags & IEEE80211_TX_FRAGMENTED)) return TX_CONTINUE; @@ -725,58 +769,35 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) if (WARN_ON(info->flags & IEEE80211_TX_CTL_AMPDU)) return TX_DROP; - first = tx->skb; - hdrlen = ieee80211_hdrlen(hdr->frame_control); - payload_len = first->len - hdrlen; - per_fragm = frag_threshold - hdrlen - FCS_LEN; - num_fragm = DIV_ROUND_UP(payload_len, per_fragm); - - frags = kzalloc(num_fragm * sizeof(struct sk_buff *), GFP_ATOMIC); - if (!frags) - goto fail; - - hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_MOREFRAGS); - seq = le16_to_cpu(hdr->seq_ctrl) & IEEE80211_SCTL_SEQ; - pos = first->data + hdrlen + per_fragm; - left = payload_len - per_fragm; - for (i = 0; i < num_fragm - 1; i++) { - struct ieee80211_hdr *fhdr; - size_t copylen; - - if (left <= 0) - goto fail; - /* reserve enough extra head and tail room for possible - * encryption */ - frag = frags[i] = - dev_alloc_skb(tx->local->tx_headroom + - frag_threshold + - IEEE80211_ENCRYPT_HEADROOM + - IEEE80211_ENCRYPT_TAILROOM); - if (!frag) - goto fail; - - /* Make sure that all fragments use the same priority so - * that they end up using the same TX queue */ - frag->priority = first->priority; + /* internal error, why is TX_FRAGMENTED set? */ + if (WARN_ON(skb->len <= frag_threshold)) + return TX_DROP; - skb_reserve(frag, tx->local->tx_headroom + - IEEE80211_ENCRYPT_HEADROOM); + /* + * Now fragment the frame. This will allocate all the fragments and + * chain them (using skb as the first fragment) to skb->next. + * During transmission, we will remove the successfully transmitted + * fragments from this list. When the low-level driver rejects one + * of the fragments then we will simply pretend to accept the skb + * but store it away as pending. + */ + if (ieee80211_fragment(tx->local, skb, hdrlen, frag_threshold)) + return TX_DROP; - /* copy TX information */ - info = IEEE80211_SKB_CB(frag); - memcpy(info, first->cb, sizeof(frag->cb)); + /* update duration/seq/flags of fragments */ + fragnum = 0; + do { + int next_len; + const __le16 morefrags = cpu_to_le16(IEEE80211_FCTL_MOREFRAGS); - /* copy/fill in 802.11 header */ - fhdr = (struct ieee80211_hdr *) skb_put(frag, hdrlen); - memcpy(fhdr, first->data, hdrlen); - fhdr->seq_ctrl = cpu_to_le16(seq | ((i + 1) & IEEE80211_SCTL_FRAG)); + hdr = (void *)skb->data; + info = IEEE80211_SKB_CB(skb); - if (i == num_fragm - 2) { - /* clear MOREFRAGS bit for the last fragment */ - fhdr->frame_control &= cpu_to_le16(~IEEE80211_FCTL_MOREFRAGS); - } else { + if (skb->next) { + hdr->frame_control |= morefrags; + next_len = skb->next->len; /* * No multi-rate retries for fragmented frames, that * would completely throw off the NAV at other STAs. @@ -787,37 +808,16 @@ ieee80211_tx_h_fragment(struct ieee80211_tx_data *tx) info->control.rates[4].idx = -1; BUILD_BUG_ON(IEEE80211_TX_MAX_RATES != 5); info->flags &= ~IEEE80211_TX_CTL_RATE_CTRL_PROBE; + } else { + hdr->frame_control &= ~morefrags; + next_len = 0; } - - /* copy data */ - copylen = left > per_fragm ? per_fragm : left; - memcpy(skb_put(frag, copylen), pos, copylen); - - skb_copy_queue_mapping(frag, first); - - frag->do_not_encrypt = first->do_not_encrypt; - frag->dev = first->dev; - frag->iif = first->iif; - - pos += copylen; - left -= copylen; - } - skb_trim(first, hdrlen + per_fragm); - - tx->num_extra_frag = num_fragm - 1; - tx->extra_frag = frags; + hdr->duration_id = ieee80211_duration(tx, 0, next_len); + hdr->seq_ctrl |= cpu_to_le16(fragnum & IEEE80211_SCTL_FRAG); + fragnum++; + } while ((skb = skb->next)); return TX_CONTINUE; - - fail: - if (frags) { - for (i = 0; i < num_fragm - 1; i++) - if (frags[i]) - dev_kfree_skb(frags[i]); - kfree(frags); - } - I802_DEBUG_INC(tx->local->tx_handlers_drop_fragment); - return TX_DROP; } static ieee80211_tx_result debug_noinline @@ -845,27 +845,19 @@ ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx) static ieee80211_tx_result debug_noinline ieee80211_tx_h_calculate_duration(struct ieee80211_tx_data *tx) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; - int next_len, i; - int group_addr = is_multicast_ether_addr(hdr->addr1); - - if (!(tx->flags & IEEE80211_TX_FRAGMENTED)) { - hdr->duration_id = ieee80211_duration(tx, group_addr, 0); - return TX_CONTINUE; - } - - hdr->duration_id = ieee80211_duration(tx, group_addr, - tx->extra_frag[0]->len); + struct sk_buff *skb = tx->skb; + struct ieee80211_hdr *hdr; + int next_len; + bool group_addr; - for (i = 0; i < tx->num_extra_frag; i++) { - if (i + 1 < tx->num_extra_frag) - next_len = tx->extra_frag[i + 1]->len; - else - next_len = 0; + do { + hdr = (void *) skb->data; + next_len = skb->next ? skb->next->len : 0; + group_addr = is_multicast_ether_addr(hdr->addr1); - hdr = (struct ieee80211_hdr *)tx->extra_frag[i]->data; - hdr->duration_id = ieee80211_duration(tx, 0, next_len); - } + hdr->duration_id = + ieee80211_duration(tx, group_addr, next_len); + } while ((skb = skb->next)); return TX_CONTINUE; } @@ -873,19 +865,16 @@ ieee80211_tx_h_calculate_duration(struct ieee80211_tx_data *tx) static ieee80211_tx_result debug_noinline ieee80211_tx_h_stats(struct ieee80211_tx_data *tx) { - int i; + struct sk_buff *skb = tx->skb; if (!tx->sta) return TX_CONTINUE; tx->sta->tx_packets++; - tx->sta->tx_fragments++; - tx->sta->tx_bytes += tx->skb->len; - if (tx->extra_frag) { - tx->sta->tx_fragments += tx->num_extra_frag; - for (i = 0; i < tx->num_extra_frag; i++) - tx->sta->tx_bytes += tx->extra_frag[i]->len; - } + do { + tx->sta->tx_fragments++; + tx->sta->tx_bytes += skb->len; + } while ((skb = skb->next)); return TX_CONTINUE; } @@ -1099,45 +1088,36 @@ static int ieee80211_tx_prepare(struct ieee80211_local *local, return 0; } -static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb, +static int __ieee80211_tx(struct ieee80211_local *local, struct ieee80211_tx_data *tx) { + struct sk_buff *skb = tx->skb, *next; struct ieee80211_tx_info *info; - int ret, i; + int ret; + bool fragm = false; - if (skb) { + local->mdev->trans_start = jiffies; + + while (skb) { if (ieee80211_queue_stopped(&local->hw, skb_get_queue_mapping(skb))) return IEEE80211_TX_PENDING; - ret = local->ops->tx(local_to_hw(local), skb); - if (ret) - return IEEE80211_TX_AGAIN; - local->mdev->trans_start = jiffies; - ieee80211_led_tx(local, 1); - } - if (tx->extra_frag) { - for (i = 0; i < tx->num_extra_frag; i++) { - if (!tx->extra_frag[i]) - continue; - info = IEEE80211_SKB_CB(tx->extra_frag[i]); + if (fragm) { + info = IEEE80211_SKB_CB(skb); info->flags &= ~(IEEE80211_TX_CTL_CLEAR_PS_FILT | IEEE80211_TX_CTL_FIRST_FRAGMENT); - if (ieee80211_queue_stopped(&local->hw, - skb_get_queue_mapping(tx->extra_frag[i]))) - return IEEE80211_TX_FRAG_AGAIN; - - ret = local->ops->tx(local_to_hw(local), - tx->extra_frag[i]); - if (ret) - return IEEE80211_TX_FRAG_AGAIN; - local->mdev->trans_start = jiffies; - ieee80211_led_tx(local, 1); - tx->extra_frag[i] = NULL; } - kfree(tx->extra_frag); - tx->extra_frag = NULL; + + next = skb->next; + ret = local->ops->tx(local_to_hw(local), skb); + if (ret != NETDEV_TX_OK) + return IEEE80211_TX_AGAIN; + tx->skb = skb = next; + ieee80211_led_tx(local, 1); + fragm = true; } + return IEEE80211_TX_OK; } @@ -1149,7 +1129,6 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) { struct sk_buff *skb = tx->skb; ieee80211_tx_result res = TX_DROP; - int i; #define CALL_TXH(txh) \ res = txh(tx); \ @@ -1173,11 +1152,13 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) txh_done: if (unlikely(res == TX_DROP)) { I802_DEBUG_INC(tx->local->tx_handlers_drop); - dev_kfree_skb(skb); - for (i = 0; i < tx->num_extra_frag; i++) - if (tx->extra_frag[i]) - dev_kfree_skb(tx->extra_frag[i]); - kfree(tx->extra_frag); + while (skb) { + struct sk_buff *next; + + next = skb->next; + dev_kfree_skb(skb); + skb = next; + } return -1; } else if (unlikely(res == TX_QUEUED)) { I802_DEBUG_INC(tx->local->tx_handlers_queued); @@ -1194,7 +1175,7 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb) struct ieee80211_tx_data tx; ieee80211_tx_result res_prepare; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - int ret, i; + int ret; u16 queue; queue = skb_get_queue_mapping(skb); @@ -1225,7 +1206,7 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb) goto out; retry: - ret = __ieee80211_tx(local, skb, &tx); + ret = __ieee80211_tx(local, &tx); if (ret) { struct ieee80211_tx_stored_packet *store; @@ -1240,9 +1221,6 @@ retry: store = &local->pending_packet[queue]; - if (ret == IEEE80211_TX_FRAG_AGAIN) - skb = NULL; - set_bit(queue, local->queues_pending); smp_mb(); /* @@ -1260,22 +1238,23 @@ retry: clear_bit(queue, local->queues_pending); goto retry; } - store->skb = skb; - store->extra_frag = tx.extra_frag; - store->num_extra_frag = tx.num_extra_frag; + store->skb = tx.skb; } out: rcu_read_unlock(); return 0; drop: - if (skb) - dev_kfree_skb(skb); - for (i = 0; i < tx.num_extra_frag; i++) - if (tx.extra_frag[i]) - dev_kfree_skb(tx.extra_frag[i]); - kfree(tx.extra_frag); rcu_read_unlock(); + + skb = tx.skb; + while (skb) { + struct sk_buff *next; + + next = skb->next; + dev_kfree_skb(skb); + skb = next; + } return 0; } @@ -1810,17 +1789,21 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, */ void ieee80211_clear_tx_pending(struct ieee80211_local *local) { - int i, j; - struct ieee80211_tx_stored_packet *store; + struct sk_buff *skb; + int i; for (i = 0; i < local->hw.queues; i++) { if (!test_bit(i, local->queues_pending)) continue; - store = &local->pending_packet[i]; - kfree_skb(store->skb); - for (j = 0; j < store->num_extra_frag; j++) - kfree_skb(store->extra_frag[j]); - kfree(store->extra_frag); + + skb = local->pending_packet[i].skb; + while (skb) { + struct sk_buff *next; + + next = skb->next; + dev_kfree_skb(skb); + skb = next; + } clear_bit(i, local->queues_pending); } } @@ -1854,14 +1837,11 @@ void ieee80211_tx_pending(unsigned long data) netif_start_subqueue(local->mdev, i); store = &local->pending_packet[i]; - tx.extra_frag = store->extra_frag; - tx.num_extra_frag = store->num_extra_frag; tx.flags = 0; - ret = __ieee80211_tx(local, store->skb, &tx); - if (ret) { - if (ret == IEEE80211_TX_FRAG_AGAIN) - store->skb = NULL; - } else { + tx.skb = store->skb; + ret = __ieee80211_tx(local, &tx); + store->skb = tx.skb; + if (!ret) { clear_bit(i, local->queues_pending); ieee80211_wake_queue(&local->hw, i); } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 444bb14c95e1..021166c8cce2 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -166,18 +166,13 @@ int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr) void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx) { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) tx->skb->data; - - hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); - if (tx->extra_frag) { - struct ieee80211_hdr *fhdr; - int i; - for (i = 0; i < tx->num_extra_frag; i++) { - fhdr = (struct ieee80211_hdr *) - tx->extra_frag[i]->data; - fhdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); - } - } + struct sk_buff *skb = tx->skb; + struct ieee80211_hdr *hdr; + + do { + hdr = (struct ieee80211_hdr *) skb->data; + hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); + } while ((skb = skb->next)); } int ieee80211_frame_duration(struct ieee80211_local *local, size_t len, diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index 7043ddc75498..ef73105b3061 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -329,24 +329,17 @@ static int wep_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) ieee80211_tx_result ieee80211_crypto_wep_encrypt(struct ieee80211_tx_data *tx) { - int i; + struct sk_buff *skb; ieee80211_tx_set_protected(tx); - if (wep_encrypt_skb(tx, tx->skb) < 0) { - I802_DEBUG_INC(tx->local->tx_handlers_drop_wep); - return TX_DROP; - } - - if (tx->extra_frag) { - for (i = 0; i < tx->num_extra_frag; i++) { - if (wep_encrypt_skb(tx, tx->extra_frag[i])) { - I802_DEBUG_INC(tx->local-> - tx_handlers_drop_wep); - return TX_DROP; - } + skb = tx->skb; + do { + if (wep_encrypt_skb(tx, skb) < 0) { + I802_DEBUG_INC(tx->local->tx_handlers_drop_wep); + return TX_DROP; } - } + } while ((skb = skb->next)); return TX_CONTINUE; } diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 9101b48ec2ae..4f8bfea278f2 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -196,19 +196,13 @@ ieee80211_tx_result ieee80211_crypto_tkip_encrypt(struct ieee80211_tx_data *tx) { struct sk_buff *skb = tx->skb; - int i; ieee80211_tx_set_protected(tx); - if (tkip_encrypt_skb(tx, skb) < 0) - return TX_DROP; - - if (tx->extra_frag) { - for (i = 0; i < tx->num_extra_frag; i++) { - if (tkip_encrypt_skb(tx, tx->extra_frag[i])) - return TX_DROP; - } - } + do { + if (tkip_encrypt_skb(tx, skb) < 0) + return TX_DROP; + } while ((skb = skb->next)); return TX_CONTINUE; } @@ -428,19 +422,13 @@ ieee80211_tx_result ieee80211_crypto_ccmp_encrypt(struct ieee80211_tx_data *tx) { struct sk_buff *skb = tx->skb; - int i; ieee80211_tx_set_protected(tx); - if (ccmp_encrypt_skb(tx, skb) < 0) - return TX_DROP; - - if (tx->extra_frag) { - for (i = 0; i < tx->num_extra_frag; i++) { - if (ccmp_encrypt_skb(tx, tx->extra_frag[i])) - return TX_DROP; - } - } + do { + if (ccmp_encrypt_skb(tx, skb) < 0) + return TX_DROP; + } while ((skb = skb->next)); return TX_CONTINUE; } -- cgit v1.2.3 From f0e72851f7ad108fed20426b46a18ab5fcd5729f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 23 Mar 2009 17:28:36 +0100 Subject: mac80211: fix A-MPDU queue assignment Internally, mac80211 requires the skb's queue mapping to be set to the AC queue, not the virtual A-MPDU queue. This is not done correctly currently, this patch moves the code down to directly before the driver is invoked and adds a comment that it will be moved into the driver later. Since this requires __ieee80211_tx() to have the sta pointer, make sure to provide it in ieee80211_tx_pending(). Signed-off-by: Johannes Berg Reviewed-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/mac80211/tx.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 51bf49cc75bc..0d97cad84b1b 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1024,13 +1024,8 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, spin_lock_irqsave(&tx->sta->lock, flags); state = &tx->sta->ampdu_mlme.tid_state_tx[tid]; - if (*state == HT_AGG_STATE_OPERATIONAL) { + if (*state == HT_AGG_STATE_OPERATIONAL) info->flags |= IEEE80211_TX_CTL_AMPDU; - if (local->hw.ampdu_queues) - skb_set_queue_mapping( - skb, tx->local->hw.queues + - tx->sta->tid_to_tx_q[tid]); - } spin_unlock_irqrestore(&tx->sta->lock, flags); } @@ -1103,10 +1098,29 @@ static int __ieee80211_tx(struct ieee80211_local *local, skb_get_queue_mapping(skb))) return IEEE80211_TX_PENDING; - if (fragm) { - info = IEEE80211_SKB_CB(skb); + info = IEEE80211_SKB_CB(skb); + + if (fragm) info->flags &= ~(IEEE80211_TX_CTL_CLEAR_PS_FILT | IEEE80211_TX_CTL_FIRST_FRAGMENT); + + /* + * Internally, we need to have the queue mapping point to + * the real AC queue, not the virtual A-MPDU queue. This + * now finally sets the queue to what the driver wants. + * We will later move this down into the only driver that + * needs it, iwlwifi. + */ + if (tx->sta && local->hw.ampdu_queues && + info->flags & IEEE80211_TX_CTL_AMPDU) { + unsigned long flags; + u8 *qc = ieee80211_get_qos_ctl((void *) skb->data); + int tid = *qc & IEEE80211_QOS_CTL_TID_MASK; + + spin_lock_irqsave(&tx->sta->lock, flags); + skb_set_queue_mapping(skb, local->hw.queues + + tx->sta->tid_to_tx_q[tid]); + spin_unlock_irqrestore(&tx->sta->lock, flags); } next = skb->next; @@ -1817,9 +1831,11 @@ void ieee80211_tx_pending(unsigned long data) struct ieee80211_local *local = (struct ieee80211_local *)data; struct net_device *dev = local->mdev; struct ieee80211_tx_stored_packet *store; + struct ieee80211_hdr *hdr; struct ieee80211_tx_data tx; int i, ret; + rcu_read_lock(); netif_tx_lock_bh(dev); for (i = 0; i < local->hw.queues; i++) { /* Check that this queue is ok */ @@ -1839,6 +1855,8 @@ void ieee80211_tx_pending(unsigned long data) store = &local->pending_packet[i]; tx.flags = 0; tx.skb = store->skb; + hdr = (struct ieee80211_hdr *)tx.skb->data; + tx.sta = sta_info_get(local, hdr->addr1); ret = __ieee80211_tx(local, &tx); store->skb = tx.skb; if (!ret) { @@ -1847,6 +1865,7 @@ void ieee80211_tx_pending(unsigned long data) } } netif_tx_unlock_bh(dev); + rcu_read_unlock(); } /* functions for drivers to get certain frames */ -- cgit v1.2.3 From 2a577d98712a284a612dd51d69db5cb989810dc2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 23 Mar 2009 17:28:37 +0100 Subject: mac80211: rework the pending packets code The pending packets code is quite incomprehensible, uses memory barriers nobody really understands, etc. This patch reworks it entirely, using the queue spinlock, proper stop bits and the skb queues themselves to indicate whether packets are pending or not (rather than a separate variable like before). Signed-off-by: Johannes Berg Reviewed-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/mac80211/ieee80211_i.h | 9 +-- net/mac80211/main.c | 2 + net/mac80211/tx.c | 144 +++++++++++++++++++++++++-------------------- net/mac80211/util.c | 22 ++++--- 4 files changed, 98 insertions(+), 79 deletions(-) (limited to 'net') diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 785f6363a6fc..6ce62e553dc2 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -184,10 +184,6 @@ struct ieee80211_rx_data { u16 tkip_iv16; }; -struct ieee80211_tx_stored_packet { - struct sk_buff *skb; -}; - struct beacon_data { u8 *head, *tail; int head_len, tail_len; @@ -583,6 +579,7 @@ enum queue_stop_reason { IEEE80211_QUEUE_STOP_REASON_CSA, IEEE80211_QUEUE_STOP_REASON_AGGREGATION, IEEE80211_QUEUE_STOP_REASON_SUSPEND, + IEEE80211_QUEUE_STOP_REASON_PENDING, }; struct ieee80211_master_priv { @@ -639,9 +636,7 @@ struct ieee80211_local { struct sta_info *sta_hash[STA_HASH_SIZE]; struct timer_list sta_cleanup; - unsigned long queues_pending[BITS_TO_LONGS(IEEE80211_MAX_QUEUES)]; - unsigned long queues_pending_run[BITS_TO_LONGS(IEEE80211_MAX_QUEUES)]; - struct ieee80211_tx_stored_packet pending_packet[IEEE80211_MAX_QUEUES]; + struct sk_buff_head pending[IEEE80211_MAX_QUEUES]; struct tasklet_struct tx_pending_tasklet; /* number of interfaces with corresponding IFF_ flags */ diff --git a/net/mac80211/main.c b/net/mac80211/main.c index dac68d476bff..a7430e98c531 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -781,6 +781,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, sta_info_init(local); + for (i = 0; i < IEEE80211_MAX_QUEUES; i++) + skb_queue_head_init(&local->pending[i]); tasklet_init(&local->tx_pending_tasklet, ieee80211_tx_pending, (unsigned long)local); tasklet_disable(&local->tx_pending_tasklet); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 0d97cad84b1b..ee1b77f8a804 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1189,12 +1189,14 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb) struct ieee80211_tx_data tx; ieee80211_tx_result res_prepare; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - int ret; + struct sk_buff *next; + unsigned long flags; + int ret, retries; u16 queue; queue = skb_get_queue_mapping(skb); - WARN_ON(test_bit(queue, local->queues_pending)); + WARN_ON(!skb_queue_empty(&local->pending[queue])); if (unlikely(skb->len < 10)) { dev_kfree_skb(skb); @@ -1219,40 +1221,52 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb) if (invoke_tx_handlers(&tx)) goto out; -retry: + retries = 0; + retry: ret = __ieee80211_tx(local, &tx); - if (ret) { - struct ieee80211_tx_stored_packet *store; - + switch (ret) { + case IEEE80211_TX_OK: + break; + case IEEE80211_TX_AGAIN: /* * Since there are no fragmented frames on A-MPDU * queues, there's no reason for a driver to reject * a frame there, warn and drop it. */ - if (ret != IEEE80211_TX_PENDING) - if (WARN_ON(info->flags & IEEE80211_TX_CTL_AMPDU)) - goto drop; + if (WARN_ON(info->flags & IEEE80211_TX_CTL_AMPDU)) + goto drop; + /* fall through */ + case IEEE80211_TX_PENDING: + skb = tx.skb; - store = &local->pending_packet[queue]; + spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - set_bit(queue, local->queues_pending); - smp_mb(); - /* - * When the driver gets out of buffers during sending of - * fragments and calls ieee80211_stop_queue, the netif - * subqueue is stopped. There is, however, a small window - * in which the PENDING bit is not yet set. If a buffer - * gets available in that window (i.e. driver calls - * ieee80211_wake_queue), we would end up with ieee80211_tx - * called with the PENDING bit still set. Prevent this by - * continuing transmitting here when that situation is - * possible to have happened. - */ - if (!__netif_subqueue_stopped(local->mdev, queue)) { - clear_bit(queue, local->queues_pending); + if (__netif_subqueue_stopped(local->mdev, queue)) { + do { + next = skb->next; + skb->next = NULL; + skb_queue_tail(&local->pending[queue], skb); + } while ((skb = next)); + + /* + * Make sure nobody will enable the queue on us + * (without going through the tasklet) nor disable the + * netdev queue underneath the pending handling code. + */ + __set_bit(IEEE80211_QUEUE_STOP_REASON_PENDING, + &local->queue_stop_reasons[queue]); + + spin_unlock_irqrestore(&local->queue_stop_reason_lock, + flags); + } else { + spin_unlock_irqrestore(&local->queue_stop_reason_lock, + flags); + + retries++; + if (WARN(retries > 10, "tx refused but queue active")) + goto drop; goto retry; } - store->skb = tx.skb; } out: rcu_read_unlock(); @@ -1263,8 +1277,6 @@ retry: skb = tx.skb; while (skb) { - struct sk_buff *next; - next = skb->next; dev_kfree_skb(skb); skb = next; @@ -1803,23 +1815,10 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, */ void ieee80211_clear_tx_pending(struct ieee80211_local *local) { - struct sk_buff *skb; int i; - for (i = 0; i < local->hw.queues; i++) { - if (!test_bit(i, local->queues_pending)) - continue; - - skb = local->pending_packet[i].skb; - while (skb) { - struct sk_buff *next; - - next = skb->next; - dev_kfree_skb(skb); - skb = next; - } - clear_bit(i, local->queues_pending); - } + for (i = 0; i < local->hw.queues; i++) + skb_queue_purge(&local->pending[i]); } /* @@ -1830,40 +1829,57 @@ void ieee80211_tx_pending(unsigned long data) { struct ieee80211_local *local = (struct ieee80211_local *)data; struct net_device *dev = local->mdev; - struct ieee80211_tx_stored_packet *store; struct ieee80211_hdr *hdr; + unsigned long flags; struct ieee80211_tx_data tx; int i, ret; + bool next; rcu_read_lock(); netif_tx_lock_bh(dev); - for (i = 0; i < local->hw.queues; i++) { - /* Check that this queue is ok */ - if (__netif_subqueue_stopped(local->mdev, i) && - !test_bit(i, local->queues_pending_run)) - continue; - if (!test_bit(i, local->queues_pending)) { - clear_bit(i, local->queues_pending_run); - ieee80211_wake_queue(&local->hw, i); + for (i = 0; i < local->hw.queues; i++) { + /* + * If queue is stopped by something other than due to pending + * frames, or we have no pending frames, proceed to next queue. + */ + spin_lock_irqsave(&local->queue_stop_reason_lock, flags); + next = false; + if (local->queue_stop_reasons[i] != + BIT(IEEE80211_QUEUE_STOP_REASON_PENDING) || + skb_queue_empty(&local->pending[i])) + next = true; + spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); + + if (next) continue; - } - clear_bit(i, local->queues_pending_run); + /* + * start the queue now to allow processing our packets, + * we're under the tx lock here anyway so nothing will + * happen as a result of this + */ netif_start_subqueue(local->mdev, i); - store = &local->pending_packet[i]; - tx.flags = 0; - tx.skb = store->skb; - hdr = (struct ieee80211_hdr *)tx.skb->data; - tx.sta = sta_info_get(local, hdr->addr1); - ret = __ieee80211_tx(local, &tx); - store->skb = tx.skb; - if (!ret) { - clear_bit(i, local->queues_pending); - ieee80211_wake_queue(&local->hw, i); + while (!skb_queue_empty(&local->pending[i])) { + tx.flags = 0; + tx.skb = skb_dequeue(&local->pending[i]); + hdr = (struct ieee80211_hdr *)tx.skb->data; + tx.sta = sta_info_get(local, hdr->addr1); + + ret = __ieee80211_tx(local, &tx); + if (ret != IEEE80211_TX_OK) { + skb_queue_head(&local->pending[i], tx.skb); + break; + } } + + /* Start regular packet processing again. */ + if (skb_queue_empty(&local->pending[i])) + ieee80211_wake_queue_by_reason(&local->hw, i, + IEEE80211_QUEUE_STOP_REASON_PENDING); } + netif_tx_unlock_bh(dev); rcu_read_unlock(); } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 021166c8cce2..0247d8022f5f 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -365,16 +365,16 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue, __clear_bit(reason, &local->queue_stop_reasons[queue]); + if (!skb_queue_empty(&local->pending[queue]) && + local->queue_stop_reasons[queue] == + BIT(IEEE80211_QUEUE_STOP_REASON_PENDING)) + tasklet_schedule(&local->tx_pending_tasklet); + if (local->queue_stop_reasons[queue] != 0) /* someone still has this queue stopped */ return; - if (test_bit(queue, local->queues_pending)) { - set_bit(queue, local->queues_pending_run); - tasklet_schedule(&local->tx_pending_tasklet); - } else { - netif_wake_subqueue(local->mdev, queue); - } + netif_wake_subqueue(local->mdev, queue); } void ieee80211_wake_queue_by_reason(struct ieee80211_hw *hw, int queue, @@ -420,9 +420,15 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue, reason = IEEE80211_QUEUE_STOP_REASON_AGGREGATION; } - __set_bit(reason, &local->queue_stop_reasons[queue]); + /* + * Only stop if it was previously running, this is necessary + * for correct pending packets handling because there we may + * start (but not wake) the queue and rely on that. + */ + if (!local->queue_stop_reasons[queue]) + netif_stop_subqueue(local->mdev, queue); - netif_stop_subqueue(local->mdev, queue); + __set_bit(reason, &local->queue_stop_reasons[queue]); } void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue, -- cgit v1.2.3 From 1870cd71e87da1a1afb904f2c84086f487a07135 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 23 Mar 2009 17:28:38 +0100 Subject: mac80211: clean up __ieee80211_tx args __ieee80211_tx takes a struct ieee80211_tx_data argument, but only uses a few of its members, namely 'skb' and 'sta'. Make that explicit, so that less internal knowledge is required in ieee80211_tx_pending and the possibility of introducing errors here is removed. Signed-off-by: Johannes Berg Reviewed-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/mac80211/tx.c | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index ee1b77f8a804..b909e4090e93 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1084,9 +1084,10 @@ static int ieee80211_tx_prepare(struct ieee80211_local *local, } static int __ieee80211_tx(struct ieee80211_local *local, - struct ieee80211_tx_data *tx) + struct sk_buff **skbp, + struct sta_info *sta) { - struct sk_buff *skb = tx->skb, *next; + struct sk_buff *skb = *skbp, *next; struct ieee80211_tx_info *info; int ret; bool fragm = false; @@ -1111,23 +1112,23 @@ static int __ieee80211_tx(struct ieee80211_local *local, * We will later move this down into the only driver that * needs it, iwlwifi. */ - if (tx->sta && local->hw.ampdu_queues && + if (sta && local->hw.ampdu_queues && info->flags & IEEE80211_TX_CTL_AMPDU) { unsigned long flags; u8 *qc = ieee80211_get_qos_ctl((void *) skb->data); int tid = *qc & IEEE80211_QOS_CTL_TID_MASK; - spin_lock_irqsave(&tx->sta->lock, flags); + spin_lock_irqsave(&sta->lock, flags); skb_set_queue_mapping(skb, local->hw.queues + - tx->sta->tid_to_tx_q[tid]); - spin_unlock_irqrestore(&tx->sta->lock, flags); + sta->tid_to_tx_q[tid]); + spin_unlock_irqrestore(&sta->lock, flags); } next = skb->next; ret = local->ops->tx(local_to_hw(local), skb); if (ret != NETDEV_TX_OK) return IEEE80211_TX_AGAIN; - tx->skb = skb = next; + *skbp = skb = next; ieee80211_led_tx(local, 1); fragm = true; } @@ -1223,7 +1224,7 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb) retries = 0; retry: - ret = __ieee80211_tx(local, &tx); + ret = __ieee80211_tx(local, &tx.skb, tx.sta); switch (ret) { case IEEE80211_TX_OK: break; @@ -1831,7 +1832,6 @@ void ieee80211_tx_pending(unsigned long data) struct net_device *dev = local->mdev; struct ieee80211_hdr *hdr; unsigned long flags; - struct ieee80211_tx_data tx; int i, ret; bool next; @@ -1862,14 +1862,15 @@ void ieee80211_tx_pending(unsigned long data) netif_start_subqueue(local->mdev, i); while (!skb_queue_empty(&local->pending[i])) { - tx.flags = 0; - tx.skb = skb_dequeue(&local->pending[i]); - hdr = (struct ieee80211_hdr *)tx.skb->data; - tx.sta = sta_info_get(local, hdr->addr1); + struct sk_buff *skb = skb_dequeue(&local->pending[i]); + struct sta_info *sta; + + hdr = (struct ieee80211_hdr *)skb->data; + sta = sta_info_get(local, hdr->addr1); - ret = __ieee80211_tx(local, &tx); + ret = __ieee80211_tx(local, &skb, sta); if (ret != IEEE80211_TX_OK) { - skb_queue_head(&local->pending[i], tx.skb); + skb_queue_head(&local->pending[i], skb); break; } } -- cgit v1.2.3 From b1720231ca07dee3382980f3b25e6581bd2e54e9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 23 Mar 2009 17:28:39 +0100 Subject: mac80211: unify and fix TX aggregation start When TX aggregation becomes operational, we do a number of steps: 1) print a debug message 2) wake the virtual queue 3) notify the driver Unfortunately, 1) and 3) are only done if the driver is first to reply to the aggregation request, it is, however, possible that the remote station replies before the driver! Thus, unify the code for this and call the new function ieee80211_agg_tx_operational in both places where TX aggregation can become operational. Additionally, rename the driver notification from IEEE80211_AMPDU_TX_RESUME to IEEE80211_AMPDU_TX_OPERATIONAL. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-tx.c | 63 ++++++++++++++++++++++----------------------------- 1 file changed, 27 insertions(+), 36 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index e5776ef1717a..fd718e2b29f7 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -404,6 +404,27 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) } EXPORT_SYMBOL(ieee80211_start_tx_ba_session); +static void ieee80211_agg_tx_operational(struct ieee80211_local *local, + struct sta_info *sta, u16 tid) +{ +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Aggregation is on for tid %d \n", tid); +#endif + + if (local->hw.ampdu_queues) { + /* + * Wake up the A-MPDU queue, we stopped it earlier, + * this will in turn wake the entire AC. + */ + ieee80211_wake_queue_by_reason(&local->hw, + local->hw.queues + sta->tid_to_tx_q[tid], + IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + } + + local->ops->ampdu_action(&local->hw, IEEE80211_AMPDU_TX_OPERATIONAL, + &sta->sta, tid, NULL); +} + void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid) { struct ieee80211_local *local = hw_to_local(hw); @@ -446,20 +467,8 @@ void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid) *state |= HT_ADDBA_DRV_READY_MSK; - if (*state == HT_AGG_STATE_OPERATIONAL) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Aggregation is on for tid %d \n", tid); -#endif - if (hw->ampdu_queues) { - /* - * Wake up this queue, we stopped it earlier, - * this will in turn wake the entire AC. - */ - ieee80211_wake_queue_by_reason(hw, - hw->queues + sta->tid_to_tx_q[tid], - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); - } - } + if (*state == HT_AGG_STATE_OPERATIONAL) + ieee80211_agg_tx_operational(local, sta, tid); out: spin_unlock_bh(&sta->lock); @@ -646,9 +655,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, struct ieee80211_mgmt *mgmt, size_t len) { - struct ieee80211_hw *hw = &local->hw; - u16 capab; - u16 tid, start_seq_num; + u16 capab, tid; u8 *state; capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab); @@ -682,26 +689,10 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, *state |= HT_ADDBA_RECEIVED_MSK; - if (hw->ampdu_queues && *state != curstate && - *state == HT_AGG_STATE_OPERATIONAL) { - /* - * Wake up this queue, we stopped it earlier, - * this will in turn wake the entire AC. - */ - ieee80211_wake_queue_by_reason(hw, - hw->queues + sta->tid_to_tx_q[tid], - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); - } - sta->ampdu_mlme.addba_req_num[tid] = 0; + if (*state != curstate && *state == HT_AGG_STATE_OPERATIONAL) + ieee80211_agg_tx_operational(local, sta, tid); - if (local->ops->ampdu_action) { - (void)local->ops->ampdu_action(hw, - IEEE80211_AMPDU_TX_RESUME, - &sta->sta, tid, &start_seq_num); - } -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Resuming TX aggregation for tid %d\n", tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ + sta->ampdu_mlme.addba_req_num[tid] = 0; } else { sta->ampdu_mlme.addba_req_num[tid]++; ___ieee80211_stop_tx_ba_session(sta, tid, WLAN_BACK_INITIATOR); -- cgit v1.2.3 From a220858d30604902f650074bfac5a7598bc97ea4 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 23 Mar 2009 17:28:40 +0100 Subject: mac80211: add skb length sanity checking We just found a bug in zd1211rw where it would reject packets in the ->tx() method but leave them modified, which would cause retransmit attempts with completely bogus skbs, eventually leading to a panic due to not having enough headroom in those. This patch adds a sanity check to mac80211 to catch such driver mistakes; in this case we warn and drop the skb. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/tx.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index b909e4090e93..a0e00c6339ca 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1089,7 +1089,7 @@ static int __ieee80211_tx(struct ieee80211_local *local, { struct sk_buff *skb = *skbp, *next; struct ieee80211_tx_info *info; - int ret; + int ret, len; bool fragm = false; local->mdev->trans_start = jiffies; @@ -1125,7 +1125,12 @@ static int __ieee80211_tx(struct ieee80211_local *local, } next = skb->next; + len = skb->len; ret = local->ops->tx(local_to_hw(local), skb); + if (WARN_ON(ret != NETDEV_TX_OK && skb->len != len)) { + dev_kfree_skb(skb); + ret = NETDEV_TX_OK; + } if (ret != NETDEV_TX_OK) return IEEE80211_TX_AGAIN; *skbp = skb = next; -- cgit v1.2.3 From cd8ffc800ce18e558335c4946b2217864fc16045 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 23 Mar 2009 17:28:41 +0100 Subject: mac80211: fix aggregation to not require queue stop Instead of stopping the entire AC queue when enabling aggregation (which was only done for hardware with aggregation queues) buffer the packets for each station, and release them to the pending skb queue once aggregation is turned on successfully. We get a little more code, but it becomes conceptually simpler and we can remove the entire virtual queue mechanism from mac80211 in a follow-up patch. This changes how mac80211 behaves towards drivers that support aggregation but have no hardware queues -- those drivers will now not be handed packets while the aggregation session is being established, but only after it has been fully established. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- net/mac80211/agg-tx.c | 136 ++++++++++++++++++++++++++----------------- net/mac80211/ieee80211_i.h | 8 +++ net/mac80211/main.c | 2 + net/mac80211/sta_info.c | 5 ++ net/mac80211/sta_info.h | 2 + net/mac80211/tx.c | 142 ++++++++++++++++++++++++++++++++++++--------- 6 files changed, 217 insertions(+), 78 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index fd718e2b29f7..64b839bfbf17 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -132,16 +132,6 @@ static int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, state = &sta->ampdu_mlme.tid_state_tx[tid]; if (local->hw.ampdu_queues) { - if (initiator) { - /* - * Stop the AC queue to avoid issues where we send - * unaggregated frames already before the delba. - */ - ieee80211_stop_queue_by_reason(&local->hw, - local->hw.queues + sta->tid_to_tx_q[tid], - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); - } - /* * Pretend the driver woke the queue, just in case * it disabled it before the session was stopped. @@ -158,6 +148,10 @@ static int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, /* HW shall not deny going back to legacy */ if (WARN_ON(ret)) { *state = HT_AGG_STATE_OPERATIONAL; + /* + * We may have pending packets get stuck in this case... + * Not bothering with a workaround for now. + */ } return ret; @@ -226,13 +220,6 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) ra, tid); #endif /* CONFIG_MAC80211_HT_DEBUG */ - if (hw->ampdu_queues && ieee80211_ac_from_tid(tid) == 0) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "rejecting on voice AC\n"); -#endif - return -EINVAL; - } - rcu_read_lock(); sta = sta_info_get(local, ra); @@ -267,6 +254,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) } spin_lock_bh(&sta->lock); + spin_lock(&local->ampdu_lock); sdata = sta->sdata; @@ -308,21 +296,19 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) ret = -ENOSPC; goto err_unlock_sta; } - - /* - * If we successfully allocate the session, we can't have - * anything going on on the queue this TID maps into, so - * stop it for now. This is a "virtual" stop using the same - * mechanism that drivers will use. - * - * XXX: queue up frames for this session in the sta_info - * struct instead to avoid hitting all other STAs. - */ - ieee80211_stop_queue_by_reason( - &local->hw, hw->queues + qn, - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); } + /* + * While we're asking the driver about the aggregation, + * stop the AC queue so that we don't have to worry + * about frames that came in while we were doing that, + * which would require us to put them to the AC pending + * afterwards which just makes the code more complex. + */ + ieee80211_stop_queue_by_reason( + &local->hw, ieee80211_ac_from_tid(tid), + IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + /* prepare A-MPDU MLME for Tx aggregation */ sta->ampdu_mlme.tid_tx[tid] = kmalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC); @@ -336,6 +322,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) goto err_return_queue; } + skb_queue_head_init(&sta->ampdu_mlme.tid_tx[tid]->pending); + /* Tx timer */ sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.function = sta_addba_resp_timer_expired; @@ -362,6 +350,12 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) } sta->tid_to_tx_q[tid] = qn; + /* Driver vetoed or OKed, but we can take packets again now */ + ieee80211_wake_queue_by_reason( + &local->hw, ieee80211_ac_from_tid(tid), + IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + + spin_unlock(&local->ampdu_lock); spin_unlock_bh(&sta->lock); /* send an addBA request */ @@ -388,15 +382,16 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) sta->ampdu_mlme.tid_tx[tid] = NULL; err_return_queue: if (qn >= 0) { - /* We failed, so start queue again right away. */ - ieee80211_wake_queue_by_reason(hw, hw->queues + qn, - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); /* give queue back to pool */ spin_lock(&local->queue_stop_reason_lock); local->ampdu_ac_queue[qn] = -1; spin_unlock(&local->queue_stop_reason_lock); } + ieee80211_wake_queue_by_reason( + &local->hw, ieee80211_ac_from_tid(tid), + IEEE80211_QUEUE_STOP_REASON_AGGREGATION); err_unlock_sta: + spin_unlock(&local->ampdu_lock); spin_unlock_bh(&sta->lock); unlock: rcu_read_unlock(); @@ -404,6 +399,45 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) } EXPORT_SYMBOL(ieee80211_start_tx_ba_session); +/* + * splice packets from the STA's pending to the local pending, + * requires a call to ieee80211_agg_splice_finish and holding + * local->ampdu_lock across both calls. + */ +static void ieee80211_agg_splice_packets(struct ieee80211_local *local, + struct sta_info *sta, u16 tid) +{ + unsigned long flags; + u16 queue = ieee80211_ac_from_tid(tid); + + ieee80211_stop_queue_by_reason( + &local->hw, queue, + IEEE80211_QUEUE_STOP_REASON_AGGREGATION); + + if (!skb_queue_empty(&sta->ampdu_mlme.tid_tx[tid]->pending)) { + spin_lock_irqsave(&local->queue_stop_reason_lock, flags); + /* mark queue as pending, it is stopped already */ + __set_bit(IEEE80211_QUEUE_STOP_REASON_PENDING, + &local->queue_stop_reasons[queue]); + /* copy over remaining packets */ + skb_queue_splice_tail_init( + &sta->ampdu_mlme.tid_tx[tid]->pending, + &local->pending[queue]); + spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); + } +} + +static void ieee80211_agg_splice_finish(struct ieee80211_local *local, + struct sta_info *sta, u16 tid) +{ + u16 queue = ieee80211_ac_from_tid(tid); + + ieee80211_wake_queue_by_reason( + &local->hw, queue, + IEEE80211_QUEUE_STOP_REASON_AGGREGATION); +} + +/* caller must hold sta->lock */ static void ieee80211_agg_tx_operational(struct ieee80211_local *local, struct sta_info *sta, u16 tid) { @@ -411,15 +445,16 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local, printk(KERN_DEBUG "Aggregation is on for tid %d \n", tid); #endif - if (local->hw.ampdu_queues) { - /* - * Wake up the A-MPDU queue, we stopped it earlier, - * this will in turn wake the entire AC. - */ - ieee80211_wake_queue_by_reason(&local->hw, - local->hw.queues + sta->tid_to_tx_q[tid], - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); - } + spin_lock(&local->ampdu_lock); + ieee80211_agg_splice_packets(local, sta, tid); + /* + * NB: we rely on sta->lock being taken in the TX + * processing here when adding to the pending queue, + * otherwise we could only change the state of the + * session to OPERATIONAL _here_. + */ + ieee80211_agg_splice_finish(local, sta, tid); + spin_unlock(&local->ampdu_lock); local->ops->ampdu_action(&local->hw, IEEE80211_AMPDU_TX_OPERATIONAL, &sta->sta, tid, NULL); @@ -602,22 +637,19 @@ void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid) WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); spin_lock_bh(&sta->lock); + spin_lock(&local->ampdu_lock); - if (*state & HT_AGG_STATE_INITIATOR_MSK && - hw->ampdu_queues) { - /* - * Wake up this queue, we stopped it earlier, - * this will in turn wake the entire AC. - */ - ieee80211_wake_queue_by_reason(hw, - hw->queues + sta->tid_to_tx_q[tid], - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); - } + ieee80211_agg_splice_packets(local, sta, tid); *state = HT_AGG_STATE_IDLE; + /* from now on packets are no longer put onto sta->pending */ sta->ampdu_mlme.addba_req_num[tid] = 0; kfree(sta->ampdu_mlme.tid_tx[tid]); sta->ampdu_mlme.tid_tx[tid] = NULL; + + ieee80211_agg_splice_finish(local, sta, tid); + + spin_unlock(&local->ampdu_lock); spin_unlock_bh(&sta->lock); rcu_read_unlock(); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 6ce62e553dc2..32345b479adb 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -639,6 +639,14 @@ struct ieee80211_local { struct sk_buff_head pending[IEEE80211_MAX_QUEUES]; struct tasklet_struct tx_pending_tasklet; + /* + * This lock is used to prevent concurrent A-MPDU + * session start/stop processing, this thus also + * synchronises the ->ampdu_action() callback to + * drivers and limits it to one at a time. + */ + spinlock_t ampdu_lock; + /* number of interfaces with corresponding IFF_ flags */ atomic_t iff_allmultis, iff_promiscs; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index a7430e98c531..756284e0bbd3 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -795,6 +795,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, skb_queue_head_init(&local->skb_queue); skb_queue_head_init(&local->skb_queue_unreliable); + spin_lock_init(&local->ampdu_lock); + return local_to_hw(local); } EXPORT_SYMBOL(ieee80211_alloc_hw); diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 4ba3c540fcf3..dd3593c1fd23 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -239,6 +239,11 @@ void sta_info_destroy(struct sta_info *sta) tid_tx = sta->ampdu_mlme.tid_tx[i]; if (tid_tx) { del_timer_sync(&tid_tx->addba_resp_timer); + /* + * STA removed while aggregation session being + * started? Bit odd, but purge frames anyway. + */ + skb_queue_purge(&tid_tx->pending); kfree(tid_tx); } } diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 5b223b216e5a..18fd5d1a4422 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -73,11 +73,13 @@ enum ieee80211_sta_info_flags { * struct tid_ampdu_tx - TID aggregation information (Tx). * * @addba_resp_timer: timer for peer's response to addba request + * @pending: pending frames queue -- use sta's spinlock to protect * @ssn: Starting Sequence Number expected to be aggregated. * @dialog_token: dialog token for aggregation session */ struct tid_ampdu_tx { struct timer_list addba_resp_timer; + struct sk_buff_head pending; u16 ssn; u8 dialog_token; }; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index a0e00c6339ca..906ab785db40 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -984,9 +984,9 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, struct ieee80211_hdr *hdr; struct ieee80211_sub_if_data *sdata; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - int hdrlen, tid; u8 *qc, *state; + bool queued = false; memset(tx, 0, sizeof(*tx)); tx->skb = skb; @@ -1013,20 +1013,53 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, */ } + /* + * If this flag is set to true anywhere, and we get here, + * we are doing the needed processing, so remove the flag + * now. + */ + info->flags &= ~IEEE80211_TX_INTFL_NEED_TXPROCESSING; + hdr = (struct ieee80211_hdr *) skb->data; tx->sta = sta_info_get(local, hdr->addr1); - if (tx->sta && ieee80211_is_data_qos(hdr->frame_control)) { + if (tx->sta && ieee80211_is_data_qos(hdr->frame_control) && + (local->hw.flags & IEEE80211_HW_AMPDU_AGGREGATION)) { unsigned long flags; + struct tid_ampdu_tx *tid_tx; + qc = ieee80211_get_qos_ctl(hdr); tid = *qc & IEEE80211_QOS_CTL_TID_MASK; spin_lock_irqsave(&tx->sta->lock, flags); + /* + * XXX: This spinlock could be fairly expensive, but see the + * comment in agg-tx.c:ieee80211_agg_tx_operational(). + * One way to solve this would be to do something RCU-like + * for managing the tid_tx struct and using atomic bitops + * for the actual state -- by introducing an actual + * 'operational' bit that would be possible. It would + * require changing ieee80211_agg_tx_operational() to + * set that bit, and changing the way tid_tx is managed + * everywhere, including races between that bit and + * tid_tx going away (tid_tx being added can be easily + * committed to memory before the 'operational' bit). + */ + tid_tx = tx->sta->ampdu_mlme.tid_tx[tid]; state = &tx->sta->ampdu_mlme.tid_state_tx[tid]; - if (*state == HT_AGG_STATE_OPERATIONAL) + if (*state == HT_AGG_STATE_OPERATIONAL) { info->flags |= IEEE80211_TX_CTL_AMPDU; + } else if (*state != HT_AGG_STATE_IDLE) { + /* in progress */ + queued = true; + info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING; + __skb_queue_tail(&tid_tx->pending, skb); + } spin_unlock_irqrestore(&tx->sta->lock, flags); + + if (unlikely(queued)) + return TX_QUEUED; } if (is_multicast_ether_addr(hdr->addr1)) { @@ -1077,7 +1110,14 @@ static int ieee80211_tx_prepare(struct ieee80211_local *local, } if (unlikely(!dev)) return -ENODEV; - /* initialises tx with control */ + /* + * initialises tx with control + * + * return value is safe to ignore here because this function + * can only be invoked for multicast frames + * + * XXX: clean up + */ __ieee80211_tx_prepare(tx, skb, dev); dev_put(dev); return 0; @@ -1188,7 +1228,8 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) return 0; } -static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb) +static void ieee80211_tx(struct net_device *dev, struct sk_buff *skb, + bool txpending) { struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct sta_info *sta; @@ -1202,11 +1243,11 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb) queue = skb_get_queue_mapping(skb); - WARN_ON(!skb_queue_empty(&local->pending[queue])); + WARN_ON(!txpending && !skb_queue_empty(&local->pending[queue])); if (unlikely(skb->len < 10)) { dev_kfree_skb(skb); - return 0; + return; } rcu_read_lock(); @@ -1214,10 +1255,13 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb) /* initialises tx */ res_prepare = __ieee80211_tx_prepare(&tx, skb, dev); - if (res_prepare == TX_DROP) { + if (unlikely(res_prepare == TX_DROP)) { dev_kfree_skb(skb); rcu_read_unlock(); - return 0; + return; + } else if (unlikely(res_prepare == TX_QUEUED)) { + rcu_read_unlock(); + return; } sta = tx.sta; @@ -1251,7 +1295,12 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb) do { next = skb->next; skb->next = NULL; - skb_queue_tail(&local->pending[queue], skb); + if (unlikely(txpending)) + skb_queue_head(&local->pending[queue], + skb); + else + skb_queue_tail(&local->pending[queue], + skb); } while ((skb = next)); /* @@ -1276,7 +1325,7 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb) } out: rcu_read_unlock(); - return 0; + return; drop: rcu_read_unlock(); @@ -1287,7 +1336,6 @@ static int ieee80211_tx(struct net_device *dev, struct sk_buff *skb) dev_kfree_skb(skb); skb = next; } - return 0; } /* device xmit handlers */ @@ -1346,7 +1394,6 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev) FOUND_SDATA, UNKNOWN_ADDRESS, } monitor_iface = NOT_MONITOR; - int ret; if (skb->iif) odev = dev_get_by_index(&init_net, skb->iif); @@ -1360,7 +1407,7 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev) "originating device\n", dev->name); #endif dev_kfree_skb(skb); - return 0; + return NETDEV_TX_OK; } if ((local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK) && @@ -1389,7 +1436,7 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev) else if (mesh_nexthop_lookup(skb, osdata)) { dev_put(odev); - return 0; + return NETDEV_TX_OK; } if (memcmp(odev->dev_addr, hdr->addr4, ETH_ALEN) != 0) IEEE80211_IFSTA_MESH_CTR_INC(&osdata->u.mesh, @@ -1451,7 +1498,7 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev) if (ieee80211_skb_resize(osdata->local, skb, headroom, may_encrypt)) { dev_kfree_skb(skb); dev_put(odev); - return 0; + return NETDEV_TX_OK; } if (osdata->vif.type == NL80211_IFTYPE_AP_VLAN) @@ -1460,10 +1507,11 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev) u.ap); if (likely(monitor_iface != UNKNOWN_ADDRESS)) info->control.vif = &osdata->vif; - ret = ieee80211_tx(odev, skb); + + ieee80211_tx(odev, skb, false); dev_put(odev); - return ret; + return NETDEV_TX_OK; } int ieee80211_monitor_start_xmit(struct sk_buff *skb, @@ -1827,6 +1875,54 @@ void ieee80211_clear_tx_pending(struct ieee80211_local *local) skb_queue_purge(&local->pending[i]); } +static bool ieee80211_tx_pending_skb(struct ieee80211_local *local, + struct sk_buff *skb) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_sub_if_data *sdata; + struct sta_info *sta; + struct ieee80211_hdr *hdr; + struct net_device *dev; + int ret; + bool result = true; + + /* does interface still exist? */ + dev = dev_get_by_index(&init_net, skb->iif); + if (!dev) { + dev_kfree_skb(skb); + return true; + } + + /* validate info->control.vif against skb->iif */ + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata = container_of(sdata->bss, + struct ieee80211_sub_if_data, + u.ap); + + if (unlikely(info->control.vif && info->control.vif != &sdata->vif)) { + dev_kfree_skb(skb); + result = true; + goto out; + } + + if (info->flags & IEEE80211_TX_INTFL_NEED_TXPROCESSING) { + ieee80211_tx(dev, skb, true); + } else { + hdr = (struct ieee80211_hdr *)skb->data; + sta = sta_info_get(local, hdr->addr1); + + ret = __ieee80211_tx(local, &skb, sta); + if (ret != IEEE80211_TX_OK) + result = false; + } + + out: + dev_put(dev); + + return result; +} + /* * Transmit all pending packets. Called from tasklet, locks master device * TX lock so that no new packets can come in. @@ -1835,9 +1931,8 @@ void ieee80211_tx_pending(unsigned long data) { struct ieee80211_local *local = (struct ieee80211_local *)data; struct net_device *dev = local->mdev; - struct ieee80211_hdr *hdr; unsigned long flags; - int i, ret; + int i; bool next; rcu_read_lock(); @@ -1868,13 +1963,8 @@ void ieee80211_tx_pending(unsigned long data) while (!skb_queue_empty(&local->pending[i])) { struct sk_buff *skb = skb_dequeue(&local->pending[i]); - struct sta_info *sta; - - hdr = (struct ieee80211_hdr *)skb->data; - sta = sta_info_get(local, hdr->addr1); - ret = __ieee80211_tx(local, &skb, sta); - if (ret != IEEE80211_TX_OK) { + if (!ieee80211_tx_pending_skb(local, skb)) { skb_queue_head(&local->pending[i], skb); break; } -- cgit v1.2.3 From e4e72fb4de93e3d4047a4ee3f08778422e17ed0d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 23 Mar 2009 17:28:42 +0100 Subject: mac80211/iwlwifi: move virtual A-MDPU queue bookkeeping to iwlwifi This patch removes all the virtual A-MPDU-queue bookkeeping from mac80211. Curiously, iwlwifi already does its own bookkeeping, so it doesn't require much changes except where it needs to handle starting and stopping the queues in mac80211. To handle the queue stop/wake properly, we rewrite the software queue number for aggregation frames and internally to iwlwifi keep track of the queues that map into the same AC queue, and only talk to mac80211 about the AC queue. The implementation requires calling two new functions, iwl_stop_queue and iwl_wake_queue instead of the mac80211 counterparts. Signed-off-by: Johannes Berg Cc: Reinette Chattre Signed-off-by: John W. Linville --- net/mac80211/agg-tx.c | 44 +++-------------------------------- net/mac80211/ieee80211_i.h | 7 +----- net/mac80211/main.c | 9 ------- net/mac80211/sta_info.c | 12 ---------- net/mac80211/sta_info.h | 2 -- net/mac80211/tx.c | 19 --------------- net/mac80211/util.c | 58 ++++++---------------------------------------- 7 files changed, 11 insertions(+), 140 deletions(-) (limited to 'net') diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 64b839bfbf17..947aaaad35d2 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -131,14 +131,6 @@ static int ___ieee80211_stop_tx_ba_session(struct sta_info *sta, u16 tid, state = &sta->ampdu_mlme.tid_state_tx[tid]; - if (local->hw.ampdu_queues) { - /* - * Pretend the driver woke the queue, just in case - * it disabled it before the session was stopped. - */ - ieee80211_wake_queue( - &local->hw, local->hw.queues + sta->tid_to_tx_q[tid]); - } *state = HT_AGG_STATE_REQ_STOP_BA_MSK | (initiator << HT_AGG_STATE_INITIATOR_SHIFT); @@ -206,7 +198,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) struct sta_info *sta; struct ieee80211_sub_if_data *sdata; u8 *state; - int i, qn = -1, ret = 0; + int ret = 0; u16 start_seq_num; if (WARN_ON(!local->ops->ampdu_action)) @@ -275,29 +267,6 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) goto err_unlock_sta; } - if (hw->ampdu_queues) { - spin_lock(&local->queue_stop_reason_lock); - /* reserve a new queue for this session */ - for (i = 0; i < local->hw.ampdu_queues; i++) { - if (local->ampdu_ac_queue[i] < 0) { - qn = i; - local->ampdu_ac_queue[qn] = - ieee80211_ac_from_tid(tid); - break; - } - } - spin_unlock(&local->queue_stop_reason_lock); - - if (qn < 0) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "BA request denied - " - "queue unavailable for tid %d\n", tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - ret = -ENOSPC; - goto err_unlock_sta; - } - } - /* * While we're asking the driver about the aggregation, * stop the AC queue so that we don't have to worry @@ -319,7 +288,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) tid); #endif ret = -ENOMEM; - goto err_return_queue; + goto err_wake_queue; } skb_queue_head_init(&sta->ampdu_mlme.tid_tx[tid]->pending); @@ -348,7 +317,6 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) *state = HT_AGG_STATE_IDLE; goto err_free; } - sta->tid_to_tx_q[tid] = qn; /* Driver vetoed or OKed, but we can take packets again now */ ieee80211_wake_queue_by_reason( @@ -380,13 +348,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) err_free: kfree(sta->ampdu_mlme.tid_tx[tid]); sta->ampdu_mlme.tid_tx[tid] = NULL; - err_return_queue: - if (qn >= 0) { - /* give queue back to pool */ - spin_lock(&local->queue_stop_reason_lock); - local->ampdu_ac_queue[qn] = -1; - spin_unlock(&local->queue_stop_reason_lock); - } + err_wake_queue: ieee80211_wake_queue_by_reason( &local->hw, ieee80211_ac_from_tid(tid), IEEE80211_QUEUE_STOP_REASON_AGGREGATION); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 32345b479adb..e6ed78cb16b3 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -594,12 +594,7 @@ struct ieee80211_local { const struct ieee80211_ops *ops; - /* AC queue corresponding to each AMPDU queue */ - s8 ampdu_ac_queue[IEEE80211_MAX_AMPDU_QUEUES]; - unsigned int amdpu_ac_stop_refcnt[IEEE80211_MAX_AMPDU_QUEUES]; - - unsigned long queue_stop_reasons[IEEE80211_MAX_QUEUES + - IEEE80211_MAX_AMPDU_QUEUES]; + unsigned long queue_stop_reasons[IEEE80211_MAX_QUEUES]; /* also used to protect ampdu_ac_queue and amdpu_ac_stop_refcnt */ spinlock_t queue_stop_reason_lock; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 756284e0bbd3..a6f1d8a869bc 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -774,11 +774,6 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, setup_timer(&local->dynamic_ps_timer, ieee80211_dynamic_ps_timer, (unsigned long) local); - for (i = 0; i < IEEE80211_MAX_AMPDU_QUEUES; i++) - local->ampdu_ac_queue[i] = -1; - /* using an s8 won't work with more than that */ - BUILD_BUG_ON(IEEE80211_MAX_AMPDU_QUEUES > 127); - sta_info_init(local); for (i = 0; i < IEEE80211_MAX_QUEUES; i++) @@ -874,10 +869,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) */ if (hw->queues > IEEE80211_MAX_QUEUES) hw->queues = IEEE80211_MAX_QUEUES; - if (hw->ampdu_queues > IEEE80211_MAX_AMPDU_QUEUES) - hw->ampdu_queues = IEEE80211_MAX_AMPDU_QUEUES; - if (hw->queues < 4) - hw->ampdu_queues = 0; mdev = alloc_netdev_mq(sizeof(struct ieee80211_master_priv), "wmaster%d", ieee80211_master_setup, diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index dd3593c1fd23..c5f14e6bbde2 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -203,17 +203,6 @@ void sta_info_destroy(struct sta_info *sta) if (tid_rx) tid_rx->shutdown = true; - /* - * The stop callback cannot find this station any more, but - * it didn't complete its work -- start the queue if necessary - */ - if (sta->ampdu_mlme.tid_state_tx[i] & HT_AGG_STATE_INITIATOR_MSK && - sta->ampdu_mlme.tid_state_tx[i] & HT_AGG_STATE_REQ_STOP_BA_MSK && - local->hw.ampdu_queues) - ieee80211_wake_queue_by_reason(&local->hw, - local->hw.queues + sta->tid_to_tx_q[i], - IEEE80211_QUEUE_STOP_REASON_AGGREGATION); - spin_unlock_bh(&sta->lock); /* @@ -292,7 +281,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, * enable session_timer's data differentiation. refer to * sta_rx_agg_session_timer_expired for useage */ sta->timer_to_tid[i] = i; - sta->tid_to_tx_q[i] = -1; /* rx */ sta->ampdu_mlme.tid_state_rx[i] = HT_AGG_STATE_IDLE; sta->ampdu_mlme.tid_rx[i] = NULL; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 18fd5d1a4422..5534d489f506 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -206,7 +206,6 @@ struct sta_ampdu_mlme { * @tid_seq: per-TID sequence numbers for sending to this STA * @ampdu_mlme: A-MPDU state machine state * @timer_to_tid: identity mapping to ID timers - * @tid_to_tx_q: map tid to tx queue (invalid == negative values) * @llid: Local link ID * @plid: Peer link ID * @reason: Cancel reason on PLINK_HOLDING state @@ -281,7 +280,6 @@ struct sta_info { */ struct sta_ampdu_mlme ampdu_mlme; u8 timer_to_tid[STA_TID_NUM]; - s8 tid_to_tx_q[STA_TID_NUM]; #ifdef CONFIG_MAC80211_MESH /* diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 906ab785db40..3fb04a86444d 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1145,25 +1145,6 @@ static int __ieee80211_tx(struct ieee80211_local *local, info->flags &= ~(IEEE80211_TX_CTL_CLEAR_PS_FILT | IEEE80211_TX_CTL_FIRST_FRAGMENT); - /* - * Internally, we need to have the queue mapping point to - * the real AC queue, not the virtual A-MPDU queue. This - * now finally sets the queue to what the driver wants. - * We will later move this down into the only driver that - * needs it, iwlwifi. - */ - if (sta && local->hw.ampdu_queues && - info->flags & IEEE80211_TX_CTL_AMPDU) { - unsigned long flags; - u8 *qc = ieee80211_get_qos_ctl((void *) skb->data); - int tid = *qc & IEEE80211_QOS_CTL_TID_MASK; - - spin_lock_irqsave(&sta->lock, flags); - skb_set_queue_mapping(skb, local->hw.queues + - sta->tid_to_tx_q[tid]); - spin_unlock_irqrestore(&sta->lock, flags); - } - next = skb->next; len = skb->len; ret = local->ops->tx(local_to_hw(local), skb); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 0247d8022f5f..fdf432f14554 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -339,29 +339,8 @@ static void __ieee80211_wake_queue(struct ieee80211_hw *hw, int queue, { struct ieee80211_local *local = hw_to_local(hw); - if (queue >= hw->queues) { - if (local->ampdu_ac_queue[queue - hw->queues] < 0) - return; - - /* - * for virtual aggregation queues, we need to refcount the - * internal mac80211 disable (multiple times!), keep track of - * driver disable _and_ make sure the regular queue is - * actually enabled. - */ - if (reason == IEEE80211_QUEUE_STOP_REASON_AGGREGATION) - local->amdpu_ac_stop_refcnt[queue - hw->queues]--; - else - __clear_bit(reason, &local->queue_stop_reasons[queue]); - - if (local->queue_stop_reasons[queue] || - local->amdpu_ac_stop_refcnt[queue - hw->queues]) - return; - - /* now go on to treat the corresponding regular queue */ - queue = local->ampdu_ac_queue[queue - hw->queues]; - reason = IEEE80211_QUEUE_STOP_REASON_AGGREGATION; - } + if (WARN_ON(queue >= hw->queues)) + return; __clear_bit(reason, &local->queue_stop_reasons[queue]); @@ -400,25 +379,8 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue, { struct ieee80211_local *local = hw_to_local(hw); - if (queue >= hw->queues) { - if (local->ampdu_ac_queue[queue - hw->queues] < 0) - return; - - /* - * for virtual aggregation queues, we need to refcount the - * internal mac80211 disable (multiple times!), keep track of - * driver disable _and_ make sure the regular queue is - * actually enabled. - */ - if (reason == IEEE80211_QUEUE_STOP_REASON_AGGREGATION) - local->amdpu_ac_stop_refcnt[queue - hw->queues]++; - else - __set_bit(reason, &local->queue_stop_reasons[queue]); - - /* now go on to treat the corresponding regular queue */ - queue = local->ampdu_ac_queue[queue - hw->queues]; - reason = IEEE80211_QUEUE_STOP_REASON_AGGREGATION; - } + if (WARN_ON(queue >= hw->queues)) + return; /* * Only stop if it was previously running, this is necessary @@ -474,15 +436,9 @@ EXPORT_SYMBOL(ieee80211_stop_queues); int ieee80211_queue_stopped(struct ieee80211_hw *hw, int queue) { struct ieee80211_local *local = hw_to_local(hw); - unsigned long flags; - if (queue >= hw->queues) { - spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - queue = local->ampdu_ac_queue[queue - hw->queues]; - spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); - if (queue < 0) - return true; - } + if (WARN_ON(queue >= hw->queues)) + return true; return __netif_subqueue_stopped(local->mdev, queue); } @@ -497,7 +453,7 @@ void ieee80211_wake_queues_by_reason(struct ieee80211_hw *hw, spin_lock_irqsave(&local->queue_stop_reason_lock, flags); - for (i = 0; i < hw->queues + hw->ampdu_queues; i++) + for (i = 0; i < hw->queues; i++) __ieee80211_wake_queue(hw, i, reason); spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); -- cgit v1.2.3 From 8a5117d80fe93de5df5b56480054f7df1fd20755 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Tue, 24 Mar 2009 21:21:07 -0400 Subject: cfg80211: default CONFIG_WIRELESS_OLD_REGULATORY to n And update description and feature-removal schedule according to the new plan. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- net/wireless/Kconfig | 37 +++++++++---------------------------- 1 file changed, 9 insertions(+), 28 deletions(-) (limited to 'net') diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index d1d18f34d272..3c3bc9e579ed 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -12,36 +12,17 @@ config CFG80211_REG_DEBUG config WIRELESS_OLD_REGULATORY bool "Old wireless static regulatory definitions" - default y + default n ---help--- This option enables the old static regulatory information - and uses it within the new framework. This is available - temporarily as an option to help prevent immediate issues - due to the switch to the new regulatory framework which - does require a new userspace application which has the - database of regulatory information (CRDA) and another for - setting regulatory domains (iw). - - For more information see: - - http://wireless.kernel.org/en/developers/Regulatory/CRDA - http://wireless.kernel.org/en/users/Documentation/iw - - It is important to note though that if you *do* have CRDA present - and if this option is enabled CRDA *will* be called to update the - regulatory domain (for US and JP only). Support for letting the user - set the regulatory domain through iw is also supported. This option - mainly exists to leave around for a kernel release some old static - regulatory domains that were defined and to keep around the old - ieee80211_regdom module parameter. This is being phased out and you - should stop using them ASAP. - - Note: You will need CRDA if you want 802.11d support - - Say Y unless you have installed a new userspace application. - Also say Y if have one currently depending on the ieee80211_regdom - module parameter and cannot port it to use the new userspace - interfaces. + and uses it within the new framework. This option is available + for historical reasons and it is advised to leave it off. + + For details see: + + http://wireless.kernel.org/en/developers/Regulatory + + Say N and if you say Y, please tell us why. The default is N. config WIRELESS_EXT bool "Wireless extensions" -- cgit v1.2.3 From 80e20f6f360078b4852eac6825883e5aa25564bb Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 27 Mar 2009 17:22:55 -0700 Subject: Revert "netrom: zero length frame filtering in NetRom" This reverts commit a3ac80a130300573de351083cf4a5b46d233e8bf. Alan Cox says that zero length writes do have special meaning and are useful in this protocol. Signed-off-by: David S. Miller --- net/netrom/af_netrom.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'net') diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index d1c16bbee932..4e705f87969f 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1037,10 +1037,6 @@ static int nr_sendmsg(struct kiocb *iocb, struct socket *sock, unsigned char *asmptr; int size; - /* Netrom empty data frame has no meaning : don't send */ - if (len == 0) - return 0; - if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_CMSG_COMPAT)) return -EINVAL; @@ -1175,11 +1171,6 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, skb_reset_transport_header(skb); copied = skb->len; - /* NetRom empty data frame has no meaning : ignore it */ - if (copied == 0) { - goto out; - } - if (copied > size) { copied = size; msg->msg_flags |= MSG_TRUNC; @@ -1195,7 +1186,7 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, msg->msg_namelen = sizeof(*sax); -out: skb_free_datagram(sk, skb); + skb_free_datagram(sk, skb); release_sock(sk); return copied; -- cgit v1.2.3 From c44a4366649aca4f5b4a51ff71d4c9cde3b7c9da Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 27 Mar 2009 17:23:42 -0700 Subject: Revert "ax25: zero length frame filtering in AX25" This reverts commit f99bcff7a290768e035f3d4726e103c6ebe858bf. Like netrom, Alan Cox says that zero lengths have real meaning and are useful in this protocol. Signed-off-by: David S. Miller --- net/ax25/af_ax25.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'net') diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 7da5ebb84e97..fd9d06f291dc 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1435,11 +1435,6 @@ static int ax25_sendmsg(struct kiocb *iocb, struct socket *sock, size_t size; int lv, err, addr_len = msg->msg_namelen; - /* AX.25 empty data frame has no meaning : don't send */ - if (len == 0) { - return (0); - } - if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_EOR|MSG_CMSG_COMPAT)) return -EINVAL; @@ -1639,13 +1634,6 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock, skb_reset_transport_header(skb); copied = skb->len; - /* AX.25 empty data frame has no meaning : ignore it */ - if (copied == 0) { - err = copied; - skb_free_datagram(sk, skb); - goto out; - } - if (copied > size) { copied = size; msg->msg_flags |= MSG_TRUNC; -- cgit v1.2.3 From 284904aa79466a4736f4c775fdbe5c7407fa136c Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 27 Mar 2009 17:10:28 -0400 Subject: lsm: Relocate the IPv4 security_inet_conn_request() hooks The current placement of the security_inet_conn_request() hooks do not allow individual LSMs to override the IP options of the connection's request_sock. This is a problem as both SELinux and Smack have the ability to use labeled networking protocols which make use of IP options to carry security attributes and the inability to set the IP options at the start of the TCP handshake is problematic. This patch moves the IPv4 security_inet_conn_request() hooks past the code where the request_sock's IP options are set/reset so that the LSM can safely manipulate the IP options as needed. This patch intentionally does not change the related IPv6 hooks as IPv6 based labeling protocols which use IPv6 options are not currently implemented, once they are we will have a better idea of the correct placement for the IPv6 hooks. Signed-off-by: Paul Moore Acked-by: David S. Miller Signed-off-by: James Morris --- net/ipv4/syncookies.c | 9 +++++---- net/ipv4/tcp_ipv4.c | 7 ++++--- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index d346c22aa6ae..b35a950d2e06 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -288,10 +288,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, if (!req) goto out; - if (security_inet_conn_request(sk, skb, req)) { - reqsk_free(req); - goto out; - } ireq = inet_rsk(req); treq = tcp_rsk(req); treq->rcv_isn = ntohl(th->seq) - 1; @@ -322,6 +318,11 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, } } + if (security_inet_conn_request(sk, skb, req)) { + reqsk_free(req); + goto out; + } + req->expires = 0UL; req->retrans = 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d0a314879d81..5d427f86b414 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1230,14 +1230,15 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_openreq_init(req, &tmp_opt, skb); - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_free; - ireq = inet_rsk(req); ireq->loc_addr = daddr; ireq->rmt_addr = saddr; ireq->no_srccheck = inet_sk(sk)->transparent; ireq->opt = tcp_v4_save_options(sk, skb); + + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + if (!want_cookie) TCP_ECN_create_request(req, tcp_hdr(skb)); -- cgit v1.2.3 From 389fb800ac8be2832efedd19978a2b8ced37eb61 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 27 Mar 2009 17:10:34 -0400 Subject: netlabel: Label incoming TCP connections correctly in SELinux The current NetLabel/SELinux behavior for incoming TCP connections works but only through a series of happy coincidences that rely on the limited nature of standard CIPSO (only able to convey MLS attributes) and the write equality imposed by the SELinux MLS constraints. The problem is that network sockets created as the result of an incoming TCP connection were not on-the-wire labeled based on the security attributes of the parent socket but rather based on the wire label of the remote peer. The issue had to do with how IP options were managed as part of the network stack and where the LSM hooks were in relation to the code which set the IP options on these newly created child sockets. While NetLabel/SELinux did correctly set the socket's on-the-wire label it was promptly cleared by the network stack and reset based on the IP options of the remote peer. This patch, in conjunction with a prior patch that adjusted the LSM hook locations, works to set the correct on-the-wire label format for new incoming connections through the security_inet_conn_request() hook. Besides the correct behavior there are many advantages to this change, the most significant is that all of the NetLabel socket labeling code in SELinux now lives in hooks which can return error codes to the core stack which allows us to finally get ride of the selinux_netlbl_inode_permission() logic which greatly simplfies the NetLabel/SELinux glue code. In the process of developing this patch I also ran into a small handful of AF_INET6 cleanliness issues that have been fixed which should make the code safer and easier to extend in the future. Signed-off-by: Paul Moore Acked-by: Casey Schaufler Signed-off-by: James Morris --- net/ipv4/cipso_ipv4.c | 130 ++++++++++++++++++++++++++++++++---- net/netlabel/netlabel_kapi.c | 152 +++++++++++++++++++++++++++++++++++++------ 2 files changed, 250 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 7bc992976d29..039cc1ffe977 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1942,23 +1942,85 @@ socket_setattr_failure: } /** - * cipso_v4_sock_delattr - Delete the CIPSO option from a socket - * @sk: the socket + * cipso_v4_req_setattr - Add a CIPSO option to a connection request socket + * @req: the connection request socket + * @doi_def: the CIPSO DOI to use + * @secattr: the specific security attributes of the socket * * Description: - * Removes the CIPSO option from a socket, if present. + * Set the CIPSO option on the given socket using the DOI definition and + * security attributes passed to the function. Returns zero on success and + * negative values on failure. * */ -void cipso_v4_sock_delattr(struct sock *sk) +int cipso_v4_req_setattr(struct request_sock *req, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) { - u8 hdr_delta; - struct ip_options *opt; - struct inet_sock *sk_inet; + int ret_val = -EPERM; + unsigned char *buf = NULL; + u32 buf_len; + u32 opt_len; + struct ip_options *opt = NULL; + struct inet_request_sock *req_inet; - sk_inet = inet_sk(sk); - opt = sk_inet->opt; - if (opt == NULL || opt->cipso == 0) - return; + /* We allocate the maximum CIPSO option size here so we are probably + * being a little wasteful, but it makes our life _much_ easier later + * on and after all we are only talking about 40 bytes. */ + buf_len = CIPSO_V4_OPT_LEN_MAX; + buf = kmalloc(buf_len, GFP_ATOMIC); + if (buf == NULL) { + ret_val = -ENOMEM; + goto req_setattr_failure; + } + + ret_val = cipso_v4_genopt(buf, buf_len, doi_def, secattr); + if (ret_val < 0) + goto req_setattr_failure; + buf_len = ret_val; + + /* We can't use ip_options_get() directly because it makes a call to + * ip_options_get_alloc() which allocates memory with GFP_KERNEL and + * we won't always have CAP_NET_RAW even though we _always_ want to + * set the IPOPT_CIPSO option. */ + opt_len = (buf_len + 3) & ~3; + opt = kzalloc(sizeof(*opt) + opt_len, GFP_ATOMIC); + if (opt == NULL) { + ret_val = -ENOMEM; + goto req_setattr_failure; + } + memcpy(opt->__data, buf, buf_len); + opt->optlen = opt_len; + opt->cipso = sizeof(struct iphdr); + kfree(buf); + buf = NULL; + + req_inet = inet_rsk(req); + opt = xchg(&req_inet->opt, opt); + kfree(opt); + + return 0; + +req_setattr_failure: + kfree(buf); + kfree(opt); + return ret_val; +} + +/** + * cipso_v4_delopt - Delete the CIPSO option from a set of IP options + * @opt_ptr: IP option pointer + * + * Description: + * Deletes the CIPSO IP option from a set of IP options and makes the necessary + * adjustments to the IP option structure. Returns zero on success, negative + * values on failure. + * + */ +int cipso_v4_delopt(struct ip_options **opt_ptr) +{ + int hdr_delta = 0; + struct ip_options *opt = *opt_ptr; if (opt->srr || opt->rr || opt->ts || opt->router_alert) { u8 cipso_len; @@ -2003,11 +2065,34 @@ void cipso_v4_sock_delattr(struct sock *sk) } else { /* only the cipso option was present on the socket so we can * remove the entire option struct */ - sk_inet->opt = NULL; + *opt_ptr = NULL; hdr_delta = opt->optlen; kfree(opt); } + return hdr_delta; +} + +/** + * cipso_v4_sock_delattr - Delete the CIPSO option from a socket + * @sk: the socket + * + * Description: + * Removes the CIPSO option from a socket, if present. + * + */ +void cipso_v4_sock_delattr(struct sock *sk) +{ + int hdr_delta; + struct ip_options *opt; + struct inet_sock *sk_inet; + + sk_inet = inet_sk(sk); + opt = sk_inet->opt; + if (opt == NULL || opt->cipso == 0) + return; + + hdr_delta = cipso_v4_delopt(&sk_inet->opt); if (sk_inet->is_icsk && hdr_delta > 0) { struct inet_connection_sock *sk_conn = inet_csk(sk); sk_conn->icsk_ext_hdr_len -= hdr_delta; @@ -2015,6 +2100,27 @@ void cipso_v4_sock_delattr(struct sock *sk) } } +/** + * cipso_v4_req_delattr - Delete the CIPSO option from a request socket + * @reg: the request socket + * + * Description: + * Removes the CIPSO option from a request socket, if present. + * + */ +void cipso_v4_req_delattr(struct request_sock *req) +{ + struct ip_options *opt; + struct inet_request_sock *req_inet; + + req_inet = inet_rsk(req); + opt = req_inet->opt; + if (opt == NULL || opt->cipso == 0) + return; + + cipso_v4_delopt(&req_inet->opt); +} + /** * cipso_v4_getattr - Helper function for the cipso_v4_*_getattr functions * @cipso: the CIPSO v4 option diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index fd9229db075c..cae2f5f4cac0 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -619,8 +619,9 @@ int netlbl_enabled(void) } /** - * netlbl_socket_setattr - Label a socket using the correct protocol + * netlbl_sock_setattr - Label a socket using the correct protocol * @sk: the socket to label + * @family: protocol family * @secattr: the security attributes * * Description: @@ -633,29 +634,45 @@ int netlbl_enabled(void) * */ int netlbl_sock_setattr(struct sock *sk, + u16 family, const struct netlbl_lsm_secattr *secattr) { - int ret_val = -ENOENT; + int ret_val; struct netlbl_dom_map *dom_entry; rcu_read_lock(); dom_entry = netlbl_domhsh_getentry(secattr->domain); - if (dom_entry == NULL) + if (dom_entry == NULL) { + ret_val = -ENOENT; goto socket_setattr_return; - switch (dom_entry->type) { - case NETLBL_NLTYPE_ADDRSELECT: - ret_val = -EDESTADDRREQ; - break; - case NETLBL_NLTYPE_CIPSOV4: - ret_val = cipso_v4_sock_setattr(sk, - dom_entry->type_def.cipsov4, - secattr); + } + switch (family) { + case AF_INET: + switch (dom_entry->type) { + case NETLBL_NLTYPE_ADDRSELECT: + ret_val = -EDESTADDRREQ; + break; + case NETLBL_NLTYPE_CIPSOV4: + ret_val = cipso_v4_sock_setattr(sk, + dom_entry->type_def.cipsov4, + secattr); + break; + case NETLBL_NLTYPE_UNLABELED: + ret_val = 0; + break; + default: + ret_val = -ENOENT; + } break; - case NETLBL_NLTYPE_UNLABELED: +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + /* since we don't support any IPv6 labeling protocols right + * now we can optimize everything away until we do */ ret_val = 0; break; +#endif /* IPv6 */ default: - ret_val = -ENOENT; + ret_val = -EPROTONOSUPPORT; } socket_setattr_return: @@ -689,9 +706,25 @@ void netlbl_sock_delattr(struct sock *sk) * on failure. * */ -int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) +int netlbl_sock_getattr(struct sock *sk, + struct netlbl_lsm_secattr *secattr) { - return cipso_v4_sock_getattr(sk, secattr); + int ret_val; + + switch (sk->sk_family) { + case AF_INET: + ret_val = cipso_v4_sock_getattr(sk, secattr); + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + ret_val = -ENOMSG; + break; +#endif /* IPv6 */ + default: + ret_val = -EPROTONOSUPPORT; + } + + return ret_val; } /** @@ -748,7 +781,7 @@ int netlbl_conn_setattr(struct sock *sk, break; #endif /* IPv6 */ default: - ret_val = 0; + ret_val = -EPROTONOSUPPORT; } conn_setattr_return: @@ -756,6 +789,77 @@ conn_setattr_return: return ret_val; } +/** + * netlbl_req_setattr - Label a request socket using the correct protocol + * @req: the request socket to label + * @secattr: the security attributes + * + * Description: + * Attach the correct label to the given socket using the security attributes + * specified in @secattr. Returns zero on success, negative values on failure. + * + */ +int netlbl_req_setattr(struct request_sock *req, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + struct netlbl_dom_map *dom_entry; + struct netlbl_domaddr4_map *af4_entry; + u32 proto_type; + struct cipso_v4_doi *proto_cv4; + + rcu_read_lock(); + dom_entry = netlbl_domhsh_getentry(secattr->domain); + if (dom_entry == NULL) { + ret_val = -ENOENT; + goto req_setattr_return; + } + switch (req->rsk_ops->family) { + case AF_INET: + if (dom_entry->type == NETLBL_NLTYPE_ADDRSELECT) { + struct inet_request_sock *req_inet = inet_rsk(req); + af4_entry = netlbl_domhsh_getentry_af4(secattr->domain, + req_inet->rmt_addr); + if (af4_entry == NULL) { + ret_val = -ENOENT; + goto req_setattr_return; + } + proto_type = af4_entry->type; + proto_cv4 = af4_entry->type_def.cipsov4; + } else { + proto_type = dom_entry->type; + proto_cv4 = dom_entry->type_def.cipsov4; + } + switch (proto_type) { + case NETLBL_NLTYPE_CIPSOV4: + ret_val = cipso_v4_req_setattr(req, proto_cv4, secattr); + break; + case NETLBL_NLTYPE_UNLABELED: + /* just delete the protocols we support for right now + * but we could remove other protocols if needed */ + cipso_v4_req_delattr(req); + ret_val = 0; + break; + default: + ret_val = -ENOENT; + } + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + /* since we don't support any IPv6 labeling protocols right + * now we can optimize everything away until we do */ + ret_val = 0; + break; +#endif /* IPv6 */ + default: + ret_val = -EPROTONOSUPPORT; + } + +req_setattr_return: + rcu_read_unlock(); + return ret_val; +} + /** * netlbl_skbuff_setattr - Label a packet using the correct protocol * @skb: the packet @@ -808,7 +912,7 @@ int netlbl_skbuff_setattr(struct sk_buff *skb, break; #endif /* IPv6 */ default: - ret_val = 0; + ret_val = -EPROTONOSUPPORT; } skbuff_setattr_return: @@ -833,9 +937,17 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb, u16 family, struct netlbl_lsm_secattr *secattr) { - if (CIPSO_V4_OPTEXIST(skb) && - cipso_v4_skbuff_getattr(skb, secattr) == 0) - return 0; + switch (family) { + case AF_INET: + if (CIPSO_V4_OPTEXIST(skb) && + cipso_v4_skbuff_getattr(skb, secattr) == 0) + return 0; + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + break; +#endif /* IPv6 */ + } return netlbl_unlabel_getattr(skb, family, secattr); } -- cgit v1.2.3 From 8651d5c0b1f874c5b8307ae2b858bc40f9f02482 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 27 Mar 2009 17:10:48 -0400 Subject: lsm: Remove the socket_post_accept() hook The socket_post_accept() hook is not currently used by any in-tree modules and its existence continues to cause problems by confusing people about what can be safely accomplished using this hook. If a legitimate need for this hook arises in the future it can always be reintroduced. Signed-off-by: Paul Moore Signed-off-by: James Morris --- net/socket.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/socket.c b/net/socket.c index 0b14b79c03af..91d0c0254ffe 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1536,8 +1536,6 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, fd_install(newfd, newfile); err = newfd; - security_socket_post_accept(sock, newsock); - out_put: fput_light(sock->file, fput_needed); out: -- cgit v1.2.3 From 07feee8f812f7327a46186f7604df312c8c81962 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 27 Mar 2009 17:10:54 -0400 Subject: netlabel: Cleanup the Smack/NetLabel code to fix incoming TCP connections This patch cleans up a lot of the Smack network access control code. The largest changes are to fix the labeling of incoming TCP connections in a manner similar to the recent SELinux changes which use the security_inet_conn_request() hook to label the request_sock and let the label move to the child socket via the normal network stack mechanisms. In addition to the incoming TCP connection fixes this patch also removes the smk_labled field from the socket_smack struct as the minor optimization advantage was outweighed by the difficulty in maintaining it's proper state. Signed-off-by: Paul Moore Acked-by: Casey Schaufler Signed-off-by: James Morris --- net/netlabel/netlabel_kapi.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'net') diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index cae2f5f4cac0..b0e582f2d37a 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -860,6 +860,19 @@ req_setattr_return: return ret_val; } +/** +* netlbl_req_delattr - Delete all the NetLabel labels on a socket +* @req: the socket +* +* Description: +* Remove all the NetLabel labeling from @req. +* +*/ +void netlbl_req_delattr(struct request_sock *req) +{ + cipso_v4_req_delattr(req); +} + /** * netlbl_skbuff_setattr - Label a packet using the correct protocol * @skb: the packet -- cgit v1.2.3 From 776bd5c7a207de546918f805090bfc823d2660c8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Mar 2009 20:45:28 -0400 Subject: SUNRPC: Don't flag empty RPCB_GETADDR reply as bogus In 2007, commit e65fe3976f594603ed7b1b4a99d3e9b867f573ea added additional sanity checking to rpcb_decode_getaddr() to make sure we were getting a reply that was long enough to be an actual universal address. If the uaddr string isn't long enough, the XDR decoder returns EIO. However, an empty string is a valid RPCB_GETADDR response if the requested service isn't registered. Moreover, "::.n.m" is also a valid RPCB_GETADDR response for IPv6 addresses that is shorter than rpcb_decode_getaddr()'s lower limit of 11. So this sanity check introduced a regression for rpcbind requests against IPv6 remotes. So revert the lower bound check added by commit e65fe3976f594603ed7b1b4a99d3e9b867f573ea, and add an explicit check for an empty uaddr string, similar to libtirpc's rpcb_getaddr(3). Pointed-out-by: Jeff Layton Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 03ae007641e4..2caa7edeeaba 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -703,11 +703,16 @@ static int rpcb_decode_getaddr(struct rpc_rqst *req, __be32 *p, *portp = 0; addr_len = ntohl(*p++); + if (addr_len == 0) { + dprintk("RPC: rpcb_decode_getaddr: " + "service is not registered\n"); + return 0; + } + /* - * Simple sanity check. The smallest possible universal - * address is an IPv4 address string containing 11 bytes. + * Simple sanity check. */ - if (addr_len < 11 || addr_len > RPCBIND_MAXUADDRLEN) + if (addr_len > RPCBIND_MAXUADDRLEN) goto out_err; /* -- cgit v1.2.3 From 156e62094a74cf43f02f56ef96b6cda567501357 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Mar 2009 20:45:58 -0400 Subject: SUNRPC: Clean up svc_find_xprt() calling sequence Clean up: add documentating comment and use appropriate data types for svc_find_xprt()'s arguments. This also eliminates a mixed sign comparison: @port was an int, while the return value of svc_xprt_local_port() is an unsigned short. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/svc_xprt.c | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index e588df5d6b34..c947c93dbc24 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1033,7 +1033,13 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt) return dr; } -/* +/** + * svc_find_xprt - find an RPC transport instance + * @serv: pointer to svc_serv to search + * @xcl_name: C string containing transport's class name + * @af: Address family of transport's local address + * @port: transport's IP port number + * * Return the transport instance pointer for the endpoint accepting * connections/peer traffic from the specified transport class, * address family and port. @@ -1042,14 +1048,14 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt) * wild-card, and will result in matching the first transport in the * service's list that has a matching class name. */ -struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name, - int af, int port) +struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name, + const sa_family_t af, const unsigned short port) { struct svc_xprt *xprt; struct svc_xprt *found = NULL; /* Sanity check the args */ - if (!serv || !xcl_name) + if (serv == NULL || xcl_name == NULL) return found; spin_lock_bh(&serv->sv_lock); @@ -1058,7 +1064,7 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name, continue; if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family) continue; - if (port && port != svc_xprt_local_port(xprt)) + if (port != 0 && port != svc_xprt_local_port(xprt)) continue; found = xprt; svc_xprt_get(xprt); -- cgit v1.2.3 From 4b62e58cccff9c5e7ffc7023f7ec24c75fbd549b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Mar 2009 20:46:06 -0400 Subject: SUNRPC: Pass a family argument to svc_register() The sv_family field is going away. Instead of using sv_family, have the svc_register() function take a protocol family argument. Since this argument represents a protocol family, and not an address family, this argument takes an int, as this is what is passed to sock_create_kern(). Also make sure svc_register's helpers are checking for PF_FOO instead of AF_FOO. The value of [AP]F_FOO are equivalent; this is simply a symbolic change to reflect the semantics of the value stored in that variable. sock_create_kern() should return EPFNOSUPPORT if the passed-in protocol family isn't supported, but it uses EAFNOSUPPORT for this case. We will stick with that tradition here, as svc_register() is called by the RPC server in the same path as sock_create_kern(). Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/svc.c | 21 +++++++++++---------- net/sunrpc/svcsock.c | 2 +- 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index c51fed4d1af1..41bc36ea2224 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -800,17 +800,17 @@ static int __svc_rpcb_register6(const u32 program, const u32 version, * if any error occurs. */ static int __svc_register(const u32 program, const u32 version, - const sa_family_t family, + const int family, const unsigned short protocol, const unsigned short port) { int error; switch (family) { - case AF_INET: + case PF_INET: return __svc_rpcb_register4(program, version, protocol, port); - case AF_INET6: + case PF_INET6: error = __svc_rpcb_register6(program, version, protocol, port); if (error < 0) @@ -840,11 +840,11 @@ static int __svc_register(const u32 program, const u32 version, * if any error occurs. */ static int __svc_register(const u32 program, const u32 version, - sa_family_t family, + const int family, const unsigned short protocol, const unsigned short port) { - if (family != AF_INET) + if (family != PF_INET) return -EAFNOSUPPORT; return rpcb_register(program, version, protocol, port); @@ -855,13 +855,14 @@ static int __svc_register(const u32 program, const u32 version, /** * svc_register - register an RPC service with the local portmapper * @serv: svc_serv struct for the service to register + * @family: protocol family of service's listener socket * @proto: transport protocol number to advertise * @port: port to advertise * - * Service is registered for any address in serv's address family + * Service is registered for any address in the passed-in protocol family */ -int svc_register(const struct svc_serv *serv, const unsigned short proto, - const unsigned short port) +int svc_register(const struct svc_serv *serv, const int family, + const unsigned short proto, const unsigned short port) { struct svc_program *progp; unsigned int i; @@ -879,7 +880,7 @@ int svc_register(const struct svc_serv *serv, const unsigned short proto, i, proto == IPPROTO_UDP? "udp" : "tcp", port, - serv->sv_family, + family, progp->pg_vers[i]->vs_hidden? " (but not telling portmap)" : ""); @@ -887,7 +888,7 @@ int svc_register(const struct svc_serv *serv, const unsigned short proto, continue; error = __svc_register(progp->pg_prog, i, - serv->sv_family, proto, port); + family, proto, port); if (error < 0) break; } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 5763e6460fea..d00583c1cd04 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1122,7 +1122,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, /* Register socket with portmapper */ if (*errp >= 0 && pmap_register) - *errp = svc_register(serv, inet->sk_protocol, + *errp = svc_register(serv, serv->sv_family, inet->sk_protocol, ntohs(inet_sk(inet)->sport)); if (*errp < 0) { -- cgit v1.2.3 From baf01caf09e87579c2d157e5ee29975db8551522 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Mar 2009 20:46:13 -0400 Subject: SUNRPC: svc_setup_socket() gets protocol family from socket Since the sv_family field is going away, modify svc_setup_socket() to extract the protocol family from the passed-in socket instead of from the passed-in svc_serv struct. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/svcsock.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index d00583c1cd04..d00bc3307745 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1122,7 +1122,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, /* Register socket with portmapper */ if (*errp >= 0 && pmap_register) - *errp = svc_register(serv, serv->sv_family, inet->sk_protocol, + *errp = svc_register(serv, inet->sk_family, inet->sk_protocol, ntohs(inet_sk(inet)->sport)); if (*errp < 0) { @@ -1145,13 +1145,13 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, /* * We start one listener per sv_serv. We want AF_INET - * requests to be automatically shunted to our AF_INET6 + * requests to be automatically shunted to our PF_INET6 * listener using a mapped IPv4 address. Make sure * no-one starts an equivalent IPv4 listener, which * would steal our incoming connections. */ val = 0; - if (serv->sv_family == AF_INET6) + if (inet->sk_family == PF_INET6) kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY, (char *)&val, sizeof(val)); -- cgit v1.2.3 From 9652ada3fb5914a67d8422114e8a76388330fa79 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Mar 2009 20:46:21 -0400 Subject: SUNRPC: Change svc_create_xprt() to take a @family argument The sv_family field is going away. Pass a protocol family argument to svc_create_xprt() instead of extracting the family from the passed-in svc_serv struct. Again, as this is a listener socket and not an address, we make this new argument an "int" protocol family, instead of an "sa_family_t." Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/svc_xprt.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index c947c93dbc24..2819ee093f36 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -161,7 +161,9 @@ EXPORT_SYMBOL_GPL(svc_xprt_init); static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, struct svc_serv *serv, - unsigned short port, int flags) + const int family, + const unsigned short port, + int flags) { struct sockaddr_in sin = { .sin_family = AF_INET, @@ -176,12 +178,12 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, struct sockaddr *sap; size_t len; - switch (serv->sv_family) { - case AF_INET: + switch (family) { + case PF_INET: sap = (struct sockaddr *)&sin; len = sizeof(sin); break; - case AF_INET6: + case PF_INET6: sap = (struct sockaddr *)&sin6; len = sizeof(sin6); break; @@ -192,7 +194,8 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, return xcl->xcl_ops->xpo_create(serv, sap, len, flags); } -int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port, +int svc_create_xprt(struct svc_serv *serv, const char *xprt_name, + const int family, const unsigned short port, int flags) { struct svc_xprt_class *xcl; @@ -209,7 +212,7 @@ int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port, goto err; spin_unlock(&svc_xprt_class_lock); - newxprt = __svc_xpo_create(xcl, serv, port, flags); + newxprt = __svc_xpo_create(xcl, serv, family, port, flags); if (IS_ERR(newxprt)) { module_put(xcl->xcl_owner); return PTR_ERR(newxprt); -- cgit v1.2.3 From 49a9072f29a1039f142ec98b44a72d7173651c02 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Mar 2009 20:46:29 -0400 Subject: SUNRPC: Remove @family argument from svc_create() and svc_create_pooled() Since an RPC service listener's protocol family is specified now via svc_create_xprt(), it no longer needs to be passed to svc_create() or svc_create_pooled(). Remove that argument from the synopsis of those functions, and remove the sv_family field from the svc_serv struct. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/svc.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 41bc36ea2224..d72ff44826d8 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -359,7 +359,7 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu) */ static struct svc_serv * __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, - sa_family_t family, void (*shutdown)(struct svc_serv *serv)) + void (*shutdown)(struct svc_serv *serv)) { struct svc_serv *serv; unsigned int vers; @@ -368,7 +368,6 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL))) return NULL; - serv->sv_family = family; serv->sv_name = prog->pg_name; serv->sv_program = prog; serv->sv_nrthreads = 1; @@ -427,21 +426,21 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, struct svc_serv * svc_create(struct svc_program *prog, unsigned int bufsize, - sa_family_t family, void (*shutdown)(struct svc_serv *serv)) + void (*shutdown)(struct svc_serv *serv)) { - return __svc_create(prog, bufsize, /*npools*/1, family, shutdown); + return __svc_create(prog, bufsize, /*npools*/1, shutdown); } EXPORT_SYMBOL_GPL(svc_create); struct svc_serv * svc_create_pooled(struct svc_program *prog, unsigned int bufsize, - sa_family_t family, void (*shutdown)(struct svc_serv *serv), + void (*shutdown)(struct svc_serv *serv), svc_thread_fn func, struct module *mod) { struct svc_serv *serv; unsigned int npools = svc_pool_map_get(); - serv = __svc_create(prog, bufsize, npools, family, shutdown); + serv = __svc_create(prog, bufsize, npools, shutdown); if (serv != NULL) { serv->sv_function = func; -- cgit v1.2.3 From 7d21c0f9845f0ce4e81baac3519fbb2c6c2cc908 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Mar 2009 20:46:44 -0400 Subject: SUNRPC: Set IPV6ONLY flag on PF_INET6 RPC listener sockets We are about to convert to using separate RPC listener sockets for PF_INET and PF_INET6. This echoes the way IPv6 is handled in user space by TI-RPC, and eliminates the need for ULPs to worry about mapped IPv4 AF_INET6 addresses when doing address comparisons. Start by setting the IPV6ONLY flag on PF_INET6 RPC listener sockets. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/svcsock.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index d00bc3307745..ac6cd65220c7 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1144,13 +1144,11 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, svc_tcp_init(svsk, serv); /* - * We start one listener per sv_serv. We want AF_INET - * requests to be automatically shunted to our PF_INET6 - * listener using a mapped IPv4 address. Make sure - * no-one starts an equivalent IPv4 listener, which - * would steal our incoming connections. + * If this is a PF_INET6 listener, we want to avoid + * getting requests from IPv4 remotes. Those should + * be shunted to a PF_INET listener via rpcbind. */ - val = 0; + val = 1; if (inet->sk_family == PF_INET6) kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY, (char *)&val, sizeof(val)); -- cgit v1.2.3 From fc28decdc93633a65d54e42498e9e819d466329c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Mar 2009 20:46:51 -0400 Subject: SUNRPC: Use IPv4 loopback for registering AF_INET6 kernel RPC services The kernel uses an IPv6 loopback address when registering its AF_INET6 RPC services so that it can tell whether the local portmapper is actually IPv6-enabled. Since the legacy portmapper doesn't listen on IPv6, however, this causes a long timeout on older systems if the kernel happens to try creating and registering an AF_INET6 RPC service. Originally I wanted to use a connected transport (either TCP or connected UDP) so that the upcall would fail immediately if the portmapper wasn't listening on IPv6, but we never agreed on what transport to use. In the end, it's of little consequence to the kernel whether the local portmapper is listening on IPv6. It's only important whether the portmapper supports rpcbind v4. And the kernel can't tell that at all if it is sending requests via IPv6 -- the portmapper will just ignore them. So, send both rpcbind v2 and v4 SET/UNSET requests via IPv4 loopback to maintain better backwards compatibility between new kernels and legacy user space, and prevent multi-second hangs in some cases when the kernel attempts to register RPC services. This patch is part of a series that addresses http://bugzilla.kernel.org/show_bug.cgi?id=12256 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 23 ++++++----------------- 1 file changed, 6 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 2caa7edeeaba..ebce7a5976c9 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -124,12 +124,6 @@ static const struct sockaddr_in rpcb_inaddr_loopback = { .sin_port = htons(RPCBIND_PORT), }; -static const struct sockaddr_in6 rpcb_in6addr_loopback = { - .sin6_family = AF_INET6, - .sin6_addr = IN6ADDR_LOOPBACK_INIT, - .sin6_port = htons(RPCBIND_PORT), -}; - static struct rpc_clnt *rpcb_create_local(struct sockaddr *addr, size_t addrlen, u32 version) { @@ -176,9 +170,10 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, return rpc_create(&args); } -static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, - u32 version, struct rpc_message *msg) +static int rpcb_register_call(u32 version, struct rpc_message *msg) { + struct sockaddr *addr = (struct sockaddr *)&rpcb_inaddr_loopback; + size_t addrlen = sizeof(rpcb_inaddr_loopback); struct rpc_clnt *rpcb_clnt; int result, error = 0; @@ -254,9 +249,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) if (port) msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET]; - return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, - sizeof(rpcb_inaddr_loopback), - RPCBVERS_2, &msg); + return rpcb_register_call(RPCBVERS_2, &msg); } /* @@ -284,9 +277,7 @@ static int rpcb_register_netid4(struct sockaddr_in *address_to_register, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, - sizeof(rpcb_inaddr_loopback), - RPCBVERS_4, msg); + return rpcb_register_call(RPCBVERS_4, msg); } /* @@ -318,9 +309,7 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, if (port) msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; - return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback, - sizeof(rpcb_in6addr_loopback), - RPCBVERS_4, msg); + return rpcb_register_call(RPCBVERS_4, msg); } /** -- cgit v1.2.3 From ba5c35e0c7e30b095636cd58b0854fdbd3c32947 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Mar 2009 20:46:59 -0400 Subject: SUNRPC: Don't return EPROTONOSUPPORT in svc_register()'s helpers The RPC client returns -EPROTONOSUPPORT if there is a protocol version mismatch (ie the remote RPC server doesn't support the RPC protocol version sent by the client). Helpers for the svc_register() function return -EPROTONOSUPPORT if they don't recognize the passed-in IPPROTO_ value. These are two entirely different failure modes. Have the helpers return -ENOPROTOOPT instead of -EPROTONOSUPPORT. This will allow callers to determine more precisely what the underlying problem is, and decide to report or recover appropriately. This patch is part of a series that addresses http://bugzilla.kernel.org/show_bug.cgi?id=12256 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/svc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index d72ff44826d8..17e0d7265dfd 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -749,7 +749,7 @@ static int __svc_rpcb_register4(const u32 program, const u32 version, netid = RPCBIND_NETID_TCP; break; default: - return -EPROTONOSUPPORT; + return -ENOPROTOOPT; } return rpcb_v4_register(program, version, @@ -785,7 +785,7 @@ static int __svc_rpcb_register6(const u32 program, const u32 version, netid = RPCBIND_NETID_TCP6; break; default: - return -EPROTONOSUPPORT; + return -ENOPROTOOPT; } return rpcb_v4_register(program, version, -- cgit v1.2.3 From 3aba45536fe8f92aa07bcdfd2fb1cf17eec7d786 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Mar 2009 20:47:06 -0400 Subject: SUNRPC: Clean up address type casts in rpcb_v4_register() Clean up: Simplify rpcb_v4_register() and its helpers by moving the details of sockaddr type casting to rpcb_v4_register()'s helper functions. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index ebce7a5976c9..44d0732ba874 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -170,7 +170,7 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, return rpc_create(&args); } -static int rpcb_register_call(u32 version, struct rpc_message *msg) +static int rpcb_register_call(const u32 version, struct rpc_message *msg) { struct sockaddr *addr = (struct sockaddr *)&rpcb_inaddr_loopback; size_t addrlen = sizeof(rpcb_inaddr_loopback); @@ -255,17 +255,17 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) /* * Fill in AF_INET family-specific arguments to register */ -static int rpcb_register_netid4(struct sockaddr_in *address_to_register, +static int rpcb_register_netid4(const struct sockaddr *sap, struct rpc_message *msg) { + const struct sockaddr_in *sin = (const struct sockaddr_in *)sap; struct rpcbind_args *map = msg->rpc_argp; - unsigned short port = ntohs(address_to_register->sin_port); + unsigned short port = ntohs(sin->sin_port); char buf[32]; /* Construct AF_INET universal address */ snprintf(buf, sizeof(buf), "%pI4.%u.%u", - &address_to_register->sin_addr.s_addr, - port >> 8, port & 0xff); + &sin->sin_addr.s_addr, port >> 8, port & 0xff); map->r_addr = buf; dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " @@ -283,21 +283,21 @@ static int rpcb_register_netid4(struct sockaddr_in *address_to_register, /* * Fill in AF_INET6 family-specific arguments to register */ -static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, +static int rpcb_register_netid6(const struct sockaddr *sap, struct rpc_message *msg) { + const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap; struct rpcbind_args *map = msg->rpc_argp; - unsigned short port = ntohs(address_to_register->sin6_port); + unsigned short port = ntohs(sin6->sin6_port); char buf[64]; /* Construct AF_INET6 universal address */ - if (ipv6_addr_any(&address_to_register->sin6_addr)) + if (ipv6_addr_any(&sin6->sin6_addr)) snprintf(buf, sizeof(buf), "::.%u.%u", port >> 8, port & 0xff); else snprintf(buf, sizeof(buf), "%pI6.%u.%u", - &address_to_register->sin6_addr, - port >> 8, port & 0xff); + &sin6->sin6_addr, port >> 8, port & 0xff); map->r_addr = buf; dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " @@ -369,11 +369,9 @@ int rpcb_v4_register(const u32 program, const u32 version, switch (address->sa_family) { case AF_INET: - return rpcb_register_netid4((struct sockaddr_in *)address, - &msg); + return rpcb_register_netid4(address, &msg); case AF_INET6: - return rpcb_register_netid6((struct sockaddr_in6 *)address, - &msg); + return rpcb_register_netid6(address, &msg); } return -EAFNOSUPPORT; -- cgit v1.2.3 From 126e4bc3b3b446482696377f67a634c76eaf2e9c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Mar 2009 20:47:14 -0400 Subject: SUNRPC: rpcbind actually interprets r_owner string RFC 1833 has little to say about the contents of r_owner; it only specifies that it is a string, and states that it is used to control who can UNSET an entry. Our port of rpcbind (from Sun) assumes this string contains a numeric UID value, not alphabetical or symbolic characters, but checks this value only for AF_LOCAL RPCB_SET or RPCB_UNSET requests. In all other cases, rpcbind ignores the contents of the r_owner string. The reference user space implementation of rpcb_set(3) uses a numeric UID for all SET/UNSET requests (even via the network) and an empty string for all other requests. We emulate that behavior here to maintain bug-for-bug compatibility. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 44d0732ba874..d550d0b967db 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -63,9 +63,16 @@ enum { * r_owner * * The "owner" is allowed to unset a service in the rpcbind database. - * We always use the following (arbitrary) fixed string. + * + * For AF_LOCAL SET/UNSET requests, rpcbind treats this string as a + * UID which it maps to a local user name via a password lookup. + * In all other cases it is ignored. + * + * For SET/UNSET requests, user space provides a value, even for + * network requests, and GETADDR uses an empty string. We follow + * those precedents here. */ -#define RPCB_OWNER_STRING "rpcb" +#define RPCB_OWNER_STRING "0" #define RPCB_MAXOWNERLEN sizeof(RPCB_OWNER_STRING) static void rpcb_getport_done(struct rpc_task *, void *); @@ -566,7 +573,7 @@ void rpcb_getport_async(struct rpc_task *task) map->r_xprt = xprt_get(xprt); map->r_netid = rpc_peeraddr2str(clnt, RPC_DISPLAY_NETID); map->r_addr = rpc_peeraddr2str(rpcb_clnt, RPC_DISPLAY_UNIVERSAL_ADDR); - map->r_owner = RPCB_OWNER_STRING; /* ignored for GETADDR */ + map->r_owner = ""; map->r_status = -EIO; child = rpcb_call_async(rpcb_clnt, map, proc); -- cgit v1.2.3 From 1673d0de40ab46cac3b456ad50e1c8d6a31bfd66 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Mar 2009 20:47:21 -0400 Subject: SUNRPC: Allow callers to pass rpcb_v4_register a NULL address The user space TI-RPC library uses an empty string for the universal address when unregistering all target addresses for [program, version]. The kernel's rpcb client should behave the same way. Here, we are switching between several registration methods based on the protocol family of the incoming address. Rename the other rpcbind v4 registration functions to make it clear that they, as well, are switched on protocol family. In /etc/netconfig, this is either "inet" or "inet6". NB: The loopback protocol families are not supported in the kernel. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 38 ++++++++++++++++++++++++++++---------- 1 file changed, 28 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index d550d0b967db..8ea8907d4b8d 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -262,8 +262,8 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) /* * Fill in AF_INET family-specific arguments to register */ -static int rpcb_register_netid4(const struct sockaddr *sap, - struct rpc_message *msg) +static int rpcb_register_inet4(const struct sockaddr *sap, + struct rpc_message *msg) { const struct sockaddr_in *sin = (const struct sockaddr_in *)sap; struct rpcbind_args *map = msg->rpc_argp; @@ -290,8 +290,8 @@ static int rpcb_register_netid4(const struct sockaddr *sap, /* * Fill in AF_INET6 family-specific arguments to register */ -static int rpcb_register_netid6(const struct sockaddr *sap, - struct rpc_message *msg) +static int rpcb_register_inet6(const struct sockaddr *sap, + struct rpc_message *msg) { const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap; struct rpcbind_args *map = msg->rpc_argp; @@ -319,6 +319,20 @@ static int rpcb_register_netid6(const struct sockaddr *sap, return rpcb_register_call(RPCBVERS_4, msg); } +static int rpcb_unregister_all_protofamilies(struct rpc_message *msg) +{ + struct rpcbind_args *map = msg->rpc_argp; + + dprintk("RPC: unregistering [%u, %u, '%s'] with " + "local rpcbind\n", + map->r_prog, map->r_vers, map->r_netid); + + map->r_addr = ""; + msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; + + return rpcb_register_call(RPCBVERS_4, msg); +} + /** * rpcb_v4_register - set or unset a port registration with the local rpcbind * @program: RPC program number of service to (un)register @@ -336,10 +350,11 @@ static int rpcb_register_netid6(const struct sockaddr *sap, * invoke this function once for each [program, version, address, * netid] tuple they wish to advertise. * - * Callers may also unregister RPC services that are no longer - * available by setting the port number in the passed-in address - * to zero. Callers pass a netid of "" to unregister all - * transport netids associated with [program, version, address]. + * Callers may also unregister RPC services that are registered at a + * specific address by setting the port number in @address to zero. + * They may unregister all registered protocol families at once for + * a service by passing a NULL @address argument. If @netid is "" + * then all netids for [program, version, address] are unregistered. * * This function uses rpcbind protocol version 4 to contact the * local rpcbind daemon. The local rpcbind daemon must support @@ -374,11 +389,14 @@ int rpcb_v4_register(const u32 program, const u32 version, .rpc_argp = &map, }; + if (address == NULL) + return rpcb_unregister_all_protofamilies(&msg); + switch (address->sa_family) { case AF_INET: - return rpcb_register_netid4(address, &msg); + return rpcb_register_inet4(address, &msg); case AF_INET6: - return rpcb_register_netid6(address, &msg); + return rpcb_register_inet6(address, &msg); } return -EAFNOSUPPORT; -- cgit v1.2.3 From d5a8620f7c8a5bcade730e2fa1224191f289fb00 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Mar 2009 20:47:29 -0400 Subject: SUNRPC: Simplify svc_unregister() Our initial implementation of svc_unregister() assumed that PMAP_UNSET cleared all rpcbind registrations for a [program, version] tuple. However, we now have evidence that PMAP_UNSET clears only "inet" entries, and not "inet6" entries, in the rpcbind database. For backwards compatibility with the legacy portmapper, the svc_unregister() function also must work if user space doesn't support rpcbind version 4 at all. Thus we'll send an rpcbind v4 UNSET, and if that fails, we'll send a PMAP_UNSET. This simplifies the code in svc_unregister() and provides better backwards compatibility with legacy user space that does not support rpcbind version 4. We can get rid of the conditional compilation in here as well. This patch is part of a series that addresses http://bugzilla.kernel.org/show_bug.cgi?id=12256 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/svc.c | 35 ++++++++++++++--------------------- 1 file changed, 14 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 17e0d7265dfd..bd0ee312dac9 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -896,38 +896,31 @@ int svc_register(const struct svc_serv *serv, const int family, return error; } -#ifdef CONFIG_SUNRPC_REGISTER_V4 - +/* + * If user space is running rpcbind, it should take the v4 UNSET + * and clear everything for this [program, version]. If user space + * is running portmap, it will reject the v4 UNSET, but won't have + * any "inet6" entries anyway. So a PMAP_UNSET should be sufficient + * in this case to clear all existing entries for [program, version]. + */ static void __svc_unregister(const u32 program, const u32 version, const char *progname) { - struct sockaddr_in6 sin6 = { - .sin6_family = AF_INET6, - .sin6_addr = IN6ADDR_ANY_INIT, - .sin6_port = 0, - }; int error; - error = rpcb_v4_register(program, version, - (struct sockaddr *)&sin6, ""); - dprintk("svc: %s(%sv%u), error %d\n", - __func__, progname, version, error); -} - -#else /* CONFIG_SUNRPC_REGISTER_V4 */ + error = rpcb_v4_register(program, version, NULL, ""); -static void __svc_unregister(const u32 program, const u32 version, - const char *progname) -{ - int error; + /* + * User space didn't support rpcbind v4, so retry this + * request with the legacy rpcbind v2 protocol. + */ + if (error == -EPROTONOSUPPORT) + error = rpcb_register(program, version, 0, 0); - error = rpcb_register(program, version, 0, 0); dprintk("svc: %s(%sv%u), error %d\n", __func__, progname, version, error); } -#endif /* CONFIG_SUNRPC_REGISTER_V4 */ - /* * All netids, bind addresses and ports registered for [program, version] * are removed from the local rpcbind database (if the service is not -- cgit v1.2.3 From cadc0fa534e51e20fdffe1623913c163a18d71b1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Mar 2009 20:47:36 -0400 Subject: SUNRPC: Simplify kernel RPC service registration The kernel registers RPC services with the local portmapper with an rpcbind SET upcall to the local portmapper. Traditionally, this used rpcbind v2 (PMAP), but registering RPC services that support IPv6 requires rpcbind v3 or v4. Since we now want separate PF_INET and PF_INET6 listeners for each kernel RPC service, svc_register() will do only one of those registrations at a time. For PF_INET, it tries an rpcb v4 SET upcall first; if that fails, it does a legacy portmap SET. This makes it entirely backwards compatible with legacy user space, but allows a proper v4 SET to be used if rpcbind is available. For PF_INET6, it does an rpcb v4 SET upcall. If that fails, it fails the registration, and thus the transport creation. This let's the kernel detect if user space is able to support IPv6 RPC services, and thus whether it should maintain a PF_INET6 listener for each service at all. This provides complete backwards compatibilty with legacy user space that only supports rpcbind v2. The only down-side is that registering a new kernel RPC service may take an extra exchange with the local portmapper on legacy systems, but this is an infrequent operation and is done over UDP (no lingering sockets in TIMEWAIT), so it shouldn't be consequential. This patch is part of a series that addresses http://bugzilla.kernel.org/show_bug.cgi?id=12256 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/svc.c | 79 ++++++++++++++++++++++++-------------------------------- 1 file changed, 34 insertions(+), 45 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index bd0ee312dac9..142f64745fba 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -718,8 +718,6 @@ svc_exit_thread(struct svc_rqst *rqstp) } EXPORT_SYMBOL_GPL(svc_exit_thread); -#ifdef CONFIG_SUNRPC_REGISTER_V4 - /* * Register an "inet" protocol family netid with the local * rpcbind daemon via an rpcbind v4 SET request. @@ -734,12 +732,13 @@ static int __svc_rpcb_register4(const u32 program, const u32 version, const unsigned short protocol, const unsigned short port) { - struct sockaddr_in sin = { + const struct sockaddr_in sin = { .sin_family = AF_INET, .sin_addr.s_addr = htonl(INADDR_ANY), .sin_port = htons(port), }; - char *netid; + const char *netid; + int error; switch (protocol) { case IPPROTO_UDP: @@ -752,10 +751,20 @@ static int __svc_rpcb_register4(const u32 program, const u32 version, return -ENOPROTOOPT; } - return rpcb_v4_register(program, version, - (struct sockaddr *)&sin, netid); + error = rpcb_v4_register(program, version, + (const struct sockaddr *)&sin, netid); + + /* + * User space didn't support rpcbind v4, so retry this + * registration request with the legacy rpcbind v2 protocol. + */ + if (error == -EPROTONOSUPPORT) + error = rpcb_register(program, version, protocol, port); + + return error; } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) /* * Register an "inet6" protocol family netid with the local * rpcbind daemon via an rpcbind v4 SET request. @@ -770,12 +779,13 @@ static int __svc_rpcb_register6(const u32 program, const u32 version, const unsigned short protocol, const unsigned short port) { - struct sockaddr_in6 sin6 = { + const struct sockaddr_in6 sin6 = { .sin6_family = AF_INET6, .sin6_addr = IN6ADDR_ANY_INIT, .sin6_port = htons(port), }; - char *netid; + const char *netid; + int error; switch (protocol) { case IPPROTO_UDP: @@ -788,9 +798,19 @@ static int __svc_rpcb_register6(const u32 program, const u32 version, return -ENOPROTOOPT; } - return rpcb_v4_register(program, version, - (struct sockaddr *)&sin6, netid); + error = rpcb_v4_register(program, version, + (const struct sockaddr *)&sin6, netid); + + /* + * User space didn't support rpcbind version 4, so we won't + * use a PF_INET6 listener. + */ + if (error == -EPROTONOSUPPORT) + error = -EAFNOSUPPORT; + + return error; } +#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ /* * Register a kernel RPC service via rpcbind version 4. @@ -809,48 +829,17 @@ static int __svc_register(const u32 program, const u32 version, case PF_INET: return __svc_rpcb_register4(program, version, protocol, port); + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case PF_INET6: - error = __svc_rpcb_register6(program, version, + return__svc_rpcb_register6(program, version, protocol, port); - if (error < 0) - return error; - - /* - * Work around bug in some versions of Linux rpcbind - * which don't allow registration of both inet and - * inet6 netids. - * - * Error return ignored for now. - */ - __svc_rpcb_register4(program, version, - protocol, port); - return 0; +#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ } return -EAFNOSUPPORT; } -#else /* CONFIG_SUNRPC_REGISTER_V4 */ - -/* - * Register a kernel RPC service via rpcbind version 2. - * - * Returns zero on success; a negative errno value is returned - * if any error occurs. - */ -static int __svc_register(const u32 program, const u32 version, - const int family, - const unsigned short protocol, - const unsigned short port) -{ - if (family != PF_INET) - return -EAFNOSUPPORT; - - return rpcb_register(program, version, protocol, port); -} - -#endif /* CONFIG_SUNRPC_REGISTER_V4 */ - /** * svc_register - register an RPC service with the local portmapper * @serv: svc_serv struct for the service to register -- cgit v1.2.3 From 363f724cdd3d2ae554e261be995abdeb15f7bdd9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Mar 2009 20:47:44 -0400 Subject: SUNRPC: rpcb_register() should handle errors silently Move error reporting for RPC registration to rpcb_register's caller. This way the caller can choose to recover silently from certain errors, but report errors it does not recognize. Error reporting for kernel RPC service registration is now handled in one place. This patch is part of a series that addresses http://bugzilla.kernel.org/show_bug.cgi?id=12256 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/rpcb_clnt.c | 2 +- net/sunrpc/svc.c | 18 +++++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 8ea8907d4b8d..beee6da33035 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -194,7 +194,7 @@ static int rpcb_register_call(const u32 version, struct rpc_message *msg) error = PTR_ERR(rpcb_clnt); if (error < 0) { - printk(KERN_WARNING "RPC: failed to contact local rpcbind " + dprintk("RPC: failed to contact local rpcbind " "server (errno %d).\n", -error); return error; } diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 142f64745fba..8ba654bdd608 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -818,26 +818,30 @@ static int __svc_rpcb_register6(const u32 program, const u32 version, * Returns zero on success; a negative errno value is returned * if any error occurs. */ -static int __svc_register(const u32 program, const u32 version, +static int __svc_register(const char *progname, + const u32 program, const u32 version, const int family, const unsigned short protocol, const unsigned short port) { - int error; + int error = -EAFNOSUPPORT; switch (family) { case PF_INET: - return __svc_rpcb_register4(program, version, + error = __svc_rpcb_register4(program, version, protocol, port); break; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case PF_INET6: - return__svc_rpcb_register6(program, version, + error = __svc_rpcb_register6(program, version, protocol, port); #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */ } - return -EAFNOSUPPORT; + if (error < 0) + printk(KERN_WARNING "svc: failed to register %sv%u RPC " + "service (errno %d).\n", progname, version, -error); + return error; } /** @@ -875,8 +879,8 @@ int svc_register(const struct svc_serv *serv, const int family, if (progp->pg_vers[i]->vs_hidden) continue; - error = __svc_register(progp->pg_prog, i, - family, proto, port); + error = __svc_register(progp->pg_name, progp->pg_prog, + i, family, proto, port); if (error < 0) break; } -- cgit v1.2.3 From 9355982830ad67dca35e0f3d43319f3d438f82b4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Mar 2009 20:47:51 -0400 Subject: SUNRPC: Remove CONFIG_SUNRPC_REGISTER_V4 We just augmented the kernel's RPC service registration code so that it automatically adjusts to what is supported in user space. Thus we no longer need the kernel configuration option to enable registering RPC services with v4 -- it's all done automatically. This patch is part of a series that addresses http://bugzilla.kernel.org/show_bug.cgi?id=12256 Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/Kconfig | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'net') diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 5592883e1e4a..afd91c78ce8e 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -17,28 +17,6 @@ config SUNRPC_XPRT_RDMA If unsure, say N. -config SUNRPC_REGISTER_V4 - bool "Register local RPC services via rpcbind v4 (EXPERIMENTAL)" - depends on SUNRPC && EXPERIMENTAL - default n - help - Sun added support for registering RPC services at an IPv6 - address by creating two new versions of the rpcbind protocol - (RFC 1833). - - This option enables support in the kernel RPC server for - registering kernel RPC services via version 4 of the rpcbind - protocol. If you enable this option, you must run a portmapper - daemon that supports rpcbind protocol version 4. - - Serving NFS over IPv6 from knfsd (the kernel's NFS server) - requires that you enable this option and use a portmapper that - supports rpcbind version 4. - - If unsure, say N to get traditional behavior (register kernel - RPC services using only rpcbind version 2). Distributions - using the legacy Linux portmapper daemon must say N here. - config RPCSEC_GSS_KRB5 tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" depends on SUNRPC && EXPERIMENTAL -- cgit v1.2.3 From 2f181855a0b3c2b39314944add7b41c15647cf86 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 28 Mar 2009 23:39:18 -0700 Subject: gso: Fix support for linear packets When GRO/frag_list support was added to GSO, I made an error which broke the support for segmenting linear GSO packets (GSO packets are normally non-linear in the payload). These days most of these packets are constructed by the tun driver, which prefers to allocate linear memory if possible. This is fixed in the latest kernel, but for 2.6.29 and earlier it is still the norm. Therefore this bug causes failures with GSO when used with tun in 2.6.29. Reported-by: James Huang Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/skbuff.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 6acbf9e79eb1..ce6356cd9f71 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2579,7 +2579,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) skb_network_header_len(skb)); skb_copy_from_linear_data(skb, nskb->data, doffset); - if (pos >= offset + len) + if (fskb != skb_shinfo(skb)->frag_list) continue; if (!sg) { -- cgit v1.2.3 From f940964901aa69e28ce729d7614061d014184472 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Sat, 28 Mar 2009 15:38:30 +0000 Subject: netfilter: fix endian bug in conntrack printks dcc_ip is treated as a host-endian value in the first printk, but the second printk uses %pI4 which expects a be32. This will cause a mismatch between the debug statement and the warning statement. Treat as a be32 throughout and avoid some byteswapping during some comparisions, and allow another user of HIPQUAD to bite the dust. Signed-off-by: Harvey Harrison Signed-off-by: David S. Miller --- net/netfilter/nf_conntrack_irc.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 409c8be58e7c..8bd98c84f77e 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -66,7 +66,7 @@ static const char *const dccprotos[] = { * ad_beg_p returns pointer to first byte of addr data * ad_end_p returns pointer to last byte of addr data */ -static int parse_dcc(char *data, const char *data_end, u_int32_t *ip, +static int parse_dcc(char *data, const char *data_end, __be32 *ip, u_int16_t *port, char **ad_beg_p, char **ad_end_p) { char *tmp; @@ -85,7 +85,7 @@ static int parse_dcc(char *data, const char *data_end, u_int32_t *ip, return -1; *ad_beg_p = data; - *ip = simple_strtoul(data, &data, 10); + *ip = cpu_to_be32(simple_strtoul(data, &data, 10)); /* skip blanks between ip and port */ while (*data == ' ') { @@ -112,7 +112,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, int dir = CTINFO2DIR(ctinfo); struct nf_conntrack_expect *exp; struct nf_conntrack_tuple *tuple; - u_int32_t dcc_ip; + __be32 dcc_ip; u_int16_t dcc_port; __be16 port; int i, ret = NF_ACCEPT; @@ -177,13 +177,14 @@ static int help(struct sk_buff *skb, unsigned int protoff, pr_debug("unable to parse dcc command\n"); continue; } - pr_debug("DCC bound ip/port: %u.%u.%u.%u:%u\n", - HIPQUAD(dcc_ip), dcc_port); + + pr_debug("DCC bound ip/port: %pI4:%u\n", + &dcc_ip, dcc_port); /* dcc_ip can be the internal OR external (NAT'ed) IP */ tuple = &ct->tuplehash[dir].tuple; - if (tuple->src.u3.ip != htonl(dcc_ip) && - tuple->dst.u3.ip != htonl(dcc_ip)) { + if (tuple->src.u3.ip != dcc_ip && + tuple->dst.u3.ip != dcc_ip) { if (net_ratelimit()) printk(KERN_WARNING "Forged DCC command from %pI4: %pI4:%u\n", -- cgit v1.2.3 From e7557af56a576762a655f1aaaded253ad14c5958 Mon Sep 17 00:00:00 2001 From: Harvey Harrison Date: Sat, 28 Mar 2009 15:38:31 +0000 Subject: netpoll: store local and remote ip in net-endian Allows for the removal of byteswapping in some places and the removal of HIPQUAD (replaced by %pI4). Signed-off-by: Harvey Harrison Signed-off-by: David S. Miller --- net/core/netpoll.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 755414cd49d1..b5873bdff612 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -345,8 +345,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) udph->dest = htons(np->remote_port); udph->len = htons(udp_len); udph->check = 0; - udph->check = csum_tcpudp_magic(htonl(np->local_ip), - htonl(np->remote_ip), + udph->check = csum_tcpudp_magic(np->local_ip, + np->remote_ip, udp_len, IPPROTO_UDP, csum_partial(udph, udp_len, 0)); if (udph->check == 0) @@ -365,8 +365,8 @@ void netpoll_send_udp(struct netpoll *np, const char *msg, int len) iph->ttl = 64; iph->protocol = IPPROTO_UDP; iph->check = 0; - put_unaligned(htonl(np->local_ip), &(iph->saddr)); - put_unaligned(htonl(np->remote_ip), &(iph->daddr)); + put_unaligned(np->local_ip, &(iph->saddr)); + put_unaligned(np->remote_ip, &(iph->daddr)); iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); @@ -424,7 +424,7 @@ static void arp_reply(struct sk_buff *skb) memcpy(&tip, arp_ptr, 4); /* Should we ignore arp? */ - if (tip != htonl(np->local_ip) || + if (tip != np->local_ip || ipv4_is_loopback(tip) || ipv4_is_multicast(tip)) return; @@ -533,9 +533,9 @@ int __netpoll_rx(struct sk_buff *skb) goto out; if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr)) goto out; - if (np->local_ip && np->local_ip != ntohl(iph->daddr)) + if (np->local_ip && np->local_ip != iph->daddr) goto out; - if (np->remote_ip && np->remote_ip != ntohl(iph->saddr)) + if (np->remote_ip && np->remote_ip != iph->saddr) goto out; if (np->local_port && np->local_port != ntohs(uh->dest)) goto out; @@ -560,14 +560,14 @@ void netpoll_print_options(struct netpoll *np) { printk(KERN_INFO "%s: local port %d\n", np->name, np->local_port); - printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n", - np->name, HIPQUAD(np->local_ip)); + printk(KERN_INFO "%s: local IP %pI4\n", + np->name, &np->local_ip); printk(KERN_INFO "%s: interface %s\n", np->name, np->dev_name); printk(KERN_INFO "%s: remote port %d\n", np->name, np->remote_port); - printk(KERN_INFO "%s: remote IP %d.%d.%d.%d\n", - np->name, HIPQUAD(np->remote_ip)); + printk(KERN_INFO "%s: remote IP %pI4\n", + np->name, &np->remote_ip); printk(KERN_INFO "%s: remote ethernet address %pM\n", np->name, np->remote_mac); } @@ -589,7 +589,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt) if ((delim = strchr(cur, '/')) == NULL) goto parse_failed; *delim = 0; - np->local_ip = ntohl(in_aton(cur)); + np->local_ip = in_aton(cur); cur = delim; } cur++; @@ -618,7 +618,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt) if ((delim = strchr(cur, '/')) == NULL) goto parse_failed; *delim = 0; - np->remote_ip = ntohl(in_aton(cur)); + np->remote_ip = in_aton(cur); cur = delim + 1; if (*cur != 0) { @@ -759,10 +759,9 @@ int netpoll_setup(struct netpoll *np) goto release; } - np->local_ip = ntohl(in_dev->ifa_list->ifa_local); + np->local_ip = in_dev->ifa_list->ifa_local; rcu_read_unlock(); - printk(KERN_INFO "%s: local IP %d.%d.%d.%d\n", - np->name, HIPQUAD(np->local_ip)); + printk(KERN_INFO "%s: local IP %pI4\n", np->name, &np->local_ip); } if (np->rx_hook) { -- cgit v1.2.3 From 424b86a6bc9459a830e1e94e0e908f3ac1716b7e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 29 Mar 2009 13:46:01 -0700 Subject: netfilter: xtables: fix IPv6 dependency in the cluster match This patch fixes a dependency with IPv6: ERROR: "__ipv6_addr_type" [net/netfilter/xt_cluster.ko] undefined! This patch adds a function that checks if the higher bits of the address is 0xFF to identify a multicast address, instead of adding a dependency due to __ipv6_addr_type(). I came up with this idea after Patrick McHardy pointed possible problems with runtime module dependencies. Reported-by: Steven Noonan Reported-by: Randy Dunlap Reported-by: Cyrill Gorcunov Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- net/netfilter/xt_cluster.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index ad5bd890e4e8..6c4847662b85 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -57,6 +57,13 @@ xt_cluster_hash(const struct nf_conn *ct, return (((u64)hash * info->total_nodes) >> 32); } +static inline bool +xt_cluster_ipv6_is_multicast(const struct in6_addr *addr) +{ + __be32 st = addr->s6_addr32[0]; + return ((st & htonl(0xFF000000)) == htonl(0xFF000000)); +} + static inline bool xt_cluster_is_multicast_addr(const struct sk_buff *skb, u_int8_t family) { @@ -67,8 +74,8 @@ xt_cluster_is_multicast_addr(const struct sk_buff *skb, u_int8_t family) is_multicast = ipv4_is_multicast(ip_hdr(skb)->daddr); break; case NFPROTO_IPV6: - is_multicast = ipv6_addr_type(&ipv6_hdr(skb)->daddr) & - IPV6_ADDR_MULTICAST; + is_multicast = + xt_cluster_ipv6_is_multicast(&ipv6_hdr(skb)->daddr); break; default: WARN_ON(1); -- cgit v1.2.3 From aa85ea5b89c36c51200d795dd788139bd9b8cf50 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 30 Mar 2009 22:05:15 -0600 Subject: cpumask: use new cpumask_ functions in core code. Impact: cleanup Time to clean up remaining laggards using the old cpu_ functions. Signed-off-by: Rusty Russell Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: Trond.Myklebust@netapp.com --- net/sunrpc/svc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index c51fed4d1af1..bb507e2bb94d 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -312,7 +312,7 @@ svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx) switch (m->mode) { case SVC_POOL_PERCPU: { - set_cpus_allowed_ptr(task, &cpumask_of_cpu(node)); + set_cpus_allowed_ptr(task, cpumask_of(node)); break; } case SVC_POOL_PERNODE: -- cgit v1.2.3 From 692105b8ac5bcd75dc65f6a8f10bdbd0f0f34dcf Mon Sep 17 00:00:00 2001 From: Matt LaPlante Date: Mon, 26 Jan 2009 11:12:25 +0100 Subject: trivial: fix typos/grammar errors in Kconfig texts Signed-off-by: Matt LaPlante Acked-by: Randy Dunlap Signed-off-by: Jiri Kosina --- net/Kconfig | 2 +- net/ipv6/Kconfig | 18 +++++++++--------- net/mac80211/Kconfig | 2 +- net/netfilter/Kconfig | 2 +- net/phonet/Kconfig | 2 +- net/sunrpc/Kconfig | 2 +- net/wimax/Kconfig | 2 +- 7 files changed, 15 insertions(+), 15 deletions(-) (limited to 'net') diff --git a/net/Kconfig b/net/Kconfig index ec93e7e38b38..ce77db4fcec8 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -140,7 +140,7 @@ config NETFILTER_ADVANCED default y help If you say Y here you can select between all the netfilter modules. - If you say N the more ununsual ones will not be shown and the + If you say N the more unusual ones will not be shown and the basic ones needed by most people will default to 'M'. If unsure, say Y. diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index ec992159b5f8..ca8cb326d1d2 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -22,17 +22,17 @@ menuconfig IPV6 if IPV6 config IPV6_PRIVACY - bool "IPv6: Privacy Extensions support" + bool "IPv6: Privacy Extensions (RFC 3041) support" ---help--- Privacy Extensions for Stateless Address Autoconfiguration in IPv6 - support. With this option, additional periodically-alter - pseudo-random global-scope unicast address(es) will assigned to + support. With this option, additional periodically-altered + pseudo-random global-scope unicast address(es) will be assigned to your interface(s). - We use our standard pseudo random algorithm to generate randomized - interface identifier, instead of one described in RFC 3041. + We use our standard pseudo-random algorithm to generate the + randomized interface identifier, instead of one described in RFC 3041. - By default, kernel do not generate temporary addresses. + By default the kernel does not generate temporary addresses. To use temporary addresses, do echo 2 >/proc/sys/net/ipv6/conf/all/use_tempaddr @@ -43,9 +43,9 @@ config IPV6_ROUTER_PREF bool "IPv6: Router Preference (RFC 4191) support" ---help--- Router Preference is an optional extension to the Router - Advertisement message to improve the ability of hosts - to pick more appropriate router, especially when the hosts - is placed in a multi-homed network. + Advertisement message which improves the ability of hosts + to pick an appropriate router, especially when the hosts + are placed in a multi-homed network. If unsure, say N. diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 60c16162474c..f3d9ae350fb6 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -33,7 +33,7 @@ choice ---help--- This option selects the default rate control algorithm mac80211 will use. Note that this default can still be - overriden through the ieee80211_default_rc_algo module + overridden through the ieee80211_default_rc_algo module parameter if different algorithms are available. config MAC80211_RC_DEFAULT_PID diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 2c967e4f706c..bb279bf59a1b 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -52,7 +52,7 @@ config NF_CT_ACCT Please note that currently this option only sets a default state. You may change it at boot time with nf_conntrack.acct=0/1 kernel - paramater or by loading the nf_conntrack module with acct=0/1. + parameter or by loading the nf_conntrack module with acct=0/1. You may also disable/enable it on a running system with: sysctl net.netfilter.nf_conntrack_acct=0/1 diff --git a/net/phonet/Kconfig b/net/phonet/Kconfig index 51a5669573f2..6ec7d55b1769 100644 --- a/net/phonet/Kconfig +++ b/net/phonet/Kconfig @@ -6,7 +6,7 @@ config PHONET tristate "Phonet protocols family" help The Phone Network protocol (PhoNet) is a packet-oriented - communication protocol developped by Nokia for use with its modems. + communication protocol developed by Nokia for use with its modems. This is required for Maemo to use cellular data connectivity (if supported). It can also be used to control Nokia phones diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index 5592883e1e4a..3f7faa9688ae 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -69,7 +69,7 @@ config RPCSEC_GSS_SPKM3 select CRYPTO_CBC help Choose Y here to enable Secure RPC using the SPKM3 public key - GSS-API mechansim (RFC 2025). + GSS-API mechanism (RFC 2025). Secure RPC calls with SPKM3 require an auxiliary userspace daemon which may be found in the Linux nfs-utils package diff --git a/net/wimax/Kconfig b/net/wimax/Kconfig index 18495cdcd10d..1b46747a5f5a 100644 --- a/net/wimax/Kconfig +++ b/net/wimax/Kconfig @@ -8,7 +8,7 @@ # # As well, enablement of the RFKILL code means we need the INPUT layer # support to inject events coming from hw rfkill switches. That -# dependency could be killed if input.h provided appropiate means to +# dependency could be killed if input.h provided appropriate means to # work when input is disabled. comment "WiMAX Wireless Broadband support requires CONFIG_INPUT enabled" -- cgit v1.2.3 From 99b76233803beab302123d243eea9e41149804f3 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 25 Mar 2009 22:48:06 +0300 Subject: proc 2/2: remove struct proc_dir_entry::owner Setting ->owner as done currently (pde->owner = THIS_MODULE) is racy as correctly noted at bug #12454. Someone can lookup entry with NULL ->owner, thus not pinning enything, and release it later resulting in module refcount underflow. We can keep ->owner and supply it at registration time like ->proc_fops and ->data. But this leaves ->owner as easy-manipulative field (just one C assignment) and somebody will forget to unpin previous/pin current module when switching ->owner. ->proc_fops is declared as "const" which should give some thoughts. ->read_proc/->write_proc were just fixed to not require ->owner for protection. rmmod'ed directories will be empty and return "." and ".." -- no harm. And directories with tricky enough readdir and lookup shouldn't be modular. We definitely don't want such modular code. Removing ->owner will also make PDE smaller. So, let's nuke it. Kudos to Jeff Layton for reminding about this, let's say, oversight. http://bugzilla.kernel.org/show_bug.cgi?id=12454 Signed-off-by: Alexey Dobriyan --- net/appletalk/atalk_proc.c | 1 - net/atm/mpoa_proc.c | 1 - net/atm/proc.c | 1 - net/can/bcm.c | 4 ---- net/can/proc.c | 2 -- net/core/pktgen.c | 1 - net/irda/irproc.c | 1 - net/llc/llc_proc.c | 1 - net/sctp/protocol.c | 8 ++------ net/sunrpc/cache.c | 4 ---- net/sunrpc/stats.c | 10 ++-------- 11 files changed, 4 insertions(+), 30 deletions(-) (limited to 'net') diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c index 162199a2d74f..fd8e0847b254 100644 --- a/net/appletalk/atalk_proc.c +++ b/net/appletalk/atalk_proc.c @@ -281,7 +281,6 @@ int __init atalk_proc_init(void) atalk_proc_dir = proc_mkdir("atalk", init_net.proc_net); if (!atalk_proc_dir) goto out; - atalk_proc_dir->owner = THIS_MODULE; p = proc_create("interface", S_IRUGO, atalk_proc_dir, &atalk_seq_interface_fops); diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c index 4990541ef5da..1a0f5ccea9c4 100644 --- a/net/atm/mpoa_proc.c +++ b/net/atm/mpoa_proc.c @@ -281,7 +281,6 @@ int mpc_proc_init(void) printk(KERN_ERR "Unable to initialize /proc/atm/%s\n", STAT_FILE_NAME); return -ENOMEM; } - p->owner = THIS_MODULE; return 0; } diff --git a/net/atm/proc.c b/net/atm/proc.c index 49487b313f22..e7b3b273907d 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -476,7 +476,6 @@ int __init atm_proc_init(void) atm_proc_root, e->proc_fops); if (!dirent) goto err_out_remove; - dirent->owner = THIS_MODULE; e->dirent = dirent; } ret = 0; diff --git a/net/can/bcm.c b/net/can/bcm.c index b7c7d4651136..95d7f32643ae 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1604,10 +1604,6 @@ static int __init bcm_module_init(void) /* create /proc/net/can-bcm directory */ proc_dir = proc_mkdir("can-bcm", init_net.proc_net); - - if (proc_dir) - proc_dir->owner = THIS_MODULE; - return 0; } diff --git a/net/can/proc.c b/net/can/proc.c index 520fef5e5398..1463653dbe34 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -473,8 +473,6 @@ void can_init_proc(void) return; } - can_dir->owner = THIS_MODULE; - /* own procfs entries from the AF_CAN core */ pde_version = can_create_proc_readentry(CAN_PROC_VERSION, 0644, can_proc_read_version, NULL); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 32d419f5ac98..3779c1438c11 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3806,7 +3806,6 @@ static int __init pg_init(void) pg_proc_dir = proc_mkdir(PG_PROC_DIR, init_net.proc_net); if (!pg_proc_dir) return -ENODEV; - pg_proc_dir->owner = THIS_MODULE; pe = proc_create(PGCTRL, 0600, pg_proc_dir, &pktgen_fops); if (pe == NULL) { diff --git a/net/irda/irproc.c b/net/irda/irproc.c index 88e80a312732..8ff1861649e8 100644 --- a/net/irda/irproc.c +++ b/net/irda/irproc.c @@ -70,7 +70,6 @@ void __init irda_proc_register(void) proc_irda = proc_mkdir("irda", init_net.proc_net); if (proc_irda == NULL) return; - proc_irda->owner = THIS_MODULE; for (i = 0; i < ARRAY_SIZE(irda_dirs); i++) d = proc_create(irda_dirs[i].name, 0, proc_irda, diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index b58bd7c6cdf8..d208b3396d94 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -236,7 +236,6 @@ int __init llc_proc_init(void) llc_proc_dir = proc_mkdir("llc", init_net.proc_net); if (!llc_proc_dir) goto out; - llc_proc_dir->owner = THIS_MODULE; p = proc_create("socket", S_IRUGO, llc_proc_dir, &llc_seq_socket_fops); if (!p) diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index cb198af8887c..8eb3e61cb701 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -106,12 +106,8 @@ static __init int sctp_proc_init(void) goto out_nomem; #ifdef CONFIG_PROC_FS if (!proc_net_sctp) { - struct proc_dir_entry *ent; - ent = proc_mkdir("sctp", init_net.proc_net); - if (ent) { - ent->owner = THIS_MODULE; - proc_net_sctp = ent; - } else + proc_net_sctp = proc_mkdir("sctp", init_net.proc_net); + if (!proc_net_sctp) goto out_free_percpu; } diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 4735caad26ed..20029a79a5de 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -313,7 +313,6 @@ static int create_cache_proc_entries(struct cache_detail *cd) cd->proc_ent = proc_mkdir(cd->name, proc_net_rpc); if (cd->proc_ent == NULL) goto out_nomem; - cd->proc_ent->owner = cd->owner; cd->channel_ent = cd->content_ent = NULL; p = proc_create_data("flush", S_IFREG|S_IRUSR|S_IWUSR, @@ -321,7 +320,6 @@ static int create_cache_proc_entries(struct cache_detail *cd) cd->flush_ent = p; if (p == NULL) goto out_nomem; - p->owner = cd->owner; if (cd->cache_request || cd->cache_parse) { p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR, @@ -329,7 +327,6 @@ static int create_cache_proc_entries(struct cache_detail *cd) cd->channel_ent = p; if (p == NULL) goto out_nomem; - p->owner = cd->owner; } if (cd->cache_show) { p = proc_create_data("content", S_IFREG|S_IRUSR|S_IWUSR, @@ -337,7 +334,6 @@ static int create_cache_proc_entries(struct cache_detail *cd) cd->content_ent = p; if (p == NULL) goto out_nomem; - p->owner = cd->owner; } return 0; out_nomem: diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c index 085372ef4feb..1ef6e46d9da2 100644 --- a/net/sunrpc/stats.c +++ b/net/sunrpc/stats.c @@ -262,14 +262,8 @@ void rpc_proc_init(void) { dprintk("RPC: registering /proc/net/rpc\n"); - if (!proc_net_rpc) { - struct proc_dir_entry *ent; - ent = proc_mkdir("rpc", init_net.proc_net); - if (ent) { - ent->owner = THIS_MODULE; - proc_net_rpc = ent; - } - } + if (!proc_net_rpc) + proc_net_rpc = proc_mkdir("rpc", init_net.proc_net); } void -- cgit v1.2.3 From 377f0a08e4cb56658d878d22c3aed4716e283c6b Mon Sep 17 00:00:00 2001 From: Rami Rosen Date: Tue, 31 Mar 2009 14:43:17 -0700 Subject: ipv4: remove unused parameter from tcp_recv_urg(). Signed-off-by: Rami Rosen Signed-off-by: David S. Miller --- net/ipv4/tcp.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2451aeb5ac23..fafbec8b073e 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1081,8 +1081,7 @@ out_err: * this, no blocking and very strange errors 8) */ -static int tcp_recv_urg(struct sock *sk, long timeo, - struct msghdr *msg, int len, int flags) +static int tcp_recv_urg(struct sock *sk, struct msghdr *msg, int len, int flags) { struct tcp_sock *tp = tcp_sk(sk); @@ -1697,7 +1696,7 @@ out: return err; recv_urg: - err = tcp_recv_urg(sk, timeo, msg, len, flags); + err = tcp_recv_urg(sk, msg, len, flags); goto out; } -- cgit v1.2.3 From c9caceca25854eff4328c89045793a91bf8f9ee3 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Tue, 31 Mar 2009 15:06:26 -0700 Subject: core: remove pointless conditional before kfree() Remove pointless conditional before kfree(). Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/core/ethtool.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 244ca56dffac..d9d5160610d5 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -261,8 +261,7 @@ static int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr) ret = 0; err_out: - if (rule_buf) - kfree(rule_buf); + kfree(rule_buf); return ret; } -- cgit v1.2.3 From ce3b0f8d5c2203301fc87f3aaaed73e5819e2a48 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 29 Mar 2009 19:08:22 -0400 Subject: New helper - current_umask() current->fs->umask is what most of fs_struct users are doing. Put that into a helper function. Signed-off-by: Al Viro --- net/unix/af_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index baac91049b0e..9dcc6e7f96ec 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -832,7 +832,7 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) * All right, let's create it. */ mode = S_IFSOCK | - (SOCK_INODE(sock)->i_mode & ~current->fs->umask); + (SOCK_INODE(sock)->i_mode & ~current_umask()); err = mnt_want_write(nd.path.mnt); if (err) goto out_mknod_dput; -- cgit v1.2.3 From 3d3041768296c4993c0aba686bf0775faab5236a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 31 Mar 2009 15:19:22 -0700 Subject: proc tty: switch ircomm to ->proc_fops Signed-off-by: Alexey Dobriyan Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/irda/ircomm/ircomm_tty.c | 256 +++++++++++++++++++++++-------------------- 1 file changed, 138 insertions(+), 118 deletions(-) (limited to 'net') diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 086d5ef098fd..811984d9324b 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -72,8 +73,7 @@ static int ircomm_tty_control_indication(void *instance, void *sap, static void ircomm_tty_flow_indication(void *instance, void *sap, LOCAL_FLOW cmd); #ifdef CONFIG_PROC_FS -static int ircomm_tty_read_proc(char *buf, char **start, off_t offset, int len, - int *eof, void *unused); +static const struct file_operations ircomm_tty_proc_fops; #endif /* CONFIG_PROC_FS */ static struct tty_driver *driver; @@ -98,7 +98,7 @@ static const struct tty_operations ops = { .hangup = ircomm_tty_hangup, .wait_until_sent = ircomm_tty_wait_until_sent, #ifdef CONFIG_PROC_FS - .read_proc = ircomm_tty_read_proc, + .proc_fops = &ircomm_tty_proc_fops, #endif /* CONFIG_PROC_FS */ }; @@ -1245,150 +1245,170 @@ static void ircomm_tty_flow_indication(void *instance, void *sap, } #ifdef CONFIG_PROC_FS -static int ircomm_tty_line_info(struct ircomm_tty_cb *self, char *buf) +static void ircomm_tty_line_info(struct ircomm_tty_cb *self, struct seq_file *m) { - int ret=0; + char sep; - ret += sprintf(buf+ret, "State: %s\n", ircomm_tty_state[self->state]); + seq_printf(m, "State: %s\n", ircomm_tty_state[self->state]); - ret += sprintf(buf+ret, "Service type: "); + seq_puts(m, "Service type: "); if (self->service_type & IRCOMM_9_WIRE) - ret += sprintf(buf+ret, "9_WIRE"); + seq_puts(m, "9_WIRE"); else if (self->service_type & IRCOMM_3_WIRE) - ret += sprintf(buf+ret, "3_WIRE"); + seq_puts(m, "3_WIRE"); else if (self->service_type & IRCOMM_3_WIRE_RAW) - ret += sprintf(buf+ret, "3_WIRE_RAW"); + seq_puts(m, "3_WIRE_RAW"); else - ret += sprintf(buf+ret, "No common service type!\n"); - ret += sprintf(buf+ret, "\n"); - - ret += sprintf(buf+ret, "Port name: %s\n", self->settings.port_name); - - ret += sprintf(buf+ret, "DTE status: "); - if (self->settings.dte & IRCOMM_RTS) - ret += sprintf(buf+ret, "RTS|"); - if (self->settings.dte & IRCOMM_DTR) - ret += sprintf(buf+ret, "DTR|"); - if (self->settings.dte) - ret--; /* remove the last | */ - ret += sprintf(buf+ret, "\n"); - - ret += sprintf(buf+ret, "DCE status: "); - if (self->settings.dce & IRCOMM_CTS) - ret += sprintf(buf+ret, "CTS|"); - if (self->settings.dce & IRCOMM_DSR) - ret += sprintf(buf+ret, "DSR|"); - if (self->settings.dce & IRCOMM_CD) - ret += sprintf(buf+ret, "CD|"); - if (self->settings.dce & IRCOMM_RI) - ret += sprintf(buf+ret, "RI|"); - if (self->settings.dce) - ret--; /* remove the last | */ - ret += sprintf(buf+ret, "\n"); - - ret += sprintf(buf+ret, "Configuration: "); + seq_puts(m, "No common service type!\n"); + seq_putc(m, '\n'); + + seq_printf(m, "Port name: %s\n", self->settings.port_name); + + seq_printf(m, "DTE status:"); + sep = ' '; + if (self->settings.dte & IRCOMM_RTS) { + seq_printf(m, "%cRTS", sep); + sep = '|'; + } + if (self->settings.dte & IRCOMM_DTR) { + seq_printf(m, "%cDTR", sep); + sep = '|'; + } + seq_putc(m, '\n'); + + seq_puts(m, "DCE status:"); + sep = ' '; + if (self->settings.dce & IRCOMM_CTS) { + seq_printf(m, "%cCTS", sep); + sep = '|'; + } + if (self->settings.dce & IRCOMM_DSR) { + seq_printf(m, "%cDSR", sep); + sep = '|'; + } + if (self->settings.dce & IRCOMM_CD) { + seq_printf(m, "%cCD", sep); + sep = '|'; + } + if (self->settings.dce & IRCOMM_RI) { + seq_printf(m, "%cRI", sep); + sep = '|'; + } + seq_putc(m, '\n'); + + seq_puts(m, "Configuration: "); if (!self->settings.null_modem) - ret += sprintf(buf+ret, "DTE <-> DCE\n"); + seq_puts(m, "DTE <-> DCE\n"); else - ret += sprintf(buf+ret, - "DTE <-> DTE (null modem emulation)\n"); - - ret += sprintf(buf+ret, "Data rate: %d\n", self->settings.data_rate); - - ret += sprintf(buf+ret, "Flow control: "); - if (self->settings.flow_control & IRCOMM_XON_XOFF_IN) - ret += sprintf(buf+ret, "XON_XOFF_IN|"); - if (self->settings.flow_control & IRCOMM_XON_XOFF_OUT) - ret += sprintf(buf+ret, "XON_XOFF_OUT|"); - if (self->settings.flow_control & IRCOMM_RTS_CTS_IN) - ret += sprintf(buf+ret, "RTS_CTS_IN|"); - if (self->settings.flow_control & IRCOMM_RTS_CTS_OUT) - ret += sprintf(buf+ret, "RTS_CTS_OUT|"); - if (self->settings.flow_control & IRCOMM_DSR_DTR_IN) - ret += sprintf(buf+ret, "DSR_DTR_IN|"); - if (self->settings.flow_control & IRCOMM_DSR_DTR_OUT) - ret += sprintf(buf+ret, "DSR_DTR_OUT|"); - if (self->settings.flow_control & IRCOMM_ENQ_ACK_IN) - ret += sprintf(buf+ret, "ENQ_ACK_IN|"); - if (self->settings.flow_control & IRCOMM_ENQ_ACK_OUT) - ret += sprintf(buf+ret, "ENQ_ACK_OUT|"); - if (self->settings.flow_control) - ret--; /* remove the last | */ - ret += sprintf(buf+ret, "\n"); - - ret += sprintf(buf+ret, "Flags: "); - if (self->flags & ASYNC_CTS_FLOW) - ret += sprintf(buf+ret, "ASYNC_CTS_FLOW|"); - if (self->flags & ASYNC_CHECK_CD) - ret += sprintf(buf+ret, "ASYNC_CHECK_CD|"); - if (self->flags & ASYNC_INITIALIZED) - ret += sprintf(buf+ret, "ASYNC_INITIALIZED|"); - if (self->flags & ASYNC_LOW_LATENCY) - ret += sprintf(buf+ret, "ASYNC_LOW_LATENCY|"); - if (self->flags & ASYNC_CLOSING) - ret += sprintf(buf+ret, "ASYNC_CLOSING|"); - if (self->flags & ASYNC_NORMAL_ACTIVE) - ret += sprintf(buf+ret, "ASYNC_NORMAL_ACTIVE|"); - if (self->flags) - ret--; /* remove the last | */ - ret += sprintf(buf+ret, "\n"); - - ret += sprintf(buf+ret, "Role: %s\n", self->client ? - "client" : "server"); - ret += sprintf(buf+ret, "Open count: %d\n", self->open_count); - ret += sprintf(buf+ret, "Max data size: %d\n", self->max_data_size); - ret += sprintf(buf+ret, "Max header size: %d\n", self->max_header_size); + seq_puts(m, "DTE <-> DTE (null modem emulation)\n"); + + seq_printf(m, "Data rate: %d\n", self->settings.data_rate); + + seq_puts(m, "Flow control:"); + sep = ' '; + if (self->settings.flow_control & IRCOMM_XON_XOFF_IN) { + seq_printf(m, "%cXON_XOFF_IN", sep); + sep = '|'; + } + if (self->settings.flow_control & IRCOMM_XON_XOFF_OUT) { + seq_printf(m, "%cXON_XOFF_OUT", sep); + sep = '|'; + } + if (self->settings.flow_control & IRCOMM_RTS_CTS_IN) { + seq_printf(m, "%cRTS_CTS_IN", sep); + sep = '|'; + } + if (self->settings.flow_control & IRCOMM_RTS_CTS_OUT) { + seq_printf(m, "%cRTS_CTS_OUT", sep); + sep = '|'; + } + if (self->settings.flow_control & IRCOMM_DSR_DTR_IN) { + seq_printf(m, "%cDSR_DTR_IN", sep); + sep = '|'; + } + if (self->settings.flow_control & IRCOMM_DSR_DTR_OUT) { + seq_printf(m, "%cDSR_DTR_OUT", sep); + sep = '|'; + } + if (self->settings.flow_control & IRCOMM_ENQ_ACK_IN) { + seq_printf(m, "%cENQ_ACK_IN", sep); + sep = '|'; + } + if (self->settings.flow_control & IRCOMM_ENQ_ACK_OUT) { + seq_printf(m, "%cENQ_ACK_OUT", sep); + sep = '|'; + } + seq_putc(m, '\n'); + + seq_puts(m, "Flags:"); + sep = ' '; + if (self->flags & ASYNC_CTS_FLOW) { + seq_printf(m, "%cASYNC_CTS_FLOW", sep); + sep = '|'; + } + if (self->flags & ASYNC_CHECK_CD) { + seq_printf(m, "%cASYNC_CHECK_CD", sep); + sep = '|'; + } + if (self->flags & ASYNC_INITIALIZED) { + seq_printf(m, "%cASYNC_INITIALIZED", sep); + sep = '|'; + } + if (self->flags & ASYNC_LOW_LATENCY) { + seq_printf(m, "%cASYNC_LOW_LATENCY", sep); + sep = '|'; + } + if (self->flags & ASYNC_CLOSING) { + seq_printf(m, "%cASYNC_CLOSING", sep); + sep = '|'; + } + if (self->flags & ASYNC_NORMAL_ACTIVE) { + seq_printf(m, "%cASYNC_NORMAL_ACTIVE", sep); + sep = '|'; + } + seq_putc(m, '\n'); + + seq_printf(m, "Role: %s\n", self->client ? "client" : "server"); + seq_printf(m, "Open count: %d\n", self->open_count); + seq_printf(m, "Max data size: %d\n", self->max_data_size); + seq_printf(m, "Max header size: %d\n", self->max_header_size); if (self->tty) - ret += sprintf(buf+ret, "Hardware: %s\n", + seq_printf(m, "Hardware: %s\n", self->tty->hw_stopped ? "Stopped" : "Running"); - - ret += sprintf(buf+ret, "\n"); - return ret; } - -/* - * Function ircomm_tty_read_proc (buf, start, offset, len, eof, unused) - * - * - * - */ -static int ircomm_tty_read_proc(char *buf, char **start, off_t offset, int len, - int *eof, void *unused) +static int ircomm_tty_proc_show(struct seq_file *m, void *v) { struct ircomm_tty_cb *self; - int count = 0, l; - off_t begin = 0; unsigned long flags; spin_lock_irqsave(&ircomm_tty->hb_spinlock, flags); self = (struct ircomm_tty_cb *) hashbin_get_first(ircomm_tty); - while ((self != NULL) && (count < 4000)) { + while (self != NULL) { if (self->magic != IRCOMM_TTY_MAGIC) break; - l = ircomm_tty_line_info(self, buf + count); - count += l; - if (count+begin > offset+len) - goto done; - if (count+begin < offset) { - begin += count; - count = 0; - } - + ircomm_tty_line_info(self, m); self = (struct ircomm_tty_cb *) hashbin_get_next(ircomm_tty); } - *eof = 1; -done: spin_unlock_irqrestore(&ircomm_tty->hb_spinlock, flags); + return 0; +} - if (offset >= count+begin) - return 0; - *start = buf + (offset-begin); - return ((len < begin+count-offset) ? len : begin+count-offset); +static int ircomm_tty_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, ircomm_tty_proc_show, NULL); } + +static const struct file_operations ircomm_tty_proc_fops = { + .owner = THIS_MODULE, + .open = ircomm_tty_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; #endif /* CONFIG_PROC_FS */ MODULE_AUTHOR("Dag Brattli "); -- cgit v1.2.3 From 0f043a81ebe84be3576667f04fdda481609e3816 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 31 Mar 2009 15:19:25 -0700 Subject: proc tty: remove struct tty_operations::read_proc struct tty_operations::proc_fops took it's place and there is one less create_proc_read_entry() user now! Signed-off-by: Alexey Dobriyan Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/bluetooth/rfcomm/tty.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'net') diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index abdc703a11d2..cab71ea2796d 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -1093,11 +1093,6 @@ static void rfcomm_tty_hangup(struct tty_struct *tty) } } -static int rfcomm_tty_read_proc(char *buf, char **start, off_t offset, int len, int *eof, void *unused) -{ - return 0; -} - static int rfcomm_tty_tiocmget(struct tty_struct *tty, struct file *filp) { struct rfcomm_dev *dev = (struct rfcomm_dev *) tty->driver_data; @@ -1156,7 +1151,6 @@ static const struct tty_operations rfcomm_ops = { .send_xchar = rfcomm_tty_send_xchar, .hangup = rfcomm_tty_hangup, .wait_until_sent = rfcomm_tty_wait_until_sent, - .read_proc = rfcomm_tty_read_proc, .tiocmget = rfcomm_tty_tiocmget, .tiocmset = rfcomm_tty_tiocmset, }; -- cgit v1.2.3 From 37e5540b3c9d838eb20f2ca8ea2eb8072271e403 Mon Sep 17 00:00:00 2001 From: Davide Libenzi Date: Tue, 31 Mar 2009 15:24:21 -0700 Subject: epoll keyed wakeups: make sockets use keyed wakeups Add support for event-aware wakeups to the sockets code. Events are delivered to the wakeup target, so that epoll can avoid spurious wakeups for non-interesting events. Signed-off-by: Davide Libenzi Acked-by: Alan Cox Cc: Ingo Molnar Cc: David Miller Cc: William Lee Irwin III Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/core/sock.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 0620046e4eba..7dbf3ffb35cc 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1677,7 +1677,7 @@ static void sock_def_error_report(struct sock *sk) { read_lock(&sk->sk_callback_lock); if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible(sk->sk_sleep); + wake_up_interruptible_poll(sk->sk_sleep, POLLERR); sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); read_unlock(&sk->sk_callback_lock); } @@ -1686,7 +1686,8 @@ static void sock_def_readable(struct sock *sk, int len) { read_lock(&sk->sk_callback_lock); if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible_sync(sk->sk_sleep); + wake_up_interruptible_sync_poll(sk->sk_sleep, POLLIN | + POLLRDNORM | POLLRDBAND); sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); read_unlock(&sk->sk_callback_lock); } @@ -1700,7 +1701,8 @@ static void sock_def_write_space(struct sock *sk) */ if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) { if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible_sync(sk->sk_sleep); + wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT | + POLLWRNORM | POLLWRBAND); /* Should agree with poll, otherwise some programs break */ if (sock_writeable(sk)) -- cgit v1.2.3 From c69da774b28e01e062e0a3aba7509f2dcfd2a11a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Mar 2009 18:59:17 -0400 Subject: SUNRPC: Ensure IPV6_V6ONLY is set on the socket before binding to a port Also ensure that we use the protocol family instead of the address family when calling sock_create_kern(). Signed-off-by: Trond Myklebust --- net/sunrpc/svcsock.c | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index ac6cd65220c7..9d504234af4a 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1110,7 +1110,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, struct svc_sock *svsk; struct sock *inet; int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); - int val; dprintk("svc: svc_setup_socket %p\n", sock); if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { @@ -1143,16 +1142,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, else svc_tcp_init(svsk, serv); - /* - * If this is a PF_INET6 listener, we want to avoid - * getting requests from IPv4 remotes. Those should - * be shunted to a PF_INET listener via rpcbind. - */ - val = 1; - if (inet->sk_family == PF_INET6) - kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY, - (char *)&val, sizeof(val)); - dprintk("svc: svc_setup_socket created %p (inet %p)\n", svsk, svsk->sk_sk); @@ -1220,6 +1209,8 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, struct sockaddr_storage addr; struct sockaddr *newsin = (struct sockaddr *)&addr; int newlen; + int family; + int val; RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); dprintk("svc: svc_create_socket(%s, %d, %s)\n", @@ -1231,14 +1222,35 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, "sockets supported\n"); return ERR_PTR(-EINVAL); } + type = (protocol == IPPROTO_UDP)? SOCK_DGRAM : SOCK_STREAM; + switch (sin->sa_family) { + case AF_INET6: + family = PF_INET6; + break; + case AF_INET: + family = PF_INET; + break; + default: + return ERR_PTR(-EINVAL); + } - error = sock_create_kern(sin->sa_family, type, protocol, &sock); + error = sock_create_kern(family, type, protocol, &sock); if (error < 0) return ERR_PTR(error); svc_reclassify_socket(sock); + /* + * If this is an PF_INET6 listener, we want to avoid + * getting requests from IPv4 remotes. Those should + * be shunted to a PF_INET listener via rpcbind. + */ + val = 1; + if (family == PF_INET6) + kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY, + (char *)&val, sizeof(val)); + if (type == SOCK_STREAM) sock->sk->sk_reuse = 1; /* allow address reuse */ error = kernel_bind(sock, sin, len); -- cgit v1.2.3 From f1cffcbfcc53b825da7d1d26244aabd8dccb24aa Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 1 Apr 2009 08:20:18 +0000 Subject: RDS: Fix m_rs_lock deadlock rs_send_drop_to() is called during socket close. If it takes m_rs_lock without disabling interrupts, then rds_send_remove_from_sock() can run from the rx completion handler and thus deadlock. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/send.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/rds/send.c b/net/rds/send.c index 1b37364656f0..104fe033203d 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -615,7 +615,7 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) { struct rds_message *rm, *tmp; struct rds_connection *conn; - unsigned long flags; + unsigned long flags, flags2; LIST_HEAD(list); int wake = 0; @@ -651,9 +651,9 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) list_for_each_entry(rm, &list, m_sock_item) { /* We do this here rather than in the loop above, so that * we don't have to nest m_rs_lock under rs->rs_lock */ - spin_lock(&rm->m_rs_lock); + spin_lock_irqsave(&rm->m_rs_lock, flags2); rm->m_rs = NULL; - spin_unlock(&rm->m_rs_lock); + spin_unlock_irqrestore(&rm->m_rs_lock, flags2); /* * If we see this flag cleared then we're *sure* that someone -- cgit v1.2.3 From 745cbccac3fe8cead529a1b3358e1e86a1505bfa Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 1 Apr 2009 08:20:19 +0000 Subject: RDS: Rewrite connection cleanup, fixing oops on rmmod This fixes a bug where a connection was unexpectedly not on *any* list while being destroyed. It also cleans up some code duplication and regularizes some function names. * Grab appropriate lock in conn_free() and explain in comment * Ensure via locking that a conn is never not on either a dev's list or the nodev list * Add rds_xx_remove_conn() to match rds_xx_add_conn() * Make rds_xx_add_conn() return void * Rename remove_{,nodev_}conns() to destroy_{,nodev_}conns() and unify their implementation in a helper function * Document lock ordering as nodev conn_lock before dev_conn_lock Reported-by: Yosef Etigin Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/ib.c | 5 +++-- net/rds/ib.h | 14 +++++++++++--- net/rds/ib_cm.c | 34 +++++++++++++++++++--------------- net/rds/ib_rdma.c | 43 +++++++++++++++++++++---------------------- net/rds/iw.c | 5 +++-- net/rds/iw.h | 14 +++++++++++--- net/rds/iw_cm.c | 35 +++++++++++++++++++---------------- net/rds/iw_rdma.c | 44 ++++++++++++++++++++++---------------------- 8 files changed, 109 insertions(+), 85 deletions(-) (limited to 'net') diff --git a/net/rds/ib.c b/net/rds/ib.c index 06a7b798d9a7..4933b380985e 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -51,6 +51,7 @@ MODULE_PARM_DESC(fmr_message_size, " Max size of a RDMA transfer"); struct list_head rds_ib_devices; +/* NOTE: if also grabbing ibdev lock, grab this first */ DEFINE_SPINLOCK(ib_nodev_conns_lock); LIST_HEAD(ib_nodev_conns); @@ -137,7 +138,7 @@ void rds_ib_remove_one(struct ib_device *device) kfree(i_ipaddr); } - rds_ib_remove_conns(rds_ibdev); + rds_ib_destroy_conns(rds_ibdev); if (rds_ibdev->mr_pool) rds_ib_destroy_mr_pool(rds_ibdev->mr_pool); @@ -249,7 +250,7 @@ static int rds_ib_laddr_check(__be32 addr) void rds_ib_exit(void) { rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info); - rds_ib_remove_nodev_conns(); + rds_ib_destroy_nodev_conns(); ib_unregister_client(&rds_ib_client); rds_ib_sysctl_exit(); rds_ib_recv_exit(); diff --git a/net/rds/ib.h b/net/rds/ib.h index 8be563a1363a..c08ffffb3164 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -267,9 +267,17 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, /* ib_rdma.c */ int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr); -int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); -void rds_ib_remove_nodev_conns(void); -void rds_ib_remove_conns(struct rds_ib_device *rds_ibdev); +void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); +void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); +void __rds_ib_destroy_conns(struct list_head *list, spinlock_t *list_lock); +static inline void rds_ib_destroy_nodev_conns(void) +{ + __rds_ib_destroy_conns(&ib_nodev_conns, &ib_nodev_conns_lock); +} +static inline void rds_ib_destroy_conns(struct rds_ib_device *rds_ibdev) +{ + __rds_ib_destroy_conns(&rds_ibdev->conn_list, &rds_ibdev->spinlock); +} struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *); void rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo); void rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *); diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 0532237bd128..889ab0441359 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -126,9 +126,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even err = rds_ib_update_ipaddr(rds_ibdev, conn->c_laddr); if (err) printk(KERN_ERR "rds_ib_update_ipaddr failed (%d)\n", err); - err = rds_ib_add_conn(rds_ibdev, conn); - if (err) - printk(KERN_ERR "rds_ib_add_conn failed (%d)\n", err); + rds_ib_add_conn(rds_ibdev, conn); /* If the peer gave us the last packet it saw, process this as if * we had received a regular ACK. */ @@ -616,18 +614,8 @@ void rds_ib_conn_shutdown(struct rds_connection *conn) /* * Move connection back to the nodev list. */ - if (ic->rds_ibdev) { - - spin_lock_irq(&ic->rds_ibdev->spinlock); - BUG_ON(list_empty(&ic->ib_node)); - list_del(&ic->ib_node); - spin_unlock_irq(&ic->rds_ibdev->spinlock); - - spin_lock_irq(&ib_nodev_conns_lock); - list_add_tail(&ic->ib_node, &ib_nodev_conns); - spin_unlock_irq(&ib_nodev_conns_lock); - ic->rds_ibdev = NULL; - } + if (ic->rds_ibdev) + rds_ib_remove_conn(ic->rds_ibdev, conn); ic->i_cm_id = NULL; ic->i_pd = NULL; @@ -701,11 +689,27 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp) return 0; } +/* + * Free a connection. Connection must be shut down and not set for reconnect. + */ void rds_ib_conn_free(void *arg) { struct rds_ib_connection *ic = arg; + spinlock_t *lock_ptr; + rdsdebug("ic %p\n", ic); + + /* + * Conn is either on a dev's list or on the nodev list. + * A race with shutdown() or connect() would cause problems + * (since rds_ibdev would change) but that should never happen. + */ + lock_ptr = ic->rds_ibdev ? &ic->rds_ibdev->spinlock : &ib_nodev_conns_lock; + + spin_lock_irq(lock_ptr); list_del(&ic->ib_node); + spin_unlock_irq(lock_ptr); + kfree(ic); } diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 69a6289ed672..81033af93020 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -139,7 +139,7 @@ int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) return rds_ib_add_ipaddr(rds_ibdev, ipaddr); } -int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) +void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) { struct rds_ib_connection *ic = conn->c_transport_data; @@ -148,45 +148,44 @@ int rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn BUG_ON(list_empty(&ib_nodev_conns)); BUG_ON(list_empty(&ic->ib_node)); list_del(&ic->ib_node); - spin_unlock_irq(&ib_nodev_conns_lock); spin_lock_irq(&rds_ibdev->spinlock); list_add_tail(&ic->ib_node, &rds_ibdev->conn_list); spin_unlock_irq(&rds_ibdev->spinlock); + spin_unlock_irq(&ib_nodev_conns_lock); ic->rds_ibdev = rds_ibdev; - - return 0; } -void rds_ib_remove_nodev_conns(void) +void rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) { - struct rds_ib_connection *ic, *_ic; - LIST_HEAD(tmp_list); + struct rds_ib_connection *ic = conn->c_transport_data; - /* avoid calling conn_destroy with irqs off */ - spin_lock_irq(&ib_nodev_conns_lock); - list_splice(&ib_nodev_conns, &tmp_list); - INIT_LIST_HEAD(&ib_nodev_conns); - spin_unlock_irq(&ib_nodev_conns_lock); + /* place conn on nodev_conns_list */ + spin_lock(&ib_nodev_conns_lock); - list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) { - if (ic->conn->c_passive) - rds_conn_destroy(ic->conn->c_passive); - rds_conn_destroy(ic->conn); - } + spin_lock_irq(&rds_ibdev->spinlock); + BUG_ON(list_empty(&ic->ib_node)); + list_del(&ic->ib_node); + spin_unlock_irq(&rds_ibdev->spinlock); + + list_add_tail(&ic->ib_node, &ib_nodev_conns); + + spin_unlock(&ib_nodev_conns_lock); + + ic->rds_ibdev = NULL; } -void rds_ib_remove_conns(struct rds_ib_device *rds_ibdev) +void __rds_ib_destroy_conns(struct list_head *list, spinlock_t *list_lock) { struct rds_ib_connection *ic, *_ic; LIST_HEAD(tmp_list); /* avoid calling conn_destroy with irqs off */ - spin_lock_irq(&rds_ibdev->spinlock); - list_splice(&rds_ibdev->conn_list, &tmp_list); - INIT_LIST_HEAD(&rds_ibdev->conn_list); - spin_unlock_irq(&rds_ibdev->spinlock); + spin_lock_irq(list_lock); + list_splice(list, &tmp_list); + INIT_LIST_HEAD(list); + spin_unlock_irq(list_lock); list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) { if (ic->conn->c_passive) diff --git a/net/rds/iw.c b/net/rds/iw.c index 1b56905c4c08..b732efb5b634 100644 --- a/net/rds/iw.c +++ b/net/rds/iw.c @@ -51,6 +51,7 @@ MODULE_PARM_DESC(fastreg_message_size, " Max size of a RDMA transfer (fastreg MR struct list_head rds_iw_devices; +/* NOTE: if also grabbing iwdev lock, grab this first */ DEFINE_SPINLOCK(iw_nodev_conns_lock); LIST_HEAD(iw_nodev_conns); @@ -145,7 +146,7 @@ void rds_iw_remove_one(struct ib_device *device) } spin_unlock_irq(&rds_iwdev->spinlock); - rds_iw_remove_conns(rds_iwdev); + rds_iw_destroy_conns(rds_iwdev); if (rds_iwdev->mr_pool) rds_iw_destroy_mr_pool(rds_iwdev->mr_pool); @@ -258,7 +259,7 @@ static int rds_iw_laddr_check(__be32 addr) void rds_iw_exit(void) { rds_info_deregister_func(RDS_INFO_IWARP_CONNECTIONS, rds_iw_ic_info); - rds_iw_remove_nodev_conns(); + rds_iw_destroy_nodev_conns(); ib_unregister_client(&rds_iw_client); rds_iw_sysctl_exit(); rds_iw_recv_exit(); diff --git a/net/rds/iw.h b/net/rds/iw.h index 0ddda34f2a1c..70eb948f42f4 100644 --- a/net/rds/iw.h +++ b/net/rds/iw.h @@ -294,9 +294,17 @@ void rds_iw_cm_connect_complete(struct rds_connection *conn, /* ib_rdma.c */ int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_id); -int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn); -void rds_iw_remove_nodev_conns(void); -void rds_iw_remove_conns(struct rds_iw_device *rds_iwdev); +void rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn); +void rds_iw_remove_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn); +void __rds_iw_destroy_conns(struct list_head *list, spinlock_t *list_lock); +static inline void rds_iw_destroy_nodev_conns(void) +{ + __rds_iw_destroy_conns(&iw_nodev_conns, &iw_nodev_conns_lock); +} +static inline void rds_iw_destroy_conns(struct rds_iw_device *rds_iwdev) +{ + __rds_iw_destroy_conns(&rds_iwdev->conn_list, &rds_iwdev->spinlock); +} struct rds_iw_mr_pool *rds_iw_create_mr_pool(struct rds_iw_device *); void rds_iw_get_mr_info(struct rds_iw_device *rds_iwdev, struct rds_info_rdma_connection *iinfo); void rds_iw_destroy_mr_pool(struct rds_iw_mr_pool *); diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index 57ecb3d4b8a5..0ffaa3e97ad6 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c @@ -86,9 +86,7 @@ void rds_iw_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even err = rds_iw_update_cm_id(rds_iwdev, ic->i_cm_id); if (err) printk(KERN_ERR "rds_iw_update_ipaddr failed (%d)\n", err); - err = rds_iw_add_conn(rds_iwdev, conn); - if (err) - printk(KERN_ERR "rds_iw_add_conn failed (%d)\n", err); + rds_iw_add_conn(rds_iwdev, conn); /* If the peer gave us the last packet it saw, process this as if * we had received a regular ACK. */ @@ -637,19 +635,8 @@ void rds_iw_conn_shutdown(struct rds_connection *conn) * Move connection back to the nodev list. * Remove cm_id from the device cm_id list. */ - if (ic->rds_iwdev) { - - spin_lock_irq(&ic->rds_iwdev->spinlock); - BUG_ON(list_empty(&ic->iw_node)); - list_del(&ic->iw_node); - spin_unlock_irq(&ic->rds_iwdev->spinlock); - - spin_lock_irq(&iw_nodev_conns_lock); - list_add_tail(&ic->iw_node, &iw_nodev_conns); - spin_unlock_irq(&iw_nodev_conns_lock); - rds_iw_remove_cm_id(ic->rds_iwdev, ic->i_cm_id); - ic->rds_iwdev = NULL; - } + if (ic->rds_iwdev) + rds_iw_remove_conn(ic->rds_iwdev, conn); rdma_destroy_id(ic->i_cm_id); @@ -726,11 +713,27 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp) return 0; } +/* + * Free a connection. Connection must be shut down and not set for reconnect. + */ void rds_iw_conn_free(void *arg) { struct rds_iw_connection *ic = arg; + spinlock_t *lock_ptr; + rdsdebug("ic %p\n", ic); + + /* + * Conn is either on a dev's list or on the nodev list. + * A race with shutdown() or connect() would cause problems + * (since rds_iwdev would change) but that should never happen. + */ + lock_ptr = ic->rds_iwdev ? &ic->rds_iwdev->spinlock : &iw_nodev_conns_lock; + + spin_lock_irq(lock_ptr); list_del(&ic->iw_node); + spin_unlock_irq(lock_ptr); + kfree(ic); } diff --git a/net/rds/iw_rdma.c b/net/rds/iw_rdma.c index 1c02a8f952d0..dcdb37da80f2 100644 --- a/net/rds/iw_rdma.c +++ b/net/rds/iw_rdma.c @@ -196,7 +196,7 @@ int rds_iw_update_cm_id(struct rds_iw_device *rds_iwdev, struct rdma_cm_id *cm_i return rds_iw_add_cm_id(rds_iwdev, cm_id); } -int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn) +void rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn) { struct rds_iw_connection *ic = conn->c_transport_data; @@ -205,45 +205,45 @@ int rds_iw_add_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn BUG_ON(list_empty(&iw_nodev_conns)); BUG_ON(list_empty(&ic->iw_node)); list_del(&ic->iw_node); - spin_unlock_irq(&iw_nodev_conns_lock); spin_lock_irq(&rds_iwdev->spinlock); list_add_tail(&ic->iw_node, &rds_iwdev->conn_list); spin_unlock_irq(&rds_iwdev->spinlock); + spin_unlock_irq(&iw_nodev_conns_lock); ic->rds_iwdev = rds_iwdev; - - return 0; } -void rds_iw_remove_nodev_conns(void) +void rds_iw_remove_conn(struct rds_iw_device *rds_iwdev, struct rds_connection *conn) { - struct rds_iw_connection *ic, *_ic; - LIST_HEAD(tmp_list); + struct rds_iw_connection *ic = conn->c_transport_data; - /* avoid calling conn_destroy with irqs off */ - spin_lock_irq(&iw_nodev_conns_lock); - list_splice(&iw_nodev_conns, &tmp_list); - INIT_LIST_HEAD(&iw_nodev_conns); - spin_unlock_irq(&iw_nodev_conns_lock); + /* place conn on nodev_conns_list */ + spin_lock(&iw_nodev_conns_lock); - list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) { - if (ic->conn->c_passive) - rds_conn_destroy(ic->conn->c_passive); - rds_conn_destroy(ic->conn); - } + spin_lock_irq(&rds_iwdev->spinlock); + BUG_ON(list_empty(&ic->iw_node)); + list_del(&ic->iw_node); + spin_unlock_irq(&rds_iwdev->spinlock); + + list_add_tail(&ic->iw_node, &iw_nodev_conns); + + spin_unlock(&iw_nodev_conns_lock); + + rds_iw_remove_cm_id(ic->rds_iwdev, ic->i_cm_id); + ic->rds_iwdev = NULL; } -void rds_iw_remove_conns(struct rds_iw_device *rds_iwdev) +void __rds_iw_destroy_conns(struct list_head *list, spinlock_t *list_lock) { struct rds_iw_connection *ic, *_ic; LIST_HEAD(tmp_list); /* avoid calling conn_destroy with irqs off */ - spin_lock_irq(&rds_iwdev->spinlock); - list_splice(&rds_iwdev->conn_list, &tmp_list); - INIT_LIST_HEAD(&rds_iwdev->conn_list); - spin_unlock_irq(&rds_iwdev->spinlock); + spin_lock_irq(list_lock); + list_splice(list, &tmp_list); + INIT_LIST_HEAD(list); + spin_unlock_irq(list_lock); list_for_each_entry_safe(ic, _ic, &tmp_list, iw_node) { if (ic->conn->c_passive) -- cgit v1.2.3 From 8cbd9606a6367c221a7bbcc47f3ab1a8c31b6437 Mon Sep 17 00:00:00 2001 From: Andy Grover Date: Wed, 1 Apr 2009 08:20:20 +0000 Subject: RDS: Use spinlock to protect 64b value update on 32b archs We have a 64bit value that needs to be set atomically. This is easy and quick on all 64bit archs, and can also be done on x86/32 with set_64bit() (uses cmpxchg8b). However other 32b archs don't have this. I actually changed this to the current state in preparation for mainline because the old way (using a spinlock on 32b) resulted in unsightly #ifdefs in the code. But obviously, being correct takes precedence. Signed-off-by: Andy Grover Signed-off-by: David S. Miller --- net/rds/ib.h | 14 +++++--------- net/rds/ib_cm.c | 9 ++++++++- net/rds/ib_recv.c | 37 +++++++++++++++++++++++++++++++++++-- net/rds/iw.h | 14 +++++--------- net/rds/iw_cm.c | 9 ++++++++- net/rds/iw_recv.c | 37 +++++++++++++++++++++++++++++++++++-- net/rds/rds.h | 4 ++++ 7 files changed, 100 insertions(+), 24 deletions(-) (limited to 'net') diff --git a/net/rds/ib.h b/net/rds/ib.h index c08ffffb3164..069206cae733 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -108,7 +108,12 @@ struct rds_ib_connection { /* sending acks */ unsigned long i_ack_flags; +#ifdef KERNEL_HAS_ATOMIC64 + atomic64_t i_ack_next; /* next ACK to send */ +#else + spinlock_t i_ack_lock; /* protect i_ack_next */ u64 i_ack_next; /* next ACK to send */ +#endif struct rds_header *i_ack; struct ib_send_wr i_ack_wr; struct ib_sge i_ack_sge; @@ -363,13 +368,4 @@ rds_ib_data_sge(struct rds_ib_connection *ic, struct ib_sge *sge) return &sge[1]; } -static inline void rds_ib_set_64bit(u64 *ptr, u64 val) -{ -#if BITS_PER_LONG == 64 - *ptr = val; -#else - set_64bit(ptr, val); -#endif -} - #endif diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 889ab0441359..f8e40e1a6038 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -636,7 +636,11 @@ void rds_ib_conn_shutdown(struct rds_connection *conn) /* Clear the ACK state */ clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); - rds_ib_set_64bit(&ic->i_ack_next, 0); +#ifdef KERNEL_HAS_ATOMIC64 + atomic64_set(&ic->i_ack_next, 0); +#else + ic->i_ack_next = 0; +#endif ic->i_ack_recv = 0; /* Clear flow control state */ @@ -669,6 +673,9 @@ int rds_ib_conn_alloc(struct rds_connection *conn, gfp_t gfp) INIT_LIST_HEAD(&ic->ib_node); mutex_init(&ic->i_recv_mutex); +#ifndef KERNEL_HAS_ATOMIC64 + spin_lock_init(&ic->i_ack_lock); +#endif /* * rds_ib_conn_shutdown() waits for these to be emptied so they diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 5061b5502162..36d931573ff4 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -395,10 +395,37 @@ void rds_ib_recv_init_ack(struct rds_ib_connection *ic) * room for it beyond the ring size. Send completion notices its special * wr_id and avoids working with the ring in that case. */ +#ifndef KERNEL_HAS_ATOMIC64 static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, int ack_required) { - rds_ib_set_64bit(&ic->i_ack_next, seq); + unsigned long flags; + + spin_lock_irqsave(&ic->i_ack_lock, flags); + ic->i_ack_next = seq; + if (ack_required) + set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); + spin_unlock_irqrestore(&ic->i_ack_lock, flags); +} + +static u64 rds_ib_get_ack(struct rds_ib_connection *ic) +{ + unsigned long flags; + u64 seq; + + clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); + + spin_lock_irqsave(&ic->i_ack_lock, flags); + seq = ic->i_ack_next; + spin_unlock_irqrestore(&ic->i_ack_lock, flags); + + return seq; +} +#else +static void rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, + int ack_required) +{ + atomic64_set(&ic->i_ack_next, seq); if (ack_required) { smp_mb__before_clear_bit(); set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); @@ -410,8 +437,10 @@ static u64 rds_ib_get_ack(struct rds_ib_connection *ic) clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); smp_mb__after_clear_bit(); - return ic->i_ack_next; + return atomic64_read(&ic->i_ack_next); } +#endif + static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credits) { @@ -464,6 +493,10 @@ static void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credi * - i_ack_next, which is the last sequence number we received * * Potentially, send queue and receive queue handlers can run concurrently. + * It would be nice to not have to use a spinlock to synchronize things, + * but the one problem that rules this out is that 64bit updates are + * not atomic on all platforms. Things would be a lot simpler if + * we had atomic64 or maybe cmpxchg64 everywhere. * * Reconnecting complicates this picture just slightly. When we * reconnect, we may be seeing duplicate packets. The peer diff --git a/net/rds/iw.h b/net/rds/iw.h index 70eb948f42f4..b4fb27252895 100644 --- a/net/rds/iw.h +++ b/net/rds/iw.h @@ -131,7 +131,12 @@ struct rds_iw_connection { /* sending acks */ unsigned long i_ack_flags; +#ifdef KERNEL_HAS_ATOMIC64 + atomic64_t i_ack_next; /* next ACK to send */ +#else + spinlock_t i_ack_lock; /* protect i_ack_next */ u64 i_ack_next; /* next ACK to send */ +#endif struct rds_header *i_ack; struct ib_send_wr i_ack_wr; struct ib_sge i_ack_sge; @@ -391,13 +396,4 @@ rds_iw_data_sge(struct rds_iw_connection *ic, struct ib_sge *sge) return &sge[1]; } -static inline void rds_iw_set_64bit(u64 *ptr, u64 val) -{ -#if BITS_PER_LONG == 64 - *ptr = val; -#else - set_64bit(ptr, val); -#endif -} - #endif diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c index 0ffaa3e97ad6..a416b0d492b1 100644 --- a/net/rds/iw_cm.c +++ b/net/rds/iw_cm.c @@ -659,7 +659,11 @@ void rds_iw_conn_shutdown(struct rds_connection *conn) /* Clear the ACK state */ clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); - rds_iw_set_64bit(&ic->i_ack_next, 0); +#ifdef KERNEL_HAS_ATOMIC64 + atomic64_set(&ic->i_ack_next, 0); +#else + ic->i_ack_next = 0; +#endif ic->i_ack_recv = 0; /* Clear flow control state */ @@ -693,6 +697,9 @@ int rds_iw_conn_alloc(struct rds_connection *conn, gfp_t gfp) INIT_LIST_HEAD(&ic->iw_node); mutex_init(&ic->i_recv_mutex); +#ifndef KERNEL_HAS_ATOMIC64 + spin_lock_init(&ic->i_ack_lock); +#endif /* * rds_iw_conn_shutdown() waits for these to be emptied so they diff --git a/net/rds/iw_recv.c b/net/rds/iw_recv.c index a1931f0027a2..fde470fa50d5 100644 --- a/net/rds/iw_recv.c +++ b/net/rds/iw_recv.c @@ -395,10 +395,37 @@ void rds_iw_recv_init_ack(struct rds_iw_connection *ic) * room for it beyond the ring size. Send completion notices its special * wr_id and avoids working with the ring in that case. */ +#ifndef KERNEL_HAS_ATOMIC64 static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq, int ack_required) { - rds_iw_set_64bit(&ic->i_ack_next, seq); + unsigned long flags; + + spin_lock_irqsave(&ic->i_ack_lock, flags); + ic->i_ack_next = seq; + if (ack_required) + set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); + spin_unlock_irqrestore(&ic->i_ack_lock, flags); +} + +static u64 rds_iw_get_ack(struct rds_iw_connection *ic) +{ + unsigned long flags; + u64 seq; + + clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); + + spin_lock_irqsave(&ic->i_ack_lock, flags); + seq = ic->i_ack_next; + spin_unlock_irqrestore(&ic->i_ack_lock, flags); + + return seq; +} +#else +static void rds_iw_set_ack(struct rds_iw_connection *ic, u64 seq, + int ack_required) +{ + atomic64_set(&ic->i_ack_next, seq); if (ack_required) { smp_mb__before_clear_bit(); set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); @@ -410,8 +437,10 @@ static u64 rds_iw_get_ack(struct rds_iw_connection *ic) clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); smp_mb__after_clear_bit(); - return ic->i_ack_next; + return atomic64_read(&ic->i_ack_next); } +#endif + static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credits) { @@ -464,6 +493,10 @@ static void rds_iw_send_ack(struct rds_iw_connection *ic, unsigned int adv_credi * - i_ack_next, which is the last sequence number we received * * Potentially, send queue and receive queue handlers can run concurrently. + * It would be nice to not have to use a spinlock to synchronize things, + * but the one problem that rules this out is that 64bit updates are + * not atomic on all platforms. Things would be a lot simpler if + * we had atomic64 or maybe cmpxchg64 everywhere. * * Reconnecting complicates this picture just slightly. When we * reconnect, we may be seeing duplicate packets. The peer diff --git a/net/rds/rds.h b/net/rds/rds.h index 060400704979..619f0a30a4e5 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -28,6 +28,10 @@ */ #define RDS_PORT 18634 +#ifdef ATOMIC64_INIT +#define KERNEL_HAS_ATOMIC64 +#endif + #ifdef DEBUG #define rdsdebug(fmt, args...) pr_debug("%s(): " fmt, __func__ , ##args) #else -- cgit v1.2.3 From fa9a86ddc8ecd2830a5e773facc250f110300ae7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Apr 2009 00:53:49 -0700 Subject: netfilter: use rcu_read_bh() in ipt_do_table() Commit 784544739a25c30637397ace5489eeb6e15d7d49 (netfilter: iptables: lock free counters) forgot to disable BH in arpt_do_table(), ipt_do_table() and ip6t_do_table() Use rcu_read_lock_bh() instead of rcu_read_lock() cures the problem. Reported-and-bisected-by: Roman Mindalev Signed-off-by: Eric Dumazet Acked-by: Patrick McHardy Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/netfilter/arp_tables.c | 4 ++-- net/ipv4/netfilter/ip_tables.c | 4 ++-- net/ipv6/netfilter/ip6_tables.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 35c5f6a5cb7c..5ba533d234db 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -253,7 +253,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, indev = in ? in->name : nulldevname; outdev = out ? out->name : nulldevname; - rcu_read_lock(); + rcu_read_lock_bh(); private = rcu_dereference(table->private); table_base = rcu_dereference(private->entries[smp_processor_id()]); @@ -329,7 +329,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, } } while (!hotdrop); - rcu_read_unlock(); + rcu_read_unlock_bh(); if (hotdrop) return NF_DROP; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 82ee7c9049ff..810c0b62c7d4 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -339,7 +339,7 @@ ipt_do_table(struct sk_buff *skb, IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - rcu_read_lock(); + rcu_read_lock_bh(); private = rcu_dereference(table->private); table_base = rcu_dereference(private->entries[smp_processor_id()]); @@ -437,7 +437,7 @@ ipt_do_table(struct sk_buff *skb, } } while (!hotdrop); - rcu_read_unlock(); + rcu_read_unlock_bh(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index e89cfa3a8f25..dfed176aed37 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -365,7 +365,7 @@ ip6t_do_table(struct sk_buff *skb, IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - rcu_read_lock(); + rcu_read_lock_bh(); private = rcu_dereference(table->private); table_base = rcu_dereference(private->entries[smp_processor_id()]); @@ -466,7 +466,7 @@ ip6t_do_table(struct sk_buff *skb, #ifdef CONFIG_NETFILTER_DEBUG ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON; #endif - rcu_read_unlock(); + rcu_read_unlock_bh(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; -- cgit v1.2.3 From f2bde7328633269ee935d9ed96535ade15cc348f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 1 Apr 2009 11:20:20 +0000 Subject: net: allow multiple dev per napi with GRO GRO assumes that there is a one-to-one relationship between NAPI structure and network device. Some devices like sky2 share multiple devices on a single interrupt so only have one NAPI handler. Rather than split GRO from NAPI, just have GRO assume if device changes that it is a different flow. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/dev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index 52fea5b28ca6..91d792d17e09 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2472,8 +2472,9 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) return GRO_NORMAL; for (p = napi->gro_list; p; p = p->next) { - NAPI_GRO_CB(p)->same_flow = !compare_ether_header( - skb_mac_header(p), skb_gro_mac_header(skb)); + NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev) + && !compare_ether_header(skb_mac_header(p), + skb_gro_mac_header(skb)); NAPI_GRO_CB(p)->flush = 0; } -- cgit v1.2.3 From 797108d134a91afca9fa59c572336b279bc66afb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 1 Apr 2009 23:15:17 +0000 Subject: tcp: add helper for counter tweaking due mid-wq change MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need full-scale adjustment to fix a TCP miscount in the next patch, so just move it into a helper and call for that from the other places. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 66 ++++++++++++++++++++++++++------------------------- 1 file changed, 34 insertions(+), 32 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index c1f259d2d33b..f1db89bb3aa7 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -754,6 +754,36 @@ static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb, tp->fackets_out -= decr; } +/* Pcount in the middle of the write queue got changed, we need to do various + * tweaks to fix counters + */ +static void tcp_adjust_pcount(struct sock *sk, struct sk_buff *skb, int decr) +{ + struct tcp_sock *tp = tcp_sk(sk); + + tp->packets_out -= decr; + + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) + tp->sacked_out -= decr; + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) + tp->retrans_out -= decr; + if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) + tp->lost_out -= decr; + + /* Reno case is special. Sigh... */ + if (tcp_is_reno(tp) && decr > 0) + tp->sacked_out -= min_t(u32, tp->sacked_out, decr); + + tcp_adjust_fackets_out(sk, skb, decr); + + if (tp->lost_skb_hint && + before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) && + (tcp_is_fack(tp) || TCP_SKB_CB(skb)->sacked)) + tp->lost_cnt_hint -= decr; + + tcp_verify_left_out(tp); +} + /* Function to create two new TCP segments. Shrinks the given segment * to the specified size and appends a new segment with the rest of the * packet to the list. This won't be called frequently, I hope. @@ -836,28 +866,8 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, int diff = old_factor - tcp_skb_pcount(skb) - tcp_skb_pcount(buff); - tp->packets_out -= diff; - - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) - tp->sacked_out -= diff; - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) - tp->retrans_out -= diff; - - if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) - tp->lost_out -= diff; - - /* Adjust Reno SACK estimate. */ - if (tcp_is_reno(tp) && diff > 0) { - tcp_dec_pcount_approx_int(&tp->sacked_out, diff); - tcp_verify_left_out(tp); - } - tcp_adjust_fackets_out(sk, skb, diff); - - if (tp->lost_skb_hint && - before(TCP_SKB_CB(skb)->seq, - TCP_SKB_CB(tp->lost_skb_hint)->seq) && - (tcp_is_fack(tp) || TCP_SKB_CB(skb)->sacked)) - tp->lost_cnt_hint -= diff; + if (diff) + tcp_adjust_pcount(sk, skb, diff); } /* Link BUFF into the send queue. */ @@ -1768,22 +1778,14 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) * packet counting does not break. */ TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked & TCPCB_EVER_RETRANS; - if (TCP_SKB_CB(next_skb)->sacked & TCPCB_SACKED_RETRANS) - tp->retrans_out -= tcp_skb_pcount(next_skb); - if (TCP_SKB_CB(next_skb)->sacked & TCPCB_LOST) - tp->lost_out -= tcp_skb_pcount(next_skb); - /* Reno case is special. Sigh... */ - if (tcp_is_reno(tp) && tp->sacked_out) - tcp_dec_pcount_approx(&tp->sacked_out, next_skb); - - tcp_adjust_fackets_out(sk, next_skb, tcp_skb_pcount(next_skb)); - tp->packets_out -= tcp_skb_pcount(next_skb); /* changed transmit queue under us so clear hints */ tcp_clear_retrans_hints_partial(tp); if (next_skb == tp->retransmit_skb_hint) tp->retransmit_skb_hint = skb; + tcp_adjust_pcount(sk, next_skb, tcp_skb_pcount(next_skb)); + sk_wmem_free_skb(sk, next_skb); } -- cgit v1.2.3 From 9eb9362e569062e2f841b7a023e5fcde10ed63b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 1 Apr 2009 23:18:20 +0000 Subject: tcp: miscounts due to tcp_fragment pcount reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It seems that trivial reset of pcount to one was not sufficient in tcp_retransmit_skb. Multiple counters experience a positive miscount when skb's pcount gets lowered without the necessary adjustments (depending on skb's sacked bits which exactly), at worst a packets_out miscount can crash at RTO if the write queue is empty! Triggering this requires mss change, so bidir tcp or mtu probe or like. Signed-off-by: Ilpo Järvinen Reported-by: Markus Trippelsdorf Tested-by: Uwe Bugla Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f1db89bb3aa7..53300fa2359f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1893,7 +1893,12 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) if (tcp_fragment(sk, skb, cur_mss, cur_mss)) return -ENOMEM; /* We'll try again later. */ } else { - tcp_init_tso_segs(sk, skb, cur_mss); + int oldpcount = tcp_skb_pcount(skb); + + if (unlikely(oldpcount > 1)) { + tcp_init_tso_segs(sk, skb, cur_mss); + tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb)); + } } tcp_retrans_try_collapse(sk, skb, cur_mss); -- cgit v1.2.3