diff options
Diffstat (limited to 'net')
150 files changed, 3636 insertions, 1920 deletions
diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c index 5bdd300db0f7..2df34eb5d65f 100644 --- a/net/atm/mpoa_proc.c +++ b/net/atm/mpoa_proc.c @@ -272,7 +272,7 @@ static int parse_qos(const char *buff) qos.rxtp.max_pcr = rx_pcr; qos.rxtp.max_sdu = rx_sdu; qos.aal = ATM_AAL5; - dprintk("parse_qos(): setting qos paramameters to tx=%d,%d rx=%d,%d\n", + dprintk("parse_qos(): setting qos parameters to tx=%d,%d rx=%d,%d\n", qos.txtp.max_pcr, qos.txtp.max_sdu, qos.rxtp.max_pcr, qos.rxtp.max_sdu); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 46b114c0140b..476709bd068a 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3200,7 +3200,7 @@ EXPORT_SYMBOL(hci_register_dev); /* Unregister HCI device */ void hci_unregister_dev(struct hci_dev *hdev) { - int i, id; + int id; BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); @@ -3214,9 +3214,6 @@ void hci_unregister_dev(struct hci_dev *hdev) hci_dev_do_close(hdev); - for (i = 0; i < NUM_REASSEMBLY; i++) - kfree_skb(hdev->reassembly[i]); - cancel_work_sync(&hdev->power_on); if (!test_bit(HCI_INIT, &hdev->flags) && @@ -3320,149 +3317,6 @@ int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb) } EXPORT_SYMBOL(hci_recv_frame); -static int hci_reassembly(struct hci_dev *hdev, int type, void *data, - int count, __u8 index) -{ - int len = 0; - int hlen = 0; - int remain = count; - struct sk_buff *skb; - struct bt_skb_cb *scb; - - if ((type < HCI_ACLDATA_PKT || type > HCI_EVENT_PKT) || - index >= NUM_REASSEMBLY) - return -EILSEQ; - - skb = hdev->reassembly[index]; - - if (!skb) { - switch (type) { - case HCI_ACLDATA_PKT: - len = HCI_MAX_FRAME_SIZE; - hlen = HCI_ACL_HDR_SIZE; - break; - case HCI_EVENT_PKT: - len = HCI_MAX_EVENT_SIZE; - hlen = HCI_EVENT_HDR_SIZE; - break; - case HCI_SCODATA_PKT: - len = HCI_MAX_SCO_SIZE; - hlen = HCI_SCO_HDR_SIZE; - break; - } - - skb = bt_skb_alloc(len, GFP_ATOMIC); - if (!skb) - return -ENOMEM; - - scb = (void *) skb->cb; - scb->expect = hlen; - scb->pkt_type = type; - - hdev->reassembly[index] = skb; - } - - while (count) { - scb = (void *) skb->cb; - len = min_t(uint, scb->expect, count); - - memcpy(skb_put(skb, len), data, len); - - count -= len; - data += len; - scb->expect -= len; - remain = count; - - switch (type) { - case HCI_EVENT_PKT: - if (skb->len == HCI_EVENT_HDR_SIZE) { - struct hci_event_hdr *h = hci_event_hdr(skb); - scb->expect = h->plen; - - if (skb_tailroom(skb) < scb->expect) { - kfree_skb(skb); - hdev->reassembly[index] = NULL; - return -ENOMEM; - } - } - break; - - case HCI_ACLDATA_PKT: - if (skb->len == HCI_ACL_HDR_SIZE) { - struct hci_acl_hdr *h = hci_acl_hdr(skb); - scb->expect = __le16_to_cpu(h->dlen); - - if (skb_tailroom(skb) < scb->expect) { - kfree_skb(skb); - hdev->reassembly[index] = NULL; - return -ENOMEM; - } - } - break; - - case HCI_SCODATA_PKT: - if (skb->len == HCI_SCO_HDR_SIZE) { - struct hci_sco_hdr *h = hci_sco_hdr(skb); - scb->expect = h->dlen; - - if (skb_tailroom(skb) < scb->expect) { - kfree_skb(skb); - hdev->reassembly[index] = NULL; - return -ENOMEM; - } - } - break; - } - - if (scb->expect == 0) { - /* Complete frame */ - - bt_cb(skb)->pkt_type = type; - hci_recv_frame(hdev, skb); - - hdev->reassembly[index] = NULL; - return remain; - } - } - - return remain; -} - -#define STREAM_REASSEMBLY 0 - -int hci_recv_stream_fragment(struct hci_dev *hdev, void *data, int count) -{ - int type; - int rem = 0; - - while (count) { - struct sk_buff *skb = hdev->reassembly[STREAM_REASSEMBLY]; - - if (!skb) { - struct { char type; } *pkt; - - /* Start of the frame */ - pkt = data; - type = pkt->type; - - data++; - count--; - } else - type = bt_cb(skb)->pkt_type; - - rem = hci_reassembly(hdev, type, data, count, - STREAM_REASSEMBLY); - if (rem < 0) - return rem; - - data += (count - rem); - count = rem; - } - - return rem; -} -EXPORT_SYMBOL(hci_recv_stream_fragment); - /* ---- Interface to upper protocols ---- */ int hci_register_cb(struct hci_cb *cb) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 01031038eb0e..7b61be73650f 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2036,6 +2036,33 @@ unlock: hci_dev_unlock(hdev); } +static void hci_cs_le_read_remote_features(struct hci_dev *hdev, u8 status) +{ + struct hci_cp_le_read_remote_features *cp; + struct hci_conn *conn; + + BT_DBG("%s status 0x%2.2x", hdev->name, status); + + if (!status) + return; + + cp = hci_sent_cmd_data(hdev, HCI_OP_LE_READ_REMOTE_FEATURES); + if (!cp) + return; + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(cp->handle)); + if (conn) { + if (conn->state == BT_CONFIG) { + hci_connect_cfm(conn, status); + hci_conn_drop(conn); + } + } + + hci_dev_unlock(hdev); +} + static void hci_cs_le_start_enc(struct hci_dev *hdev, u8 status) { struct hci_cp_le_start_enc *cp; @@ -3104,6 +3131,10 @@ static void hci_cmd_status_evt(struct hci_dev *hdev, struct sk_buff *skb, hci_cs_le_create_conn(hdev, ev->status); break; + case HCI_OP_LE_READ_REMOTE_FEATURES: + hci_cs_le_read_remote_features(hdev, ev->status); + break; + case HCI_OP_LE_START_ENC: hci_cs_le_start_enc(hdev, ev->status); break; @@ -4515,7 +4546,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) conn->sec_level = BT_SECURITY_LOW; conn->handle = __le16_to_cpu(ev->handle); - conn->state = BT_CONNECTED; + conn->state = BT_CONFIG; conn->le_conn_interval = le16_to_cpu(ev->interval); conn->le_conn_latency = le16_to_cpu(ev->latency); @@ -4524,7 +4555,33 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_debugfs_create_conn(conn); hci_conn_add_sysfs(conn); - hci_connect_cfm(conn, ev->status); + if (!ev->status) { + /* The remote features procedure is defined for master + * role only. So only in case of an initiated connection + * request the remote features. + * + * If the local controller supports slave-initiated features + * exchange, then requesting the remote features in slave + * role is possible. Otherwise just transition into the + * connected state without requesting the remote features. + */ + if (conn->out || + (hdev->le_features[0] & HCI_LE_SLAVE_FEATURES)) { + struct hci_cp_le_read_remote_features cp; + + cp.handle = __cpu_to_le16(conn->handle); + + hci_send_cmd(hdev, HCI_OP_LE_READ_REMOTE_FEATURES, + sizeof(cp), &cp); + + hci_conn_hold(conn); + } else { + conn->state = BT_CONNECTED; + hci_connect_cfm(conn, ev->status); + } + } else { + hci_connect_cfm(conn, ev->status); + } params = hci_pend_le_action_lookup(&hdev->pend_le_conns, &conn->dst, conn->dst_type); @@ -4826,6 +4883,48 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_dev_unlock(hdev); } +static void hci_le_remote_feat_complete_evt(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_ev_le_remote_feat_complete *ev = (void *)skb->data; + struct hci_conn *conn; + + BT_DBG("%s status 0x%2.2x", hdev->name, ev->status); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, __le16_to_cpu(ev->handle)); + if (conn) { + if (!ev->status) + memcpy(conn->features[0], ev->features, 8); + + if (conn->state == BT_CONFIG) { + __u8 status; + + /* If the local controller supports slave-initiated + * features exchange, but the remote controller does + * not, then it is possible that the error code 0x1a + * for unsupported remote feature gets returned. + * + * In this specific case, allow the connection to + * transition into connected state and mark it as + * successful. + */ + if ((hdev->le_features[0] & HCI_LE_SLAVE_FEATURES) && + !conn->out && ev->status == 0x1a) + status = 0x00; + else + status = ev->status; + + conn->state = BT_CONNECTED; + hci_connect_cfm(conn, status); + hci_conn_drop(conn); + } + } + + hci_dev_unlock(hdev); +} + static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_le_ltk_req *ev = (void *) skb->data; @@ -4999,6 +5098,10 @@ static void hci_le_meta_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_le_adv_report_evt(hdev, skb); break; + case HCI_EV_LE_REMOTE_FEAT_COMPLETE: + hci_le_remote_feat_complete_evt(hdev, skb); + break; + case HCI_EV_LE_LTK_REQ: hci_le_ltk_request_evt(hdev, skb); break; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 845dfcc43a20..7fd87e7135b5 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -6466,6 +6466,145 @@ static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, return eir_len; } +static void read_local_oob_ext_data_complete(struct hci_dev *hdev, u8 status, + u16 opcode, struct sk_buff *skb) +{ + const struct mgmt_cp_read_local_oob_ext_data *mgmt_cp; + struct mgmt_rp_read_local_oob_ext_data *mgmt_rp; + u8 *h192, *r192, *h256, *r256; + struct mgmt_pending_cmd *cmd; + u16 eir_len; + int err; + + BT_DBG("%s status %u", hdev->name, status); + + cmd = pending_find(MGMT_OP_READ_LOCAL_OOB_EXT_DATA, hdev); + if (!cmd) + return; + + mgmt_cp = cmd->param; + + if (status) { + status = mgmt_status(status); + eir_len = 0; + + h192 = NULL; + r192 = NULL; + h256 = NULL; + r256 = NULL; + } else if (opcode == HCI_OP_READ_LOCAL_OOB_DATA) { + struct hci_rp_read_local_oob_data *rp; + + if (skb->len != sizeof(*rp)) { + status = MGMT_STATUS_FAILED; + eir_len = 0; + } else { + status = MGMT_STATUS_SUCCESS; + rp = (void *)skb->data; + + eir_len = 5 + 18 + 18; + h192 = rp->hash; + r192 = rp->rand; + h256 = NULL; + r256 = NULL; + } + } else { + struct hci_rp_read_local_oob_ext_data *rp; + + if (skb->len != sizeof(*rp)) { + status = MGMT_STATUS_FAILED; + eir_len = 0; + } else { + status = MGMT_STATUS_SUCCESS; + rp = (void *)skb->data; + + if (hci_dev_test_flag(hdev, HCI_SC_ONLY)) { + eir_len = 5 + 18 + 18; + h192 = NULL; + r192 = NULL; + } else { + eir_len = 5 + 18 + 18 + 18 + 18; + h192 = rp->hash192; + r192 = rp->rand192; + } + + h256 = rp->hash256; + r256 = rp->rand256; + } + } + + mgmt_rp = kmalloc(sizeof(*mgmt_rp) + eir_len, GFP_KERNEL); + if (!mgmt_rp) + goto done; + + if (status) + goto send_rsp; + + eir_len = eir_append_data(mgmt_rp->eir, 0, EIR_CLASS_OF_DEV, + hdev->dev_class, 3); + + if (h192 && r192) { + eir_len = eir_append_data(mgmt_rp->eir, eir_len, + EIR_SSP_HASH_C192, h192, 16); + eir_len = eir_append_data(mgmt_rp->eir, eir_len, + EIR_SSP_RAND_R192, r192, 16); + } + + if (h256 && r256) { + eir_len = eir_append_data(mgmt_rp->eir, eir_len, + EIR_SSP_HASH_C256, h256, 16); + eir_len = eir_append_data(mgmt_rp->eir, eir_len, + EIR_SSP_RAND_R256, r256, 16); + } + +send_rsp: + mgmt_rp->type = mgmt_cp->type; + mgmt_rp->eir_len = cpu_to_le16(eir_len); + + err = mgmt_cmd_complete(cmd->sk, hdev->id, + MGMT_OP_READ_LOCAL_OOB_EXT_DATA, status, + mgmt_rp, sizeof(*mgmt_rp) + eir_len); + if (err < 0 || status) + goto done; + + hci_sock_set_flag(cmd->sk, HCI_MGMT_OOB_DATA_EVENTS); + + err = mgmt_limited_event(MGMT_EV_LOCAL_OOB_DATA_UPDATED, hdev, + mgmt_rp, sizeof(*mgmt_rp) + eir_len, + HCI_MGMT_OOB_DATA_EVENTS, cmd->sk); +done: + kfree(mgmt_rp); + mgmt_pending_remove(cmd); +} + +static int read_local_ssp_oob_req(struct hci_dev *hdev, struct sock *sk, + struct mgmt_cp_read_local_oob_ext_data *cp) +{ + struct mgmt_pending_cmd *cmd; + struct hci_request req; + int err; + + cmd = mgmt_pending_add(sk, MGMT_OP_READ_LOCAL_OOB_EXT_DATA, hdev, + cp, sizeof(*cp)); + if (!cmd) + return -ENOMEM; + + hci_req_init(&req, hdev); + + if (bredr_sc_enabled(hdev)) + hci_req_add(&req, HCI_OP_READ_LOCAL_OOB_EXT_DATA, 0, NULL); + else + hci_req_add(&req, HCI_OP_READ_LOCAL_OOB_DATA, 0, NULL); + + err = hci_req_run_skb(&req, read_local_oob_ext_data_complete); + if (err < 0) { + mgmt_pending_remove(cmd); + return err; + } + + return 0; +} + static int read_local_oob_ext_data(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { @@ -6517,8 +6656,19 @@ static int read_local_oob_ext_data(struct sock *sk, struct hci_dev *hdev, eir_len = 0; switch (cp->type) { case BIT(BDADDR_BREDR): - eir_len = eir_append_data(rp->eir, eir_len, EIR_CLASS_OF_DEV, - hdev->dev_class, 3); + if (hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) { + err = read_local_ssp_oob_req(hdev, sk, cp); + hci_dev_unlock(hdev); + if (!err) + goto done; + + status = MGMT_STATUS_FAILED; + goto complete; + } else { + eir_len = eir_append_data(rp->eir, eir_len, + EIR_CLASS_OF_DEV, + hdev->dev_class, 3); + } break; case (BIT(BDADDR_LE_PUBLIC) | BIT(BDADDR_LE_RANDOM)): if (hci_dev_test_flag(hdev, HCI_SC_ENABLED) && diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 3304a5442331..e97572b5d2cc 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -35,7 +35,7 @@ static inline int should_deliver(const struct net_bridge_port *p, p->state == BR_STATE_FORWARDING; } -int br_dev_queue_push_xmit(struct sk_buff *skb) +int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb) { if (!is_skb_forwardable(skb->dev, skb)) { kfree_skb(skb); @@ -49,9 +49,10 @@ int br_dev_queue_push_xmit(struct sk_buff *skb) } EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit); -int br_forward_finish(struct sk_buff *skb) +int br_forward_finish(struct sock *sk, struct sk_buff *skb) { - return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, skb, NULL, skb->dev, + return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING, sk, skb, + NULL, skb->dev, br_dev_queue_push_xmit); } @@ -75,7 +76,8 @@ static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb) return; } - NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, NULL, skb, + NULL, skb->dev, br_forward_finish); } @@ -96,7 +98,8 @@ static void __br_forward(const struct net_bridge_port *to, struct sk_buff *skb) skb->dev = to->dev; skb_forward_csum(skb); - NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, indev, skb->dev, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, NULL, skb, + indev, skb->dev, br_forward_finish); } diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 052c5ebbc947..f921a5dce22d 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -55,8 +55,9 @@ static int br_pass_frame_up(struct sk_buff *skb) if (!skb) return NET_RX_DROP; - return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, indev, NULL, - netif_receive_skb); + return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, NULL, skb, + indev, NULL, + netif_receive_skb_sk); } static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, @@ -119,7 +120,7 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, } /* note: already called with rcu_read_lock */ -int br_handle_frame_finish(struct sk_buff *skb) +int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb) { const unsigned char *dest = eth_hdr(skb)->h_dest; struct net_bridge_port *p = br_port_get_rcu(skb->dev); @@ -207,7 +208,7 @@ drop: EXPORT_SYMBOL_GPL(br_handle_frame_finish); /* note: already called with rcu_read_lock */ -static int br_handle_local_finish(struct sk_buff *skb) +static int br_handle_local_finish(struct sock *sk, struct sk_buff *skb) { struct net_bridge_port *p = br_port_get_rcu(skb->dev); u16 vid = 0; @@ -277,8 +278,8 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) } /* Deliver packet to local host only */ - if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, skb, skb->dev, - NULL, br_handle_local_finish)) { + if (NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, NULL, skb, + skb->dev, NULL, br_handle_local_finish)) { return RX_HANDLER_CONSUMED; /* consumed by filter */ } else { *pskb = skb; @@ -302,7 +303,8 @@ forward: if (ether_addr_equal(p->br->dev->dev_addr, dest)) skb->pkt_type = PACKET_HOST; - NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, NULL, skb, + skb->dev, NULL, br_handle_frame_finish); break; default: diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index c465876c7861..4b6722f8f179 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -814,7 +814,8 @@ static void __br_multicast_send_query(struct net_bridge *br, if (port) { skb->dev = port->dev; - NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, NULL, skb, + NULL, skb->dev, br_dev_queue_push_xmit); } else { br_multicast_select_own_querier(br, ip, skb); diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 7527e94dd5dc..ab55e2472beb 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -111,6 +111,24 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb) pppoe_proto(skb) == htons(PPP_IPV6) && \ brnf_filter_pppoe_tagged) +/* largest possible L2 header, see br_nf_dev_queue_xmit() */ +#define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN) + +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) +struct brnf_frag_data { + char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH]; + u8 encap_size; + u8 size; +}; + +static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage); +#endif + +static struct nf_bridge_info *nf_bridge_info_get(const struct sk_buff *skb) +{ + return skb->nf_bridge; +} + static inline struct rtable *bridge_parent_rtable(const struct net_device *dev) { struct net_bridge_port *port; @@ -189,14 +207,6 @@ static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb) skb->network_header += len; } -static inline void nf_bridge_save_header(struct sk_buff *skb) -{ - int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); - - skb_copy_from_linear_data_offset(skb, -header_size, - skb->nf_bridge->data, header_size); -} - /* When handing a packet over to the IP layer * check whether we have a skb that is in the * expected format @@ -252,23 +262,29 @@ drop: static void nf_bridge_update_protocol(struct sk_buff *skb) { - if (skb->nf_bridge->mask & BRNF_8021Q) + switch (skb->nf_bridge->orig_proto) { + case BRNF_PROTO_8021Q: skb->protocol = htons(ETH_P_8021Q); - else if (skb->nf_bridge->mask & BRNF_PPPoE) + break; + case BRNF_PROTO_PPPOE: skb->protocol = htons(ETH_P_PPP_SES); + break; + case BRNF_PROTO_UNCHANGED: + break; + } } /* PF_BRIDGE/PRE_ROUTING *********************************************/ /* Undo the changes made for ip6tables PREROUTING and continue the * bridge PRE_ROUTING hook. */ -static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) +static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb) { - struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct rtable *rt; - if (nf_bridge->mask & BRNF_PKT_TYPE) { + if (nf_bridge->pkt_otherhost) { skb->pkt_type = PACKET_OTHERHOST; - nf_bridge->mask ^= BRNF_PKT_TYPE; + nf_bridge->pkt_otherhost = false; } nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; @@ -282,7 +298,8 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb, + skb->dev, NULL, br_handle_frame_finish, 1); return 0; @@ -293,9 +310,8 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb) * don't, we use the neighbour framework to find out. In both cases, we make * sure that br_handle_frame_finish() is called afterwards. */ -static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) +static int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb) { - struct nf_bridge_info *nf_bridge = skb->nf_bridge; struct neighbour *neigh; struct dst_entry *dst; @@ -305,12 +321,13 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) dst = skb_dst(skb); neigh = dst_neigh_lookup_skb(dst, skb); if (neigh) { + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); int ret; if (neigh->hh.hh_len) { neigh_hh_bridge(&neigh->hh, skb); skb->dev = nf_bridge->physindev; - ret = br_handle_frame_finish(skb); + ret = br_handle_frame_finish(sk, skb); } else { /* the neighbour function below overwrites the complete * MAC header, so we save the Ethernet source address and @@ -318,7 +335,7 @@ static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) */ skb_copy_from_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), - skb->nf_bridge->data, + nf_bridge->neigh_header, ETH_HLEN-ETH_ALEN); /* tell br_dev_xmit to continue with forwarding */ nf_bridge->mask |= BRNF_BRIDGED_DNAT; @@ -387,11 +404,11 @@ static bool dnat_took_place(const struct sk_buff *skb) * device, we proceed as if ip_route_input() succeeded. If it differs from the * logical bridge port or if ip_route_output_key() fails we drop the packet. */ -static int br_nf_pre_routing_finish(struct sk_buff *skb) +static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb->dev; struct iphdr *iph = ip_hdr(skb); - struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct rtable *rt; int err; int frag_max_size; @@ -399,9 +416,9 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb) frag_max_size = IPCB(skb)->frag_max_size; BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size; - if (nf_bridge->mask & BRNF_PKT_TYPE) { + if (nf_bridge->pkt_otherhost) { skb->pkt_type = PACKET_OTHERHOST; - nf_bridge->mask ^= BRNF_PKT_TYPE; + nf_bridge->pkt_otherhost = false; } nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; if (dnat_took_place(skb)) { @@ -440,7 +457,7 @@ bridged_dnat: nf_bridge_push_encap_header(skb); NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, - skb, skb->dev, NULL, + sk, skb, skb->dev, NULL, br_nf_pre_routing_finish_bridge, 1); return 0; @@ -460,7 +477,8 @@ bridged_dnat: skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, sk, skb, + skb->dev, NULL, br_handle_frame_finish, 1); return 0; @@ -483,20 +501,21 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct /* Some common code for IPv4/IPv6 */ static struct net_device *setup_pre_routing(struct sk_buff *skb) { - struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); if (skb->pkt_type == PACKET_OTHERHOST) { skb->pkt_type = PACKET_HOST; - nf_bridge->mask |= BRNF_PKT_TYPE; + nf_bridge->pkt_otherhost = true; } nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING; nf_bridge->physindev = skb->dev; skb->dev = brnf_get_logical_dev(skb, skb->dev); + if (skb->protocol == htons(ETH_P_8021Q)) - nf_bridge->mask |= BRNF_8021Q; + nf_bridge->orig_proto = BRNF_PROTO_8021Q; else if (skb->protocol == htons(ETH_P_PPP_SES)) - nf_bridge->mask |= BRNF_PPPoE; + nf_bridge->orig_proto = BRNF_PROTO_PPPOE; /* Must drop socket now because of tproxy. */ skb_orphan(skb); @@ -596,7 +615,8 @@ static unsigned int br_nf_pre_routing_ipv6(const struct nf_hook_ops *ops, return NF_DROP; skb->protocol = htons(ETH_P_IPV6); - NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, state->sk, skb, + skb->dev, NULL, br_nf_pre_routing_finish_ipv6); return NF_STOLEN; @@ -651,7 +671,8 @@ static unsigned int br_nf_pre_routing(const struct nf_hook_ops *ops, skb->protocol = htons(ETH_P_IP); - NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, state->sk, skb, + skb->dev, NULL, br_nf_pre_routing_finish); return NF_STOLEN; @@ -674,16 +695,23 @@ static unsigned int br_nf_local_in(const struct nf_hook_ops *ops, } /* PF_BRIDGE/FORWARD *************************************************/ -static int br_nf_forward_finish(struct sk_buff *skb) +static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb) { - struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct net_device *in; if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) { + int frag_max_size; + + if (skb->protocol == htons(ETH_P_IP)) { + frag_max_size = IPCB(skb)->frag_max_size; + BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size; + } + in = nf_bridge->physindev; - if (nf_bridge->mask & BRNF_PKT_TYPE) { + if (nf_bridge->pkt_otherhost) { skb->pkt_type = PACKET_OTHERHOST; - nf_bridge->mask ^= BRNF_PKT_TYPE; + nf_bridge->pkt_otherhost = false; } nf_bridge_update_protocol(skb); } else { @@ -691,8 +719,8 @@ static int br_nf_forward_finish(struct sk_buff *skb) } nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, in, - skb->dev, br_forward_finish, 1); + NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, sk, skb, + in, skb->dev, br_forward_finish, 1); return 0; } @@ -718,6 +746,10 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, if (!nf_bridge_unshare(skb)) return NF_DROP; + nf_bridge = nf_bridge_info_get(skb); + if (!nf_bridge) + return NF_DROP; + parent = bridge_parent(state->out); if (!parent) return NF_DROP; @@ -731,14 +763,19 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, nf_bridge_pull_encap_header(skb); - nf_bridge = skb->nf_bridge; if (skb->pkt_type == PACKET_OTHERHOST) { skb->pkt_type = PACKET_HOST; - nf_bridge->mask |= BRNF_PKT_TYPE; + nf_bridge->pkt_otherhost = true; } - if (pf == NFPROTO_IPV4 && br_parse_ip_options(skb)) - return NF_DROP; + if (pf == NFPROTO_IPV4) { + int frag_max = BR_INPUT_SKB_CB(skb)->frag_max_size; + + if (br_parse_ip_options(skb)) + return NF_DROP; + + IPCB(skb)->frag_max_size = frag_max; + } nf_bridge->physoutdev = skb->dev; if (pf == NFPROTO_IPV4) @@ -746,7 +783,8 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops, else skb->protocol = htons(ETH_P_IPV6); - NF_HOOK(pf, NF_INET_FORWARD, skb, brnf_get_logical_dev(skb, state->in), + NF_HOOK(pf, NF_INET_FORWARD, NULL, skb, + brnf_get_logical_dev(skb, state->in), parent, br_nf_forward_finish); return NF_STOLEN; @@ -780,69 +818,74 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops, return NF_ACCEPT; } *d = state->in; - NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, skb, state->in, - state->out, br_nf_forward_finish); + NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, state->sk, skb, + state->in, state->out, br_nf_forward_finish); return NF_STOLEN; } #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4) -static bool nf_bridge_copy_header(struct sk_buff *skb) +static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb) { + struct brnf_frag_data *data; int err; - unsigned int header_size; - - nf_bridge_update_protocol(skb); - header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); - err = skb_cow_head(skb, header_size); - if (err) - return false; - skb_copy_to_linear_data_offset(skb, -header_size, - skb->nf_bridge->data, header_size); - __skb_push(skb, nf_bridge_encap_header_len(skb)); - return true; -} + data = this_cpu_ptr(&brnf_frag_data_storage); + err = skb_cow_head(skb, data->size); -static int br_nf_push_frag_xmit(struct sk_buff *skb) -{ - if (!nf_bridge_copy_header(skb)) { + if (err) { kfree_skb(skb); return 0; } - return br_dev_queue_push_xmit(skb); + skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size); + __skb_push(skb, data->encap_size); + + return br_dev_queue_push_xmit(sk, skb); } -static int br_nf_dev_queue_xmit(struct sk_buff *skb) +static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) { int ret; int frag_max_size; unsigned int mtu_reserved; if (skb_is_gso(skb) || skb->protocol != htons(ETH_P_IP)) - return br_dev_queue_push_xmit(skb); + return br_dev_queue_push_xmit(sk, skb); mtu_reserved = nf_bridge_mtu_reduction(skb); /* This is wrong! We should preserve the original fragment * boundaries by preserving frag_list rather than refragmenting. */ if (skb->len + mtu_reserved > skb->dev->mtu) { + struct brnf_frag_data *data; + frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; if (br_parse_ip_options(skb)) /* Drop invalid packet */ return NF_DROP; IPCB(skb)->frag_max_size = frag_max_size; - ret = ip_fragment(skb, br_nf_push_frag_xmit); - } else - ret = br_dev_queue_push_xmit(skb); + + nf_bridge_update_protocol(skb); + + data = this_cpu_ptr(&brnf_frag_data_storage); + data->encap_size = nf_bridge_encap_header_len(skb); + data->size = ETH_HLEN + data->encap_size; + + skb_copy_from_linear_data_offset(skb, -data->size, data->mac, + data->size); + + ret = ip_fragment(sk, skb, br_nf_push_frag_xmit); + } else { + ret = br_dev_queue_push_xmit(sk, skb); + } return ret; } #else -static int br_nf_dev_queue_xmit(struct sk_buff *skb) +static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb) { - return br_dev_queue_push_xmit(skb); + return br_dev_queue_push_xmit(sk, skb); } #endif @@ -851,7 +894,7 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct nf_hook_state *state) { - struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct net_device *realoutdev = bridge_parent(skb->dev); u_int8_t pf; @@ -877,17 +920,17 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops, * about the value of skb->pkt_type. */ if (skb->pkt_type == PACKET_OTHERHOST) { skb->pkt_type = PACKET_HOST; - nf_bridge->mask |= BRNF_PKT_TYPE; + nf_bridge->pkt_otherhost = true; } nf_bridge_pull_encap_header(skb); - nf_bridge_save_header(skb); if (pf == NFPROTO_IPV4) skb->protocol = htons(ETH_P_IP); else skb->protocol = htons(ETH_P_IPV6); - NF_HOOK(pf, NF_INET_POST_ROUTING, skb, NULL, realoutdev, + NF_HOOK(pf, NF_INET_POST_ROUTING, state->sk, skb, + NULL, realoutdev, br_nf_dev_queue_xmit); return NF_STOLEN; @@ -919,15 +962,18 @@ static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops, */ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb) { - struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); skb_pull(skb, ETH_HLEN); nf_bridge->mask &= ~BRNF_BRIDGED_DNAT; - skb_copy_to_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), - skb->nf_bridge->data, ETH_HLEN-ETH_ALEN); + BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN)); + + skb_copy_to_linear_data_offset(skb, -(ETH_HLEN - ETH_ALEN), + nf_bridge->neigh_header, + ETH_HLEN - ETH_ALEN); skb->dev = nf_bridge->physindev; - br_handle_frame_finish(skb); + br_handle_frame_finish(NULL, skb); } static int br_nf_dev_xmit(struct sk_buff *skb) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index b46fa0c5b8ec..6ca0251cb478 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -410,10 +410,10 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, /* br_forward.c */ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb); -int br_dev_queue_push_xmit(struct sk_buff *skb); +int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb); void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, struct sk_buff *skb0); -int br_forward_finish(struct sk_buff *skb); +int br_forward_finish(struct sock *sk, struct sk_buff *skb); void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, bool unicast); void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, struct sk_buff *skb2, bool unicast); @@ -431,7 +431,7 @@ void br_port_flags_change(struct net_bridge_port *port, unsigned long mask); void br_manage_promisc(struct net_bridge *br); /* br_input.c */ -int br_handle_frame_finish(struct sk_buff *skb); +int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb); rx_handler_result_t br_handle_frame(struct sk_buff **pskb); static inline bool br_rx_handler_check_rcu(const struct net_device *dev) diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index bdb459d21ad8..534fc4cd263e 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -54,8 +54,9 @@ static void br_send_bpdu(struct net_bridge_port *p, skb_reset_mac_header(skb); - NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, - dev_queue_xmit); + NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_OUT, NULL, skb, + NULL, skb->dev, + dev_queue_xmit_sk); } static inline void br_set_ticks(unsigned char *dest, int j) diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index 4f02109d708f..a21269b83f16 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -19,12 +19,12 @@ #include "../br_private.h" static void nft_meta_bridge_get_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_meta *priv = nft_expr_priv(expr); const struct net_device *in = pkt->in, *out = pkt->out; - struct nft_data *dest = &data[priv->dreg]; + u32 *dest = ®s->data[priv->dreg]; const struct net_bridge_port *p; switch (priv->key) { @@ -40,12 +40,12 @@ static void nft_meta_bridge_get_eval(const struct nft_expr *expr, goto out; } - strncpy((char *)dest->data, p->br->dev->name, sizeof(dest->data)); + strncpy((char *)dest, p->br->dev->name, IFNAMSIZ); return; out: - return nft_meta_get_eval(expr, data, pkt); + return nft_meta_get_eval(expr, regs, pkt); err: - data[NFT_REG_VERDICT].verdict = NFT_BREAK; + regs->verdict.code = NFT_BREAK; } static int nft_meta_bridge_get_init(const struct nft_ctx *ctx, @@ -53,27 +53,21 @@ static int nft_meta_bridge_get_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_meta *priv = nft_expr_priv(expr); - int err; + unsigned int len; priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); switch (priv->key) { case NFT_META_BRI_IIFNAME: case NFT_META_BRI_OIFNAME: + len = IFNAMSIZ; break; default: return nft_meta_get_init(ctx, expr, tb); } - priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG])); - err = nft_validate_output_register(priv->dreg); - if (err < 0) - return err; - - err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); - if (err < 0) - return err; - - return 0; + priv->dreg = nft_parse_register(tb[NFTA_META_DREG]); + return nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, len); } static struct nft_expr_type nft_meta_bridge_type; diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 54a2fdf0f457..858d848564ee 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -257,8 +257,8 @@ static void nft_reject_br_send_v6_unreach(struct net *net, } static void nft_reject_bridge_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); @@ -310,7 +310,7 @@ static void nft_reject_bridge_eval(const struct nft_expr *expr, break; } out: - data[NFT_REG_VERDICT].verdict = NF_DROP; + regs->verdict.code = NF_DROP; } static int nft_reject_bridge_validate(const struct nft_ctx *ctx, @@ -371,6 +371,8 @@ static int nft_reject_bridge_dump(struct sk_buff *skb, if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code)) goto nla_put_failure; break; + default: + break; } return 0; diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index 6b3f54ed65ba..a9f4ae45b7fb 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -484,7 +484,7 @@ static int ceph_tcp_connect(struct ceph_connection *con) IPPROTO_TCP, &sock); if (ret) return ret; - sock->sk->sk_allocation = GFP_NOFS | __GFP_MEMALLOC; + sock->sk->sk_allocation = GFP_NOFS; #ifdef CONFIG_LOCKDEP lockdep_set_class(&sock->sk->sk_lock, &socket_class); @@ -520,8 +520,6 @@ static int ceph_tcp_connect(struct ceph_connection *con) ret); } - sk_set_memalloc(sock->sk); - con->sock = sock; return 0; } @@ -2808,11 +2806,8 @@ static void con_work(struct work_struct *work) { struct ceph_connection *con = container_of(work, struct ceph_connection, work.work); - unsigned long pflags = current->flags; bool fault; - current->flags |= PF_MEMALLOC; - mutex_lock(&con->mutex); while (true) { int ret; @@ -2866,8 +2861,6 @@ static void con_work(struct work_struct *work) con_fault_finish(con); con->ops->put(con); - - tsk_restore_flags(current, pflags, PF_MEMALLOC); } /* diff --git a/net/core/dev.c b/net/core/dev.c index 3b3965288f52..af4a1b0adc10 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1630,6 +1630,22 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev) } EXPORT_SYMBOL(call_netdevice_notifiers); +#ifdef CONFIG_NET_CLS_ACT +static struct static_key ingress_needed __read_mostly; + +void net_inc_ingress_queue(void) +{ + static_key_slow_inc(&ingress_needed); +} +EXPORT_SYMBOL_GPL(net_inc_ingress_queue); + +void net_dec_ingress_queue(void) +{ + static_key_slow_dec(&ingress_needed); +} +EXPORT_SYMBOL_GPL(net_dec_ingress_queue); +#endif + static struct static_key netstamp_needed __read_mostly; #ifdef HAVE_JUMP_LABEL /* We are not allowed to call static_key_slow_dec() from irq context @@ -2879,7 +2895,7 @@ EXPORT_SYMBOL(xmit_recursion); * dev_loopback_xmit - loop back @skb * @skb: buffer to transmit */ -int dev_loopback_xmit(struct sk_buff *skb) +int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb) { skb_reset_mac_header(skb); __skb_pull(skb, skb_network_offset(skb)); @@ -3017,11 +3033,11 @@ out: return rc; } -int dev_queue_xmit(struct sk_buff *skb) +int dev_queue_xmit_sk(struct sock *sk, struct sk_buff *skb) { return __dev_queue_xmit(skb, NULL); } -EXPORT_SYMBOL(dev_queue_xmit); +EXPORT_SYMBOL(dev_queue_xmit_sk); int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv) { @@ -3547,7 +3563,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue); if (!rxq || rcu_access_pointer(rxq->qdisc) == &noop_qdisc) - goto out; + return skb; if (*pt_prev) { *ret = deliver_skb(skb, *pt_prev, orig_dev); @@ -3561,8 +3577,6 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, return NULL; } -out: - skb->tc_verd = 0; return skb; } #endif @@ -3698,12 +3712,15 @@ another_round: skip_taps: #ifdef CONFIG_NET_CLS_ACT - skb = handle_ing(skb, &pt_prev, &ret, orig_dev); - if (!skb) - goto unlock; + if (static_key_false(&ingress_needed)) { + skb = handle_ing(skb, &pt_prev, &ret, orig_dev); + if (!skb) + goto unlock; + } + + skb->tc_verd = 0; ncls: #endif - if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) goto drop; @@ -3853,13 +3870,13 @@ static int netif_receive_skb_internal(struct sk_buff *skb) * NET_RX_SUCCESS: no congestion * NET_RX_DROP: packet was dropped */ -int netif_receive_skb(struct sk_buff *skb) +int netif_receive_skb_sk(struct sock *sk, struct sk_buff *skb) { trace_netif_receive_skb_entry(skb); return netif_receive_skb_internal(skb); } -EXPORT_SYMBOL(netif_receive_skb); +EXPORT_SYMBOL(netif_receive_skb_sk); /* Network device is going away, flush any packets still pending * Called with irqs disabled. diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index e7345d9031df..a3abb719221f 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -148,9 +148,11 @@ static void ops_free_list(const struct pernet_operations *ops, } } +static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd, + int id); static int alloc_netid(struct net *net, struct net *peer, int reqid) { - int min = 0, max = 0; + int min = 0, max = 0, id; ASSERT_RTNL(); @@ -159,7 +161,11 @@ static int alloc_netid(struct net *net, struct net *peer, int reqid) max = reqid + 1; } - return idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL); + id = idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL); + if (id >= 0) + rtnl_net_notifyid(net, peer, RTM_NEWNSID, id); + + return id; } /* This function is used by idr_for_each(). If net is equal to peer, the @@ -359,8 +365,10 @@ static void cleanup_net(struct work_struct *work) for_each_net(tmp) { int id = __peernet2id(tmp, net, false); - if (id >= 0) + if (id >= 0) { + rtnl_net_notifyid(tmp, net, RTM_DELNSID, id); idr_remove(&tmp->netns_ids, id); + } } idr_destroy(&net->netns_ids); @@ -531,7 +539,8 @@ static int rtnl_net_get_size(void) } static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags, - int cmd, struct net *net, struct net *peer) + int cmd, struct net *net, struct net *peer, + int nsid) { struct nlmsghdr *nlh; struct rtgenmsg *rth; @@ -546,9 +555,13 @@ static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags, rth = nlmsg_data(nlh); rth->rtgen_family = AF_UNSPEC; - id = __peernet2id(net, peer, false); - if (id < 0) - id = NETNSA_NSID_NOT_ASSIGNED; + if (nsid >= 0) { + id = nsid; + } else { + id = __peernet2id(net, peer, false); + if (id < 0) + id = NETNSA_NSID_NOT_ASSIGNED; + } if (nla_put_s32(skb, NETNSA_NSID, id)) goto nla_put_failure; @@ -565,8 +578,8 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh) struct net *net = sock_net(skb->sk); struct nlattr *tb[NETNSA_MAX + 1]; struct sk_buff *msg; - int err = -ENOBUFS; struct net *peer; + int err; err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, rtnl_net_policy); @@ -589,7 +602,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh) } err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, - RTM_GETNSID, net, peer); + RTM_GETNSID, net, peer, -1); if (err < 0) goto err_out; @@ -603,6 +616,75 @@ out: return err; } +struct rtnl_net_dump_cb { + struct net *net; + struct sk_buff *skb; + struct netlink_callback *cb; + int idx; + int s_idx; +}; + +static int rtnl_net_dumpid_one(int id, void *peer, void *data) +{ + struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data; + int ret; + + if (net_cb->idx < net_cb->s_idx) + goto cont; + + ret = rtnl_net_fill(net_cb->skb, NETLINK_CB(net_cb->cb->skb).portid, + net_cb->cb->nlh->nlmsg_seq, NLM_F_MULTI, + RTM_NEWNSID, net_cb->net, peer, id); + if (ret < 0) + return ret; + +cont: + net_cb->idx++; + return 0; +} + +static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct rtnl_net_dump_cb net_cb = { + .net = net, + .skb = skb, + .cb = cb, + .idx = 0, + .s_idx = cb->args[0], + }; + + ASSERT_RTNL(); + + idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb); + + cb->args[0] = net_cb.idx; + return skb->len; +} + +static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd, + int id) +{ + struct sk_buff *msg; + int err = -ENOMEM; + + msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL); + if (!msg) + goto out; + + err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, peer, id); + if (err < 0) + goto err_out; + + rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, 0); + return; + +err_out: + nlmsg_free(msg); +out: + rtnl_set_sk_err(net, RTNLGRP_NSID, err); +} + static int __init net_ns_init(void) { struct net_generic *ng; @@ -637,7 +719,8 @@ static int __init net_ns_init(void) register_pernet_subsys(&net_ns_ops); rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid, + NULL); return 0; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 5e02260b087f..358d52a38533 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -818,7 +818,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, nla_total_size(sizeof(struct ifla_vf_vlan)) + nla_total_size(sizeof(struct ifla_vf_spoofchk)) + nla_total_size(sizeof(struct ifla_vf_rate)) + - nla_total_size(sizeof(struct ifla_vf_link_state))); + nla_total_size(sizeof(struct ifla_vf_link_state)) + + nla_total_size(sizeof(struct ifla_vf_rss_query_en))); return size; } else return 0; @@ -1132,14 +1133,16 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct ifla_vf_tx_rate vf_tx_rate; struct ifla_vf_spoofchk vf_spoofchk; struct ifla_vf_link_state vf_linkstate; + struct ifla_vf_rss_query_en vf_rss_query_en; /* * Not all SR-IOV capable drivers support the - * spoofcheck query. Preset to -1 so the user - * space tool can detect that the driver didn't - * report anything. + * spoofcheck and "RSS query enable" query. Preset to + * -1 so the user space tool can detect that the driver + * didn't report anything. */ ivi.spoofchk = -1; + ivi.rss_query_en = -1; memset(ivi.mac, 0, sizeof(ivi.mac)); /* The default value for VF link state is "auto" * IFLA_VF_LINK_STATE_AUTO which equals zero @@ -1152,7 +1155,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, vf_rate.vf = vf_tx_rate.vf = vf_spoofchk.vf = - vf_linkstate.vf = ivi.vf; + vf_linkstate.vf = + vf_rss_query_en.vf = ivi.vf; memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); vf_vlan.vlan = ivi.vlan; @@ -1162,6 +1166,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, vf_rate.max_tx_rate = ivi.max_tx_rate; vf_spoofchk.setting = ivi.spoofchk; vf_linkstate.link_state = ivi.linkstate; + vf_rss_query_en.setting = ivi.rss_query_en; vf = nla_nest_start(skb, IFLA_VF_INFO); if (!vf) { nla_nest_cancel(skb, vfinfo); @@ -1176,7 +1181,10 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk), &vf_spoofchk) || nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate), - &vf_linkstate)) + &vf_linkstate) || + nla_put(skb, IFLA_VF_RSS_QUERY_EN, + sizeof(vf_rss_query_en), + &vf_rss_query_en)) goto nla_put_failure; nla_nest_end(skb, vf); } @@ -1290,6 +1298,7 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { [IFLA_VF_SPOOFCHK] = { .len = sizeof(struct ifla_vf_spoofchk) }, [IFLA_VF_RATE] = { .len = sizeof(struct ifla_vf_rate) }, [IFLA_VF_LINK_STATE] = { .len = sizeof(struct ifla_vf_link_state) }, + [IFLA_VF_RSS_QUERY_EN] = { .len = sizeof(struct ifla_vf_rss_query_en) }, }; static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { @@ -1500,6 +1509,17 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) ivl->link_state); break; } + case IFLA_VF_RSS_QUERY_EN: { + struct ifla_vf_rss_query_en *ivrssq_en; + + ivrssq_en = nla_data(vf); + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_rss_query_en) + err = ops->ndo_set_vf_rss_query_en(dev, + ivrssq_en->vf, + ivrssq_en->setting); + break; + } default: err = -EINVAL; break; @@ -1932,7 +1952,7 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm) EXPORT_SYMBOL(rtnl_configure_link); struct net_device *rtnl_create_link(struct net *net, - char *ifname, unsigned char name_assign_type, + const char *ifname, unsigned char name_assign_type, const struct rtnl_link_ops *ops, struct nlattr *tb[]) { int err; @@ -2404,7 +2424,7 @@ EXPORT_SYMBOL(rtmsg_ifinfo); static int nlmsg_populate_fdb_fill(struct sk_buff *skb, struct net_device *dev, - u8 *addr, u32 pid, u32 seq, + u8 *addr, u16 vid, u32 pid, u32 seq, int type, unsigned int flags, int nlflags) { @@ -2426,6 +2446,9 @@ static int nlmsg_populate_fdb_fill(struct sk_buff *skb, if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr)) goto nla_put_failure; + if (vid) + if (nla_put(skb, NDA_VLAN, sizeof(u16), &vid)) + goto nla_put_failure; nlmsg_end(skb, nlh); return 0; @@ -2440,7 +2463,7 @@ static inline size_t rtnl_fdb_nlmsg_size(void) return NLMSG_ALIGN(sizeof(struct ndmsg)) + nla_total_size(ETH_ALEN); } -static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, int type) +static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, u16 vid, int type) { struct net *net = dev_net(dev); struct sk_buff *skb; @@ -2450,7 +2473,8 @@ static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, int type) if (!skb) goto errout; - err = nlmsg_populate_fdb_fill(skb, dev, addr, 0, 0, type, NTF_SELF, 0); + err = nlmsg_populate_fdb_fill(skb, dev, addr, vid, + 0, 0, type, NTF_SELF, 0); if (err < 0) { kfree_skb(skb); goto errout; @@ -2585,7 +2609,7 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh) nlh->nlmsg_flags); if (!err) { - rtnl_fdb_notify(dev, addr, RTM_NEWNEIGH); + rtnl_fdb_notify(dev, addr, vid, RTM_NEWNEIGH); ndm->ndm_flags &= ~NTF_SELF; } } @@ -2686,7 +2710,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh) err = ndo_dflt_fdb_del(ndm, tb, dev, addr, vid); if (!err) { - rtnl_fdb_notify(dev, addr, RTM_DELNEIGH); + rtnl_fdb_notify(dev, addr, vid, RTM_DELNEIGH); ndm->ndm_flags &= ~NTF_SELF; } } @@ -2711,7 +2735,7 @@ static int nlmsg_populate_fdb(struct sk_buff *skb, if (*idx < cb->args[0]) goto skip; - err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, + err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, 0, portid, seq, RTM_NEWNEIGH, NTF_SELF, NLM_F_MULTI); @@ -2754,7 +2778,6 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct net_device *dev; struct nlattr *tb[IFLA_MAX+1]; - struct net_device *bdev = NULL; struct net_device *br_dev = NULL; const struct net_device_ops *ops = NULL; const struct net_device_ops *cops = NULL; @@ -2778,7 +2801,6 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) return -ENODEV; ops = br_dev->netdev_ops; - bdev = br_dev; } for_each_netdev(net, dev) { @@ -2791,7 +2813,6 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) cops = br_dev->netdev_ops; } - bdev = dev; } else { if (dev != br_dev && !(dev->priv_flags & IFF_BRIDGE_PORT)) @@ -2801,7 +2822,6 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) !(dev->priv_flags & IFF_EBRIDGE)) continue; - bdev = br_dev; cops = ops; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index cdb939b731aa..3b6e5830256e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3752,7 +3752,6 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked) } EXPORT_SYMBOL_GPL(skb_complete_wifi_ack); - /** * skb_partial_csum_set - set up and verify partial csum values for packet * @skb: the skb to set diff --git a/net/core/sock.c b/net/core/sock.c index 654e38a99759..e891bcf325ca 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2799,8 +2799,7 @@ int proto_register(struct proto *prot, int alloc_slab) kmem_cache_create(prot->twsk_prot->twsk_slab_name, prot->twsk_prot->twsk_obj_size, 0, - SLAB_HWCACHE_ALIGN | - prot->slab_flags, + prot->slab_flags, NULL); if (prot->twsk_prot->twsk_slab == NULL) goto out_free_timewait_sock_slab_name; diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 332f7d6d9942..5f566663e47f 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -27,28 +27,16 @@ struct inet_timewait_death_row dccp_death_row = { .sysctl_max_tw_buckets = NR_FILE * 2, - .period = DCCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS, - .death_lock = __SPIN_LOCK_UNLOCKED(dccp_death_row.death_lock), .hashinfo = &dccp_hashinfo, - .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0, - (unsigned long)&dccp_death_row), - .twkill_work = __WORK_INITIALIZER(dccp_death_row.twkill_work, - inet_twdr_twkill_work), -/* Short-time timewait calendar */ - - .twcal_hand = -1, - .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0, - (unsigned long)&dccp_death_row), }; EXPORT_SYMBOL_GPL(dccp_death_row); void dccp_time_wait(struct sock *sk, int state, int timeo) { - struct inet_timewait_sock *tw = NULL; + struct inet_timewait_sock *tw; - if (dccp_death_row.tw_count < dccp_death_row.sysctl_max_tw_buckets) - tw = inet_twsk_alloc(sk, state); + tw = inet_twsk_alloc(sk, &dccp_death_row, state); if (tw != NULL) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -71,8 +59,7 @@ void dccp_time_wait(struct sock *sk, int state, int timeo) if (state == DCCP_TIME_WAIT) timeo = DCCP_TIMEWAIT_LEN; - inet_twsk_schedule(tw, &dccp_death_row, timeo, - DCCP_TIMEWAIT_LEN); + inet_twsk_schedule(tw, timeo); inet_twsk_put(tw); } else { /* Sorry, if we're out of memory, just CLOSE this diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index be1f08cdad29..4507b188fc51 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -194,7 +194,7 @@ static int dn_neigh_output(struct neighbour *neigh, struct sk_buff *skb) return err; } -static int dn_neigh_output_packet(struct sk_buff *skb) +static int dn_neigh_output_packet(struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct dn_route *rt = (struct dn_route *)dst; @@ -206,7 +206,8 @@ static int dn_neigh_output_packet(struct sk_buff *skb) /* * For talking to broadcast devices: Ethernet & PPP */ -static int dn_long_output(struct neighbour *neigh, struct sk_buff *skb) +static int dn_long_output(struct neighbour *neigh, struct sock *sk, + struct sk_buff *skb) { struct net_device *dev = neigh->dev; int headroom = dev->hard_header_len + sizeof(struct dn_long_packet) + 3; @@ -245,14 +246,15 @@ static int dn_long_output(struct neighbour *neigh, struct sk_buff *skb) skb_reset_network_header(skb); - return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, skb, NULL, - neigh->dev, dn_neigh_output_packet); + return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, sk, skb, + NULL, neigh->dev, dn_neigh_output_packet); } /* * For talking to pointopoint and multidrop devices: DDCMP and X.25 */ -static int dn_short_output(struct neighbour *neigh, struct sk_buff *skb) +static int dn_short_output(struct neighbour *neigh, struct sock *sk, + struct sk_buff *skb) { struct net_device *dev = neigh->dev; int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2; @@ -284,8 +286,8 @@ static int dn_short_output(struct neighbour *neigh, struct sk_buff *skb) skb_reset_network_header(skb); - return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, skb, NULL, - neigh->dev, dn_neigh_output_packet); + return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, sk, skb, + NULL, neigh->dev, dn_neigh_output_packet); } /* @@ -293,7 +295,8 @@ static int dn_short_output(struct neighbour *neigh, struct sk_buff *skb) * Phase 3 output is the same as short output, execpt that * it clears the area bits before transmission. */ -static int dn_phase3_output(struct neighbour *neigh, struct sk_buff *skb) +static int dn_phase3_output(struct neighbour *neigh, struct sock *sk, + struct sk_buff *skb) { struct net_device *dev = neigh->dev; int headroom = dev->hard_header_len + sizeof(struct dn_short_packet) + 2; @@ -324,11 +327,11 @@ static int dn_phase3_output(struct neighbour *neigh, struct sk_buff *skb) skb_reset_network_header(skb); - return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, skb, NULL, - neigh->dev, dn_neigh_output_packet); + return NF_HOOK(NFPROTO_DECNET, NF_DN_POST_ROUTING, sk, skb, + NULL, neigh->dev, dn_neigh_output_packet); } -int dn_to_neigh_output(struct sk_buff *skb) +int dn_to_neigh_output(struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct dn_route *rt = (struct dn_route *) dst; @@ -347,11 +350,11 @@ int dn_to_neigh_output(struct sk_buff *skb) rcu_read_unlock(); if (dn->flags & DN_NDFLAG_P3) - return dn_phase3_output(neigh, skb); + return dn_phase3_output(neigh, sk, skb); if (use_long) - return dn_long_output(neigh, skb); + return dn_long_output(neigh, sk, skb); else - return dn_short_output(neigh, skb); + return dn_short_output(neigh, sk, skb); } /* @@ -372,7 +375,7 @@ void dn_neigh_pointopoint_hello(struct sk_buff *skb) /* * Ethernet router hello message received */ -int dn_neigh_router_hello(struct sk_buff *skb) +int dn_neigh_router_hello(struct sock *sk, struct sk_buff *skb) { struct rtnode_hello_message *msg = (struct rtnode_hello_message *)skb->data; @@ -434,7 +437,7 @@ int dn_neigh_router_hello(struct sk_buff *skb) /* * Endnode hello message received */ -int dn_neigh_endnode_hello(struct sk_buff *skb) +int dn_neigh_endnode_hello(struct sock *sk, struct sk_buff *skb) { struct endnode_hello_message *msg = (struct endnode_hello_message *)skb->data; struct neighbour *neigh; diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index fe5f01485d33..a321eac9fd0c 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -714,7 +714,7 @@ out: return ret; } -static int dn_nsp_rx_packet(struct sk_buff *skb) +static int dn_nsp_rx_packet(struct sock *sk2, struct sk_buff *skb) { struct dn_skb_cb *cb = DN_SKB_CB(skb); struct sock *sk = NULL; @@ -814,7 +814,8 @@ free_out: int dn_nsp_rx(struct sk_buff *skb) { - return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_IN, skb, skb->dev, NULL, + return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_IN, NULL, skb, + skb->dev, NULL, dn_nsp_rx_packet); } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 9ab0c4ba297f..03227ffd19ce 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -512,7 +512,7 @@ static int dn_return_long(struct sk_buff *skb) * * Returns: result of input function if route is found, error code otherwise */ -static int dn_route_rx_packet(struct sk_buff *skb) +static int dn_route_rx_packet(struct sock *sk, struct sk_buff *skb) { struct dn_skb_cb *cb; int err; @@ -573,7 +573,8 @@ static int dn_route_rx_long(struct sk_buff *skb) ptr++; cb->hops = *ptr++; /* Visit Count */ - return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, skb, skb->dev, NULL, + return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, NULL, skb, + skb->dev, NULL, dn_route_rx_packet); drop_it: @@ -600,7 +601,8 @@ static int dn_route_rx_short(struct sk_buff *skb) ptr += 2; cb->hops = *ptr & 0x3f; - return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, skb, skb->dev, NULL, + return NF_HOOK(NFPROTO_DECNET, NF_DN_PRE_ROUTING, NULL, skb, + skb->dev, NULL, dn_route_rx_packet); drop_it: @@ -608,7 +610,7 @@ drop_it: return NET_RX_DROP; } -static int dn_route_discard(struct sk_buff *skb) +static int dn_route_discard(struct sock *sk, struct sk_buff *skb) { /* * I know we drop the packet here, but thats considered success in @@ -618,7 +620,7 @@ static int dn_route_discard(struct sk_buff *skb) return NET_RX_SUCCESS; } -static int dn_route_ptp_hello(struct sk_buff *skb) +static int dn_route_ptp_hello(struct sock *sk, struct sk_buff *skb) { dn_dev_hello(skb); dn_neigh_pointopoint_hello(skb); @@ -704,22 +706,22 @@ int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type switch (flags & DN_RT_CNTL_MSK) { case DN_RT_PKT_HELO: return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, - skb, skb->dev, NULL, + NULL, skb, skb->dev, NULL, dn_route_ptp_hello); case DN_RT_PKT_L1RT: case DN_RT_PKT_L2RT: return NF_HOOK(NFPROTO_DECNET, NF_DN_ROUTE, - skb, skb->dev, NULL, + NULL, skb, skb->dev, NULL, dn_route_discard); case DN_RT_PKT_ERTH: return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, - skb, skb->dev, NULL, + NULL, skb, skb->dev, NULL, dn_neigh_router_hello); case DN_RT_PKT_EEDH: return NF_HOOK(NFPROTO_DECNET, NF_DN_HELLO, - skb, skb->dev, NULL, + NULL, skb, skb->dev, NULL, dn_neigh_endnode_hello); } } else { @@ -768,7 +770,8 @@ static int dn_output(struct sock *sk, struct sk_buff *skb) cb->rt_flags |= DN_RT_F_IE; cb->hops = 0; - return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT, skb, NULL, dev, + return NF_HOOK(NFPROTO_DECNET, NF_DN_LOCAL_OUT, sk, skb, + NULL, dev, dn_to_neigh_output); error: @@ -816,7 +819,8 @@ static int dn_forward(struct sk_buff *skb) if (rt->rt_flags & RTCF_DOREDIRECT) cb->rt_flags |= DN_RT_F_IE; - return NF_HOOK(NFPROTO_DECNET, NF_DN_FORWARD, skb, dev, skb->dev, + return NF_HOOK(NFPROTO_DECNET, NF_DN_FORWARD, NULL, skb, + dev, skb->dev, dn_to_neigh_output); drop: diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index c6e67aa46c32..933a92820d26 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -591,7 +591,8 @@ EXPORT_SYMBOL(arp_create); void arp_xmit(struct sk_buff *skb) { /* Send it off, maybe filter it using firewalling first. */ - NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit); + NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, NULL, skb, + NULL, skb->dev, dev_queue_xmit_sk); } EXPORT_SYMBOL(arp_xmit); @@ -625,7 +626,7 @@ EXPORT_SYMBOL(arp_send); * Process an arp request. */ -static int arp_process(struct sk_buff *skb) +static int arp_process(struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb->dev; struct in_device *in_dev = __in_dev_get_rcu(dev); @@ -846,7 +847,7 @@ out: static void parp_redo(struct sk_buff *skb) { - arp_process(skb); + arp_process(NULL, skb); } @@ -879,7 +880,8 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev, memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); - return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, skb, dev, NULL, arp_process); + return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, NULL, skb, + dev, NULL, arp_process); consumeskb: consume_skb(skb); diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index ff069f6597ac..af150b43b214 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -16,14 +16,12 @@ #include <uapi/linux/fou.h> #include <uapi/linux/genetlink.h> -static DEFINE_SPINLOCK(fou_lock); -static LIST_HEAD(fou_list); - struct fou { struct socket *sock; u8 protocol; u8 flags; - u16 port; + __be16 port; + u16 type; struct udp_offload udp_offloads; struct list_head list; }; @@ -37,6 +35,13 @@ struct fou_cfg { struct udp_port_cfg udp_config; }; +static unsigned int fou_net_id; + +struct fou_net { + struct list_head fou_list; + struct mutex fou_lock; +}; + static inline struct fou *fou_from_sock(struct sock *sk) { return sk->sk_user_data; @@ -387,20 +392,21 @@ out_unlock: return err; } -static int fou_add_to_port_list(struct fou *fou) +static int fou_add_to_port_list(struct net *net, struct fou *fou) { + struct fou_net *fn = net_generic(net, fou_net_id); struct fou *fout; - spin_lock(&fou_lock); - list_for_each_entry(fout, &fou_list, list) { + mutex_lock(&fn->fou_lock); + list_for_each_entry(fout, &fn->fou_list, list) { if (fou->port == fout->port) { - spin_unlock(&fou_lock); + mutex_unlock(&fn->fou_lock); return -EALREADY; } } - list_add(&fou->list, &fou_list); - spin_unlock(&fou_lock); + list_add(&fou->list, &fn->fou_list); + mutex_unlock(&fn->fou_lock); return 0; } @@ -410,14 +416,10 @@ static void fou_release(struct fou *fou) struct socket *sock = fou->sock; struct sock *sk = sock->sk; - udp_del_offload(&fou->udp_offloads); - + if (sk->sk_family == AF_INET) + udp_del_offload(&fou->udp_offloads); list_del(&fou->list); - - /* Remove hooks into tunnel socket */ - sk->sk_user_data = NULL; - - sock_release(sock); + udp_tunnel_sock_release(sock); kfree(fou); } @@ -447,10 +449,10 @@ static int gue_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg) static int fou_create(struct net *net, struct fou_cfg *cfg, struct socket **sockp) { - struct fou *fou = NULL; - int err; struct socket *sock = NULL; + struct fou *fou = NULL; struct sock *sk; + int err; /* Open UDP socket */ err = udp_sock_create(net, &cfg->udp_config, &sock); @@ -486,6 +488,8 @@ static int fou_create(struct net *net, struct fou_cfg *cfg, goto error; } + fou->type = cfg->type; + udp_sk(sk)->encap_type = 1; udp_encap_enable(); @@ -502,7 +506,7 @@ static int fou_create(struct net *net, struct fou_cfg *cfg, goto error; } - err = fou_add_to_port_list(fou); + err = fou_add_to_port_list(net, fou); if (err) goto error; @@ -514,27 +518,27 @@ static int fou_create(struct net *net, struct fou_cfg *cfg, error: kfree(fou); if (sock) - sock_release(sock); + udp_tunnel_sock_release(sock); return err; } static int fou_destroy(struct net *net, struct fou_cfg *cfg) { - struct fou *fou; - u16 port = cfg->udp_config.local_udp_port; + struct fou_net *fn = net_generic(net, fou_net_id); + __be16 port = cfg->udp_config.local_udp_port; int err = -EINVAL; + struct fou *fou; - spin_lock(&fou_lock); - list_for_each_entry(fou, &fou_list, list) { + mutex_lock(&fn->fou_lock); + list_for_each_entry(fou, &fn->fou_list, list) { if (fou->port == port) { - udp_del_offload(&fou->udp_offloads); fou_release(fou); err = 0; break; } } - spin_unlock(&fou_lock); + mutex_unlock(&fn->fou_lock); return err; } @@ -573,7 +577,7 @@ static int parse_nl_config(struct genl_info *info, } if (info->attrs[FOU_ATTR_PORT]) { - u16 port = nla_get_u16(info->attrs[FOU_ATTR_PORT]); + __be16 port = nla_get_be16(info->attrs[FOU_ATTR_PORT]); cfg->udp_config.local_udp_port = port; } @@ -592,6 +596,7 @@ static int parse_nl_config(struct genl_info *info, static int fou_nl_cmd_add_port(struct sk_buff *skb, struct genl_info *info) { + struct net *net = genl_info_net(info); struct fou_cfg cfg; int err; @@ -599,16 +604,120 @@ static int fou_nl_cmd_add_port(struct sk_buff *skb, struct genl_info *info) if (err) return err; - return fou_create(&init_net, &cfg, NULL); + return fou_create(net, &cfg, NULL); } static int fou_nl_cmd_rm_port(struct sk_buff *skb, struct genl_info *info) { + struct net *net = genl_info_net(info); + struct fou_cfg cfg; + int err; + + err = parse_nl_config(info, &cfg); + if (err) + return err; + + return fou_destroy(net, &cfg); +} + +static int fou_fill_info(struct fou *fou, struct sk_buff *msg) +{ + if (nla_put_u8(msg, FOU_ATTR_AF, fou->sock->sk->sk_family) || + nla_put_be16(msg, FOU_ATTR_PORT, fou->port) || + nla_put_u8(msg, FOU_ATTR_IPPROTO, fou->protocol) || + nla_put_u8(msg, FOU_ATTR_TYPE, fou->type)) + return -1; + + if (fou->flags & FOU_F_REMCSUM_NOPARTIAL) + if (nla_put_flag(msg, FOU_ATTR_REMCSUM_NOPARTIAL)) + return -1; + return 0; +} + +static int fou_dump_info(struct fou *fou, u32 portid, u32 seq, + u32 flags, struct sk_buff *skb, u8 cmd) +{ + void *hdr; + + hdr = genlmsg_put(skb, portid, seq, &fou_nl_family, flags, cmd); + if (!hdr) + return -ENOMEM; + + if (fou_fill_info(fou, skb) < 0) + goto nla_put_failure; + + genlmsg_end(skb, hdr); + return 0; + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int fou_nl_cmd_get_port(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = genl_info_net(info); + struct fou_net *fn = net_generic(net, fou_net_id); + struct sk_buff *msg; struct fou_cfg cfg; + struct fou *fout; + __be16 port; + int ret; + + ret = parse_nl_config(info, &cfg); + if (ret) + return ret; + port = cfg.udp_config.local_udp_port; + if (port == 0) + return -EINVAL; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = -ESRCH; + mutex_lock(&fn->fou_lock); + list_for_each_entry(fout, &fn->fou_list, list) { + if (port == fout->port) { + ret = fou_dump_info(fout, info->snd_portid, + info->snd_seq, 0, msg, + info->genlhdr->cmd); + break; + } + } + mutex_unlock(&fn->fou_lock); + if (ret < 0) + goto out_free; - parse_nl_config(info, &cfg); + return genlmsg_reply(msg, info); - return fou_destroy(&init_net, &cfg); +out_free: + nlmsg_free(msg); + return ret; +} + +static int fou_nl_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct fou_net *fn = net_generic(net, fou_net_id); + struct fou *fout; + int idx = 0, ret; + + mutex_lock(&fn->fou_lock); + list_for_each_entry(fout, &fn->fou_list, list) { + if (idx++ < cb->args[0]) + continue; + ret = fou_dump_info(fout, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI, + skb, FOU_CMD_GET); + if (ret) + goto done; + } + mutex_unlock(&fn->fou_lock); + +done: + cb->args[0] = idx; + return skb->len; } static const struct genl_ops fou_nl_ops[] = { @@ -624,6 +733,12 @@ static const struct genl_ops fou_nl_ops[] = { .policy = fou_nl_policy, .flags = GENL_ADMIN_PERM, }, + { + .cmd = FOU_CMD_GET, + .doit = fou_nl_cmd_get_port, + .dumpit = fou_nl_dump, + .policy = fou_nl_policy, + }, }; size_t fou_encap_hlen(struct ip_tunnel_encap *e) @@ -771,12 +886,12 @@ EXPORT_SYMBOL(gue_build_header); #ifdef CONFIG_NET_FOU_IP_TUNNELS -static const struct ip_tunnel_encap_ops __read_mostly fou_iptun_ops = { +static const struct ip_tunnel_encap_ops fou_iptun_ops = { .encap_hlen = fou_encap_hlen, .build_header = fou_build_header, }; -static const struct ip_tunnel_encap_ops __read_mostly gue_iptun_ops = { +static const struct ip_tunnel_encap_ops gue_iptun_ops = { .encap_hlen = gue_encap_hlen, .build_header = gue_build_header, }; @@ -820,38 +935,63 @@ static void ip_tunnel_encap_del_fou_ops(void) #endif +static __net_init int fou_init_net(struct net *net) +{ + struct fou_net *fn = net_generic(net, fou_net_id); + + INIT_LIST_HEAD(&fn->fou_list); + mutex_init(&fn->fou_lock); + return 0; +} + +static __net_exit void fou_exit_net(struct net *net) +{ + struct fou_net *fn = net_generic(net, fou_net_id); + struct fou *fou, *next; + + /* Close all the FOU sockets */ + mutex_lock(&fn->fou_lock); + list_for_each_entry_safe(fou, next, &fn->fou_list, list) + fou_release(fou); + mutex_unlock(&fn->fou_lock); +} + +static struct pernet_operations fou_net_ops = { + .init = fou_init_net, + .exit = fou_exit_net, + .id = &fou_net_id, + .size = sizeof(struct fou_net), +}; + static int __init fou_init(void) { int ret; + ret = register_pernet_device(&fou_net_ops); + if (ret) + goto exit; + ret = genl_register_family_with_ops(&fou_nl_family, fou_nl_ops); - if (ret < 0) - goto exit; + goto unregister; ret = ip_tunnel_encap_add_fou_ops(); - if (ret < 0) - genl_unregister_family(&fou_nl_family); + if (ret == 0) + return 0; + genl_unregister_family(&fou_nl_family); +unregister: + unregister_pernet_device(&fou_net_ops); exit: return ret; } static void __exit fou_fini(void) { - struct fou *fou, *next; - ip_tunnel_encap_del_fou_ops(); - genl_unregister_family(&fou_nl_family); - - /* Close all the FOU sockets */ - - spin_lock(&fou_lock); - list_for_each_entry_safe(fou, next, &fou_list, list) - fou_release(fou); - spin_unlock(&fou_lock); + unregister_pernet_device(&fou_net_ops); } module_init(fou_init); diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index e64f8e9785d1..8986e63f3bda 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -113,10 +113,6 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, int min_headroom; int err; - skb = udp_tunnel_handle_offloads(skb, csum); - if (IS_ERR(skb)) - return PTR_ERR(skb); - min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr) + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); @@ -131,12 +127,16 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, if (unlikely(!skb)) return -ENOMEM; + skb = udp_tunnel_handle_offloads(skb, csum); + if (IS_ERR(skb)) + return PTR_ERR(skb); + gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len); geneve_build_header(gnvh, tun_flags, vni, opt_len, opt); skb_set_inner_protocol(skb, htons(ETH_P_TEB)); - return udp_tunnel_xmit_skb(rt, skb, src, dst, + return udp_tunnel_xmit_skb(rt, gs->sock->sk, skb, src, dst, tos, ttl, df, src_port, dst_port, xnet, !csum); } diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 76322c9867d5..70e8b3c308ec 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -248,7 +248,7 @@ static int inet_twsk_diag_fill(struct sock *sk, struct inet_timewait_sock *tw = inet_twsk(sk); struct inet_diag_msg *r; struct nlmsghdr *nlh; - s32 tmo; + long tmo; nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), nlmsg_flags); @@ -258,7 +258,7 @@ static int inet_twsk_diag_fill(struct sock *sk, r = nlmsg_data(nlh); BUG_ON(tw->tw_state != TCP_TIME_WAIT); - tmo = tw->tw_ttd - inet_tw_time_stamp(); + tmo = tw->tw_timer.expires - jiffies; if (tmo < 0) tmo = 0; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index d4630bf2d9aa..c6fb80bd5826 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -388,7 +388,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, *twp = tw; } else if (tw) { /* Silly. Should hash-dance instead... */ - inet_twsk_deschedule(tw, death_row); + inet_twsk_deschedule(tw); inet_twsk_put(tw); } @@ -565,7 +565,7 @@ ok: spin_unlock(&head->lock); if (tw) { - inet_twsk_deschedule(tw, death_row); + inet_twsk_deschedule(tw); while (twrefcnt) { twrefcnt--; inet_twsk_put(tw); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 118f0f195820..00ec8d5d7e7e 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -67,9 +67,9 @@ int inet_twsk_bind_unhash(struct inet_timewait_sock *tw, } /* Must be called with locally disabled BHs. */ -static void __inet_twsk_kill(struct inet_timewait_sock *tw, - struct inet_hashinfo *hashinfo) +static void inet_twsk_kill(struct inet_timewait_sock *tw) { + struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo; struct inet_bind_hashbucket *bhead; int refcnt; /* Unlink from established hashes. */ @@ -89,6 +89,8 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw, BUG_ON(refcnt >= atomic_read(&tw->tw_refcnt)); atomic_sub(refcnt, &tw->tw_refcnt); + atomic_dec(&tw->tw_dr->tw_count); + inet_twsk_put(tw); } void inet_twsk_free(struct inet_timewait_sock *tw) @@ -168,16 +170,34 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, } EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); -struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state) +void tw_timer_handler(unsigned long data) { - struct inet_timewait_sock *tw = - kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab, - GFP_ATOMIC); + struct inet_timewait_sock *tw = (struct inet_timewait_sock *)data; + + if (tw->tw_kill) + NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED); + else + NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITED); + inet_twsk_kill(tw); +} + +struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, + struct inet_timewait_death_row *dr, + const int state) +{ + struct inet_timewait_sock *tw; + + if (atomic_read(&dr->tw_count) >= dr->sysctl_max_tw_buckets) + return NULL; + + tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab, + GFP_ATOMIC); if (tw) { const struct inet_sock *inet = inet_sk(sk); kmemcheck_annotate_bitfield(tw, flags); + tw->tw_dr = dr; /* Give us an identity. */ tw->tw_daddr = inet->inet_daddr; tw->tw_rcv_saddr = inet->inet_rcv_saddr; @@ -196,13 +216,14 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat tw->tw_prot = sk->sk_prot_creator; atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie)); twsk_net_set(tw, sock_net(sk)); + setup_timer(&tw->tw_timer, tw_timer_handler, (unsigned long)tw); /* * Because we use RCU lookups, we should not set tw_refcnt * to a non null value before everything is setup for this * timewait socket. */ atomic_set(&tw->tw_refcnt, 0); - inet_twsk_dead_node_init(tw); + __module_get(tw->tw_prot->owner); } @@ -210,139 +231,20 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat } EXPORT_SYMBOL_GPL(inet_twsk_alloc); -/* Returns non-zero if quota exceeded. */ -static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr, - const int slot) -{ - struct inet_timewait_sock *tw; - unsigned int killed; - int ret; - - /* NOTE: compare this to previous version where lock - * was released after detaching chain. It was racy, - * because tw buckets are scheduled in not serialized context - * in 2.3 (with netfilter), and with softnet it is common, because - * soft irqs are not sequenced. - */ - killed = 0; - ret = 0; -rescan: - inet_twsk_for_each_inmate(tw, &twdr->cells[slot]) { - __inet_twsk_del_dead_node(tw); - spin_unlock(&twdr->death_lock); - __inet_twsk_kill(tw, twdr->hashinfo); -#ifdef CONFIG_NET_NS - NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITED); -#endif - inet_twsk_put(tw); - killed++; - spin_lock(&twdr->death_lock); - if (killed > INET_TWDR_TWKILL_QUOTA) { - ret = 1; - break; - } - - /* While we dropped twdr->death_lock, another cpu may have - * killed off the next TW bucket in the list, therefore - * do a fresh re-read of the hlist head node with the - * lock reacquired. We still use the hlist traversal - * macro in order to get the prefetches. - */ - goto rescan; - } - - twdr->tw_count -= killed; -#ifndef CONFIG_NET_NS - NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITED, killed); -#endif - return ret; -} - -void inet_twdr_hangman(unsigned long data) -{ - struct inet_timewait_death_row *twdr; - unsigned int need_timer; - - twdr = (struct inet_timewait_death_row *)data; - spin_lock(&twdr->death_lock); - - if (twdr->tw_count == 0) - goto out; - - need_timer = 0; - if (inet_twdr_do_twkill_work(twdr, twdr->slot)) { - twdr->thread_slots |= (1 << twdr->slot); - schedule_work(&twdr->twkill_work); - need_timer = 1; - } else { - /* We purged the entire slot, anything left? */ - if (twdr->tw_count) - need_timer = 1; - twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1)); - } - if (need_timer) - mod_timer(&twdr->tw_timer, jiffies + twdr->period); -out: - spin_unlock(&twdr->death_lock); -} -EXPORT_SYMBOL_GPL(inet_twdr_hangman); - -void inet_twdr_twkill_work(struct work_struct *work) -{ - struct inet_timewait_death_row *twdr = - container_of(work, struct inet_timewait_death_row, twkill_work); - int i; - - BUILD_BUG_ON((INET_TWDR_TWKILL_SLOTS - 1) > - (sizeof(twdr->thread_slots) * 8)); - - while (twdr->thread_slots) { - spin_lock_bh(&twdr->death_lock); - for (i = 0; i < INET_TWDR_TWKILL_SLOTS; i++) { - if (!(twdr->thread_slots & (1 << i))) - continue; - - while (inet_twdr_do_twkill_work(twdr, i) != 0) { - if (need_resched()) { - spin_unlock_bh(&twdr->death_lock); - schedule(); - spin_lock_bh(&twdr->death_lock); - } - } - - twdr->thread_slots &= ~(1 << i); - } - spin_unlock_bh(&twdr->death_lock); - } -} -EXPORT_SYMBOL_GPL(inet_twdr_twkill_work); - /* These are always called from BH context. See callers in * tcp_input.c to verify this. */ /* This is for handling early-kills of TIME_WAIT sockets. */ -void inet_twsk_deschedule(struct inet_timewait_sock *tw, - struct inet_timewait_death_row *twdr) +void inet_twsk_deschedule(struct inet_timewait_sock *tw) { - spin_lock(&twdr->death_lock); - if (inet_twsk_del_dead_node(tw)) { - inet_twsk_put(tw); - if (--twdr->tw_count == 0) - del_timer(&twdr->tw_timer); - } - spin_unlock(&twdr->death_lock); - __inet_twsk_kill(tw, twdr->hashinfo); + if (del_timer_sync(&tw->tw_timer)) + inet_twsk_kill(tw); } EXPORT_SYMBOL(inet_twsk_deschedule); -void inet_twsk_schedule(struct inet_timewait_sock *tw, - struct inet_timewait_death_row *twdr, - const int timeo, const int timewait_len) +void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo) { - struct hlist_head *list; - int slot; - /* timeout := RTO * 3.5 * * 3.5 = 1+2+0.5 to wait for two retransmits. @@ -367,115 +269,15 @@ void inet_twsk_schedule(struct inet_timewait_sock *tw, * is greater than TS tick!) and detect old duplicates with help * of PAWS. */ - slot = (timeo + (1 << INET_TWDR_RECYCLE_TICK) - 1) >> INET_TWDR_RECYCLE_TICK; - spin_lock(&twdr->death_lock); - - /* Unlink it, if it was scheduled */ - if (inet_twsk_del_dead_node(tw)) - twdr->tw_count--; - else + tw->tw_kill = timeo <= 4*HZ; + if (!mod_timer_pinned(&tw->tw_timer, jiffies + timeo)) { atomic_inc(&tw->tw_refcnt); - - if (slot >= INET_TWDR_RECYCLE_SLOTS) { - /* Schedule to slow timer */ - if (timeo >= timewait_len) { - slot = INET_TWDR_TWKILL_SLOTS - 1; - } else { - slot = DIV_ROUND_UP(timeo, twdr->period); - if (slot >= INET_TWDR_TWKILL_SLOTS) - slot = INET_TWDR_TWKILL_SLOTS - 1; - } - tw->tw_ttd = inet_tw_time_stamp() + timeo; - slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1); - list = &twdr->cells[slot]; - } else { - tw->tw_ttd = inet_tw_time_stamp() + (slot << INET_TWDR_RECYCLE_TICK); - - if (twdr->twcal_hand < 0) { - twdr->twcal_hand = 0; - twdr->twcal_jiffie = jiffies; - twdr->twcal_timer.expires = twdr->twcal_jiffie + - (slot << INET_TWDR_RECYCLE_TICK); - add_timer(&twdr->twcal_timer); - } else { - if (time_after(twdr->twcal_timer.expires, - jiffies + (slot << INET_TWDR_RECYCLE_TICK))) - mod_timer(&twdr->twcal_timer, - jiffies + (slot << INET_TWDR_RECYCLE_TICK)); - slot = (twdr->twcal_hand + slot) & (INET_TWDR_RECYCLE_SLOTS - 1); - } - list = &twdr->twcal_row[slot]; + atomic_inc(&tw->tw_dr->tw_count); } - - hlist_add_head(&tw->tw_death_node, list); - - if (twdr->tw_count++ == 0) - mod_timer(&twdr->tw_timer, jiffies + twdr->period); - spin_unlock(&twdr->death_lock); } EXPORT_SYMBOL_GPL(inet_twsk_schedule); -void inet_twdr_twcal_tick(unsigned long data) -{ - struct inet_timewait_death_row *twdr; - int n, slot; - unsigned long j; - unsigned long now = jiffies; - int killed = 0; - int adv = 0; - - twdr = (struct inet_timewait_death_row *)data; - - spin_lock(&twdr->death_lock); - if (twdr->twcal_hand < 0) - goto out; - - slot = twdr->twcal_hand; - j = twdr->twcal_jiffie; - - for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) { - if (time_before_eq(j, now)) { - struct hlist_node *safe; - struct inet_timewait_sock *tw; - - inet_twsk_for_each_inmate_safe(tw, safe, - &twdr->twcal_row[slot]) { - __inet_twsk_del_dead_node(tw); - __inet_twsk_kill(tw, twdr->hashinfo); -#ifdef CONFIG_NET_NS - NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED); -#endif - inet_twsk_put(tw); - killed++; - } - } else { - if (!adv) { - adv = 1; - twdr->twcal_jiffie = j; - twdr->twcal_hand = slot; - } - - if (!hlist_empty(&twdr->twcal_row[slot])) { - mod_timer(&twdr->twcal_timer, j); - goto out; - } - } - j += 1 << INET_TWDR_RECYCLE_TICK; - slot = (slot + 1) & (INET_TWDR_RECYCLE_SLOTS - 1); - } - twdr->twcal_hand = -1; - -out: - if ((twdr->tw_count -= killed) == 0) - del_timer(&twdr->tw_timer); -#ifndef CONFIG_NET_NS - NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITKILLED, killed); -#endif - spin_unlock(&twdr->death_lock); -} -EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick); - void inet_twsk_purge(struct inet_hashinfo *hashinfo, struct inet_timewait_death_row *twdr, int family) { @@ -509,7 +311,7 @@ restart: rcu_read_unlock(); local_bh_disable(); - inet_twsk_deschedule(tw, twdr); + inet_twsk_deschedule(tw); local_bh_enable(); inet_twsk_put(tw); goto restart_rcu; diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index d9bc28ac5d1b..939992c456f3 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -57,7 +57,7 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu) } -static int ip_forward_finish(struct sk_buff *skb) +static int ip_forward_finish(struct sock *sk, struct sk_buff *skb) { struct ip_options *opt = &(IPCB(skb)->opt); @@ -68,7 +68,7 @@ static int ip_forward_finish(struct sk_buff *skb) ip_forward_options(skb); skb_sender_cpu_clear(skb); - return dst_output(skb); + return dst_output_sk(sk, skb); } int ip_forward(struct sk_buff *skb) @@ -136,8 +136,8 @@ int ip_forward(struct sk_buff *skb) skb->priority = rt_tos2priority(iph->tos); - return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, - rt->dst.dev, ip_forward_finish); + return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, NULL, skb, + skb->dev, rt->dst.dev, ip_forward_finish); sr_failed: /* diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 2e0410ed8f16..2db4c8773c1b 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -187,7 +187,7 @@ bool ip_call_ra_chain(struct sk_buff *skb) return false; } -static int ip_local_deliver_finish(struct sk_buff *skb) +static int ip_local_deliver_finish(struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(skb->dev); @@ -253,7 +253,8 @@ int ip_local_deliver(struct sk_buff *skb) return 0; } - return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN, skb, skb->dev, NULL, + return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN, NULL, skb, + skb->dev, NULL, ip_local_deliver_finish); } @@ -309,7 +310,7 @@ drop: int sysctl_ip_early_demux __read_mostly = 1; EXPORT_SYMBOL(sysctl_ip_early_demux); -static int ip_rcv_finish(struct sk_buff *skb) +static int ip_rcv_finish(struct sock *sk, struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; @@ -451,7 +452,8 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, /* Must drop socket now because of tproxy. */ skb_orphan(skb); - return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, dev, NULL, + return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, NULL, skb, + dev, NULL, ip_rcv_finish); csum_error: diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 26f6f7956168..c65b93a7b711 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -91,14 +91,19 @@ void ip_send_check(struct iphdr *iph) } EXPORT_SYMBOL(ip_send_check); -int __ip_local_out(struct sk_buff *skb) +int __ip_local_out_sk(struct sock *sk, struct sk_buff *skb) { struct iphdr *iph = ip_hdr(skb); iph->tot_len = htons(skb->len); ip_send_check(iph); - return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL, - skb_dst(skb)->dev, dst_output); + return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, sk, skb, NULL, + skb_dst(skb)->dev, dst_output_sk); +} + +int __ip_local_out(struct sk_buff *skb) +{ + return __ip_local_out_sk(skb->sk, skb); } int ip_local_out_sk(struct sock *sk, struct sk_buff *skb) @@ -163,7 +168,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, } EXPORT_SYMBOL_GPL(ip_build_and_send_pkt); -static inline int ip_finish_output2(struct sk_buff *skb) +static inline int ip_finish_output2(struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct rtable *rt = (struct rtable *)dst; @@ -211,7 +216,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) return -EINVAL; } -static int ip_finish_output_gso(struct sk_buff *skb) +static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb) { netdev_features_t features; struct sk_buff *segs; @@ -220,7 +225,7 @@ static int ip_finish_output_gso(struct sk_buff *skb) /* common case: locally created skb or seglen is <= mtu */ if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) || skb_gso_network_seglen(skb) <= ip_skb_dst_mtu(skb)) - return ip_finish_output2(skb); + return ip_finish_output2(sk, skb); /* Slowpath - GSO segment length is exceeding the dst MTU. * @@ -243,7 +248,7 @@ static int ip_finish_output_gso(struct sk_buff *skb) int err; segs->next = NULL; - err = ip_fragment(segs, ip_finish_output2); + err = ip_fragment(sk, segs, ip_finish_output2); if (err && ret == 0) ret = err; @@ -253,22 +258,22 @@ static int ip_finish_output_gso(struct sk_buff *skb) return ret; } -static int ip_finish_output(struct sk_buff *skb) +static int ip_finish_output(struct sock *sk, struct sk_buff *skb) { #if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM) /* Policy lookup after SNAT yielded a new policy */ if (skb_dst(skb)->xfrm) { IPCB(skb)->flags |= IPSKB_REROUTED; - return dst_output(skb); + return dst_output_sk(sk, skb); } #endif if (skb_is_gso(skb)) - return ip_finish_output_gso(skb); + return ip_finish_output_gso(sk, skb); if (skb->len > ip_skb_dst_mtu(skb)) - return ip_fragment(skb, ip_finish_output2); + return ip_fragment(sk, skb, ip_finish_output2); - return ip_finish_output2(skb); + return ip_finish_output2(sk, skb); } int ip_mc_output(struct sock *sk, struct sk_buff *skb) @@ -307,7 +312,7 @@ int ip_mc_output(struct sock *sk, struct sk_buff *skb) struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); if (newskb) NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, - newskb, NULL, newskb->dev, + sk, newskb, NULL, newskb->dev, dev_loopback_xmit); } @@ -322,11 +327,11 @@ int ip_mc_output(struct sock *sk, struct sk_buff *skb) if (rt->rt_flags&RTCF_BROADCAST) { struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); if (newskb) - NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, newskb, + NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, newskb, NULL, newskb->dev, dev_loopback_xmit); } - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, + return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb, NULL, skb->dev, ip_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } @@ -340,7 +345,8 @@ int ip_output(struct sock *sk, struct sk_buff *skb) skb->dev = dev; skb->protocol = htons(ETH_P_IP); - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, NULL, dev, + return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb, + NULL, dev, ip_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } @@ -449,7 +455,6 @@ no_route: } EXPORT_SYMBOL(ip_queue_xmit); - static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) { to->pkt_type = from->pkt_type; @@ -480,7 +485,8 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) * single device frame, and queue such a frame for sending. */ -int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) +int ip_fragment(struct sock *sk, struct sk_buff *skb, + int (*output)(struct sock *, struct sk_buff *)) { struct iphdr *iph; int ptr; @@ -593,7 +599,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) ip_send_check(iph); } - err = output(skb); + err = output(sk, skb); if (!err) IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGCREATES); @@ -730,7 +736,7 @@ slow_path: ip_send_check(iph); - err = output(skb2); + err = output(sk, skb2); if (err) goto fail; @@ -813,7 +819,6 @@ static inline int ip_ufo_append_data(struct sock *sk, skb->csum = 0; - __skb_queue_tail(queue, skb); } else if (skb_is_gso(skb)) { goto append; @@ -1211,7 +1216,6 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, skb_shinfo(skb)->gso_type = SKB_GSO_UDP; } - while (size > 0) { int i; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 6d364ab8e14e..4c2c3ba4ba65 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -782,7 +782,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, return; } - err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol, + err = iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); iptunnel_xmit_stats(err, &dev->stats, dev->tstats); diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 8c4dcc46acd2..ce63ab21b6cd 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -74,7 +74,8 @@ int iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, iph->daddr = dst; iph->saddr = src; iph->ttl = ttl; - __ip_select_ident(sock_net(sk), iph, skb_shinfo(skb)->gso_segs ?: 1); + __ip_select_ident(dev_net(rt->dst.dev), iph, + skb_shinfo(skb)->gso_segs ?: 1); err = ip_local_out_sk(sk, skb); if (unlikely(net_xmit_eval(err))) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 5f17d0e78071..3a2c0162c3ba 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1679,7 +1679,7 @@ static void ip_encap(struct net *net, struct sk_buff *skb, nf_reset(skb); } -static inline int ipmr_forward_finish(struct sk_buff *skb) +static inline int ipmr_forward_finish(struct sock *sk, struct sk_buff *skb) { struct ip_options *opt = &(IPCB(skb)->opt); @@ -1689,7 +1689,7 @@ static inline int ipmr_forward_finish(struct sk_buff *skb) if (unlikely(opt->optlen)) ip_forward_options(skb); - return dst_output(skb); + return dst_output_sk(sk, skb); } /* @@ -1788,7 +1788,8 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, * not mrouter) cannot join to more than one interface - it will * result in receiving multiple packets. */ - NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev, + NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, NULL, skb, + skb->dev, dev, ipmr_forward_finish); return; diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index c5b794da51a9..3262e41ff76f 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -13,6 +13,7 @@ #include <net/dst.h> #include <net/netfilter/ipv4/nf_reject.h> #include <linux/netfilter_ipv4.h> +#include <linux/netfilter_bridge.h> #include <net/netfilter/ipv4/nf_reject.h> const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb, @@ -146,7 +147,8 @@ void nf_send_reset(struct sk_buff *oldskb, int hook) */ if (oldskb->nf_bridge) { struct ethhdr *oeth = eth_hdr(oldskb); - nskb->dev = oldskb->nf_bridge->physindev; + + nskb->dev = nf_bridge_get_physindev(oldskb); niph->tot_len = htons(nskb->len); ip_send_check(niph); if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c index 665de06561cd..40e414c4ca56 100644 --- a/net/ipv4/netfilter/nft_masq_ipv4.c +++ b/net/ipv4/netfilter/nft_masq_ipv4.c @@ -17,20 +17,17 @@ #include <net/netfilter/ipv4/nf_nat_masquerade.h> static void nft_masq_ipv4_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_masq *priv = nft_expr_priv(expr); struct nf_nat_range range; - unsigned int verdict; memset(&range, 0, sizeof(range)); range.flags = priv->flags; - verdict = nf_nat_masquerade_ipv4(pkt->skb, pkt->ops->hooknum, - &range, pkt->out); - - data[NFT_REG_VERDICT].verdict = verdict; + regs->verdict.code = nf_nat_masquerade_ipv4(pkt->skb, pkt->ops->hooknum, + &range, pkt->out); } static struct nft_expr_type nft_masq_ipv4_type; diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c index 6ecfce63201a..d8d795df9c13 100644 --- a/net/ipv4/netfilter/nft_redir_ipv4.c +++ b/net/ipv4/netfilter/nft_redir_ipv4.c @@ -18,26 +18,25 @@ #include <net/netfilter/nft_redir.h> static void nft_redir_ipv4_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_redir *priv = nft_expr_priv(expr); struct nf_nat_ipv4_multi_range_compat mr; - unsigned int verdict; memset(&mr, 0, sizeof(mr)); if (priv->sreg_proto_min) { mr.range[0].min.all = - *(__be16 *)&data[priv->sreg_proto_min].data[0]; + *(__be16 *)®s->data[priv->sreg_proto_min]; mr.range[0].max.all = - *(__be16 *)&data[priv->sreg_proto_max].data[0]; + *(__be16 *)®s->data[priv->sreg_proto_max]; mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } mr.range[0].flags |= priv->flags; - verdict = nf_nat_redirect_ipv4(pkt->skb, &mr, pkt->ops->hooknum); - data[NFT_REG_VERDICT].verdict = verdict; + regs->verdict.code = nf_nat_redirect_ipv4(pkt->skb, &mr, + pkt->ops->hooknum); } static struct nft_expr_type nft_redir_ipv4_type; diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c index 16a5d4d73d75..b07e58b51158 100644 --- a/net/ipv4/netfilter/nft_reject_ipv4.c +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -20,7 +20,7 @@ #include <net/netfilter/nft_reject.h> static void nft_reject_ipv4_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); @@ -33,9 +33,11 @@ static void nft_reject_ipv4_eval(const struct nft_expr *expr, case NFT_REJECT_TCP_RST: nf_send_reset(pkt->skb, pkt->ops->hooknum); break; + default: + break; } - data[NFT_REG_VERDICT].verdict = NF_DROP; + regs->verdict.code = NF_DROP; } static struct nft_expr_type nft_reject_ipv4_type; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index d8953ef0770c..e1f3b911dd1e 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -63,7 +63,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) socket_seq_show(seq); seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n", sock_prot_inuse_get(net, &tcp_prot), orphans, - tcp_death_row.tw_count, sockets, + atomic_read(&tcp_death_row.tw_count), sockets, proto_memory_allocated(&tcp_prot)); seq_printf(seq, "UDP: inuse %d mem %ld\n", sock_prot_inuse_get(net, &udp_prot), diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index f2fc92a1241a..561cd4b8fc6e 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -411,8 +411,8 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, icmp_out_count(net, ((struct icmphdr *) skb_transport_header(skb))->type); - err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, skb, NULL, - rt->dst.dev, dst_output); + err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT, sk, skb, + NULL, rt->dst.dev, dst_output_sk); if (err > 0) err = net_xmit_errno(err); if (err) diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 5da55e2b5cd2..e3d87aca6be8 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -303,6 +303,7 @@ fastopen: } else if (foc->len > 0) /* Client presents an invalid cookie */ NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); + valid_foc.exp = foc->exp; *foc = valid_foc; return false; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index c1ce304ba8d2..a7ef679dd3ea 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3099,17 +3099,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (sacked & TCPCB_SACKED_RETRANS) tp->retrans_out -= acked_pcount; flag |= FLAG_RETRANS_DATA_ACKED; - } else { + } else if (!(sacked & TCPCB_SACKED_ACKED)) { last_ackt = skb->skb_mstamp; WARN_ON_ONCE(last_ackt.v64 == 0); if (!first_ackt.v64) first_ackt = last_ackt; - if (!(sacked & TCPCB_SACKED_ACKED)) { - reord = min(pkts_acked, reord); - if (!after(scb->end_seq, tp->high_seq)) - flag |= FLAG_ORIG_SACK_ACKED; - } + reord = min(pkts_acked, reord); + if (!after(scb->end_seq, tp->high_seq)) + flag |= FLAG_ORIG_SACK_ACKED; } if (sacked & TCPCB_SACKED_ACKED) @@ -3603,6 +3601,23 @@ old_ack: return 0; } +static void tcp_parse_fastopen_option(int len, const unsigned char *cookie, + bool syn, struct tcp_fastopen_cookie *foc, + bool exp_opt) +{ + /* Valid only in SYN or SYN-ACK with an even length. */ + if (!foc || !syn || len < 0 || (len & 1)) + return; + + if (len >= TCP_FASTOPEN_COOKIE_MIN && + len <= TCP_FASTOPEN_COOKIE_MAX) + memcpy(foc->val, cookie, len); + else if (len != 0) + len = -1; + foc->len = len; + foc->exp = exp_opt; +} + /* Look for tcp options. Normally only called on SYN and SYNACK packets. * But, this can also be called on packets in the established flow when * the fast version below fails. @@ -3692,21 +3707,22 @@ void tcp_parse_options(const struct sk_buff *skb, */ break; #endif + case TCPOPT_FASTOPEN: + tcp_parse_fastopen_option( + opsize - TCPOLEN_FASTOPEN_BASE, + ptr, th->syn, foc, false); + break; + case TCPOPT_EXP: /* Fast Open option shares code 254 using a - * 16 bits magic number. It's valid only in - * SYN or SYN-ACK with an even size. + * 16 bits magic number. */ - if (opsize < TCPOLEN_EXP_FASTOPEN_BASE || - get_unaligned_be16(ptr) != TCPOPT_FASTOPEN_MAGIC || - !foc || !th->syn || (opsize & 1)) - break; - foc->len = opsize - TCPOLEN_EXP_FASTOPEN_BASE; - if (foc->len >= TCP_FASTOPEN_COOKIE_MIN && - foc->len <= TCP_FASTOPEN_COOKIE_MAX) - memcpy(foc->val, ptr + 2, foc->len); - else if (foc->len != 0) - foc->len = -1; + if (opsize >= TCPOLEN_EXP_FASTOPEN_BASE && + get_unaligned_be16(ptr) == + TCPOPT_FASTOPEN_MAGIC) + tcp_parse_fastopen_option(opsize - + TCPOLEN_EXP_FASTOPEN_BASE, + ptr + 2, th->syn, foc, true); break; } @@ -5360,8 +5376,8 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL; - u16 mss = tp->rx_opt.mss_clamp; - bool syn_drop; + u16 mss = tp->rx_opt.mss_clamp, try_exp = 0; + bool syn_drop = false; if (mss == tp->rx_opt.user_mss) { struct tcp_options_received opt; @@ -5373,16 +5389,25 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, mss = opt.mss_clamp; } - if (!tp->syn_fastopen) /* Ignore an unsolicited cookie */ + if (!tp->syn_fastopen) { + /* Ignore an unsolicited cookie */ cookie->len = -1; + } else if (tp->total_retrans) { + /* SYN timed out and the SYN-ACK neither has a cookie nor + * acknowledges data. Presumably the remote received only + * the retransmitted (regular) SYNs: either the original + * SYN-data or the corresponding SYN-ACK was dropped. + */ + syn_drop = (cookie->len < 0 && data); + } else if (cookie->len < 0 && !tp->syn_data) { + /* We requested a cookie but didn't get it. If we did not use + * the (old) exp opt format then try so next time (try_exp=1). + * Otherwise we go back to use the RFC7413 opt (try_exp=2). + */ + try_exp = tp->syn_fastopen_exp ? 2 : 1; + } - /* The SYN-ACK neither has cookie nor acknowledges the data. Presumably - * the remote receives only the retransmitted (regular) SYNs: either - * the original SYN-data or the corresponding SYN-ACK is lost. - */ - syn_drop = (cookie->len <= 0 && data && tp->total_retrans); - - tcp_fastopen_cache_set(sk, mss, cookie, syn_drop); + tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp); if (data) { /* Retransmit unacked data in SYN */ tcp_for_write_queue_from(data, sk) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 560f9571f7c4..3571f2be4470 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -897,9 +897,9 @@ EXPORT_SYMBOL(tcp_md5_do_lookup); struct tcp_md5sig_key *tcp_v4_md5_lookup(struct sock *sk, const struct sock *addr_sk) { - union tcp_md5_addr *addr; + const union tcp_md5_addr *addr; - addr = (union tcp_md5_addr *)&sk->sk_daddr; + addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr; return tcp_md5_do_lookup(sk, addr, AF_INET); } EXPORT_SYMBOL(tcp_v4_md5_lookup); @@ -1685,7 +1685,7 @@ do_time_wait: iph->daddr, th->dest, inet_iif(skb)); if (sk2) { - inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row); + inet_twsk_deschedule(inet_twsk(sk)); inet_twsk_put(inet_twsk(sk)); sk = sk2; goto process; @@ -2242,9 +2242,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) static void get_timewait4_sock(const struct inet_timewait_sock *tw, struct seq_file *f, int i) { + long delta = tw->tw_timer.expires - jiffies; __be32 dest, src; __u16 destp, srcp; - s32 delta = tw->tw_ttd - inet_tw_time_stamp(); dest = tw->tw_daddr; src = tw->tw_rcv_saddr; diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 78ecc4a01712..a51d63a43e33 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -28,7 +28,8 @@ static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *s struct tcp_fastopen_metrics { u16 mss; - u16 syn_loss:10; /* Recurring Fast Open SYN losses */ + u16 syn_loss:10, /* Recurring Fast Open SYN losses */ + try_exp:2; /* Request w/ exp. option (once) */ unsigned long last_syn_loss; /* Last Fast Open SYN loss */ struct tcp_fastopen_cookie cookie; }; @@ -131,6 +132,8 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, if (fastopen_clear) { tm->tcpm_fastopen.mss = 0; tm->tcpm_fastopen.syn_loss = 0; + tm->tcpm_fastopen.try_exp = 0; + tm->tcpm_fastopen.cookie.exp = false; tm->tcpm_fastopen.cookie.len = 0; } } @@ -713,6 +716,8 @@ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, if (tfom->mss) *mss = tfom->mss; *cookie = tfom->cookie; + if (cookie->len <= 0 && tfom->try_exp == 1) + cookie->exp = true; *syn_loss = tfom->syn_loss; *last_syn_loss = *syn_loss ? tfom->last_syn_loss : 0; } while (read_seqretry(&fastopen_seqlock, seq)); @@ -721,7 +726,8 @@ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, } void tcp_fastopen_cache_set(struct sock *sk, u16 mss, - struct tcp_fastopen_cookie *cookie, bool syn_lost) + struct tcp_fastopen_cookie *cookie, bool syn_lost, + u16 try_exp) { struct dst_entry *dst = __sk_dst_get(sk); struct tcp_metrics_block *tm; @@ -738,6 +744,9 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss, tfom->mss = mss; if (cookie && cookie->len > 0) tfom->cookie = *cookie; + else if (try_exp > tfom->try_exp && + tfom->cookie.len <= 0 && !tfom->cookie.exp) + tfom->try_exp = try_exp; if (syn_lost) { ++tfom->syn_loss; tfom->last_syn_loss = jiffies; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index d7003911c894..63d6311b5365 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -34,18 +34,7 @@ int sysctl_tcp_abort_on_overflow __read_mostly; struct inet_timewait_death_row tcp_death_row = { .sysctl_max_tw_buckets = NR_FILE * 2, - .period = TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS, - .death_lock = __SPIN_LOCK_UNLOCKED(tcp_death_row.death_lock), .hashinfo = &tcp_hashinfo, - .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0, - (unsigned long)&tcp_death_row), - .twkill_work = __WORK_INITIALIZER(tcp_death_row.twkill_work, - inet_twdr_twkill_work), -/* Short-time timewait calendar */ - - .twcal_hand = -1, - .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0, - (unsigned long)&tcp_death_row), }; EXPORT_SYMBOL_GPL(tcp_death_row); @@ -158,7 +147,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, if (!th->fin || TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) { kill_with_rst: - inet_twsk_deschedule(tw, &tcp_death_row); + inet_twsk_deschedule(tw); inet_twsk_put(tw); return TCP_TW_RST; } @@ -174,11 +163,9 @@ kill_with_rst: if (tcp_death_row.sysctl_tw_recycle && tcptw->tw_ts_recent_stamp && tcp_tw_remember_stamp(tw)) - inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout, - TCP_TIMEWAIT_LEN); + inet_twsk_schedule(tw, tw->tw_timeout); else - inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN, - TCP_TIMEWAIT_LEN); + inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN); return TCP_TW_ACK; } @@ -211,13 +198,12 @@ kill_with_rst: */ if (sysctl_tcp_rfc1337 == 0) { kill: - inet_twsk_deschedule(tw, &tcp_death_row); + inet_twsk_deschedule(tw); inet_twsk_put(tw); return TCP_TW_SUCCESS; } } - inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN, - TCP_TIMEWAIT_LEN); + inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN); if (tmp_opt.saw_tstamp) { tcptw->tw_ts_recent = tmp_opt.rcv_tsval; @@ -267,8 +253,7 @@ kill: * Do not reschedule in the last case. */ if (paws_reject || th->ack) - inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN, - TCP_TIMEWAIT_LEN); + inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN); return tcp_timewait_check_oow_rate_limit( tw, skb, LINUX_MIB_TCPACKSKIPPEDTIMEWAIT); @@ -283,16 +268,15 @@ EXPORT_SYMBOL(tcp_timewait_state_process); */ void tcp_time_wait(struct sock *sk, int state, int timeo) { - struct inet_timewait_sock *tw = NULL; const struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_sock *tp = tcp_sk(sk); + struct inet_timewait_sock *tw; bool recycle_ok = false; if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp) recycle_ok = tcp_remember_stamp(sk); - if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets) - tw = inet_twsk_alloc(sk, state); + tw = inet_twsk_alloc(sk, &tcp_death_row, state); if (tw) { struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); @@ -355,8 +339,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) timeo = TCP_TIMEWAIT_LEN; } - inet_twsk_schedule(tw, &tcp_death_row, timeo, - TCP_TIMEWAIT_LEN); + inet_twsk_schedule(tw, timeo); inet_twsk_put(tw); } else { /* Sorry, if we're out of memory, just CLOSE this @@ -628,10 +611,16 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, LINUX_MIB_TCPACKSKIPPEDSYNRECV, &tcp_rsk(req)->last_oow_ack_time) && - !inet_rtx_syn_ack(sk, req)) - mod_timer_pending(&req->rsk_timer, jiffies + - min(TCP_TIMEOUT_INIT << req->num_timeout, - TCP_RTO_MAX)); + !inet_rtx_syn_ack(sk, req)) { + unsigned long expires = jiffies; + + expires += min(TCP_TIMEOUT_INIT << req->num_timeout, + TCP_RTO_MAX); + if (!fastopen) + mod_timer_pending(&req->rsk_timer, expires); + else + req->rsk_timer.expires = expires; + } return NULL; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7404e5238e00..8c8d7e06b72f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -518,17 +518,26 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, if (unlikely(OPTION_FAST_OPEN_COOKIE & options)) { struct tcp_fastopen_cookie *foc = opts->fastopen_cookie; + u8 *p = (u8 *)ptr; + u32 len; /* Fast Open option length */ + + if (foc->exp) { + len = TCPOLEN_EXP_FASTOPEN_BASE + foc->len; + *ptr = htonl((TCPOPT_EXP << 24) | (len << 16) | + TCPOPT_FASTOPEN_MAGIC); + p += TCPOLEN_EXP_FASTOPEN_BASE; + } else { + len = TCPOLEN_FASTOPEN_BASE + foc->len; + *p++ = TCPOPT_FASTOPEN; + *p++ = len; + } - *ptr++ = htonl((TCPOPT_EXP << 24) | - ((TCPOLEN_EXP_FASTOPEN_BASE + foc->len) << 16) | - TCPOPT_FASTOPEN_MAGIC); - - memcpy(ptr, foc->val, foc->len); - if ((foc->len & 3) == 2) { - u8 *align = ((u8 *)ptr) + foc->len; - align[0] = align[1] = TCPOPT_NOP; + memcpy(p, foc->val, foc->len); + if ((len & 3) == 2) { + p[foc->len] = TCPOPT_NOP; + p[foc->len + 1] = TCPOPT_NOP; } - ptr += (foc->len + 3) >> 2; + ptr += (len + 3) >> 2; } } @@ -583,13 +592,17 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, } if (fastopen && fastopen->cookie.len >= 0) { - u32 need = TCPOLEN_EXP_FASTOPEN_BASE + fastopen->cookie.len; + u32 need = fastopen->cookie.len; + + need += fastopen->cookie.exp ? TCPOLEN_EXP_FASTOPEN_BASE : + TCPOLEN_FASTOPEN_BASE; need = (need + 3) & ~3U; /* Align to 32 bits */ if (remaining >= need) { opts->options |= OPTION_FAST_OPEN_COOKIE; opts->fastopen_cookie = &fastopen->cookie; remaining -= need; tp->syn_fastopen = 1; + tp->syn_fastopen_exp = fastopen->cookie.exp ? 1 : 0; } } @@ -641,8 +654,11 @@ static unsigned int tcp_synack_options(struct sock *sk, if (unlikely(!ireq->tstamp_ok)) remaining -= TCPOLEN_SACKPERM_ALIGNED; } - if (foc && foc->len >= 0) { - u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len; + if (foc != NULL && foc->len >= 0) { + u32 need = foc->len; + + need += foc->exp ? TCPOLEN_EXP_FASTOPEN_BASE : + TCPOLEN_FASTOPEN_BASE; need = (need + 3) & ~3U; /* Align to 32 bits */ if (remaining >= need) { opts->options |= OPTION_FAST_OPEN_COOKIE; @@ -2978,6 +2994,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, rcu_read_unlock(); #endif + /* Do not fool tcpdump (if any), clean our debris */ + skb->tstamp.tv64 = 0; return skb; } EXPORT_SYMBOL(tcp_make_synack); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 2568fd282873..8c65dc147d8b 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -167,7 +167,7 @@ static int tcp_write_timeout(struct sock *sk) if (icsk->icsk_retransmits) { dst_negative_advice(sk); if (tp->syn_fastopen || tp->syn_data) - tcp_fastopen_cache_set(sk, 0, NULL, true); + tcp_fastopen_cache_set(sk, 0, NULL, true, 0); if (tp->syn_data) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2162fc6ce1c1..d10b7e0112eb 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -433,7 +433,6 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr, udp_ehash_secret + net_hash_mix(net)); } - /* called with read_rcu_lock() */ static struct sock *udp4_lib_lookup2(struct net *net, __be32 saddr, __be16 sport, @@ -1171,7 +1170,6 @@ out: return ret; } - /** * first_packet_length - return length of first packet in receive queue * @sk: socket @@ -1355,7 +1353,6 @@ csum_copy_err: goto try_again; } - int udp_disconnect(struct sock *sk, int flags) { struct inet_sock *inet = inet_sk(sk); @@ -1579,7 +1576,6 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) udp_lib_checksum_complete(skb)) goto csum_error; - if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_RCVBUFERRORS, is_udplite); @@ -1609,7 +1605,6 @@ drop: return -1; } - static void flush_stack(struct sock **stack, unsigned int count, struct sk_buff *skb, unsigned int final) { diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index c83b35485056..6bb98cc193c9 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -75,7 +75,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, } EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock); -int udp_tunnel_xmit_skb(struct rtable *rt, struct sk_buff *skb, +int udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, bool xnet, bool nocheck) @@ -92,7 +92,7 @@ int udp_tunnel_xmit_skb(struct rtable *rt, struct sk_buff *skb, udp_set_csum(nocheck, skb, src, dst, skb->len); - return iptunnel_xmit(skb->sk, rt, skb, src, dst, IPPROTO_UDP, + return iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet); } EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb); diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index cac7468db0a1..60b032f58ccc 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -22,7 +22,7 @@ int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb) return xfrm4_extract_header(skb); } -static inline int xfrm4_rcv_encap_finish(struct sk_buff *skb) +static inline int xfrm4_rcv_encap_finish(struct sock *sk, struct sk_buff *skb) { if (!skb_dst(skb)) { const struct iphdr *iph = ip_hdr(skb); @@ -52,7 +52,8 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) iph->tot_len = htons(skb->len); ip_send_check(iph); - NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, NULL, skb, + skb->dev, NULL, xfrm4_rcv_encap_finish); return 0; } diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index dab73813cb92..2878dbfffeb7 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -69,7 +69,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb) } EXPORT_SYMBOL(xfrm4_prepare_output); -int xfrm4_output_finish(struct sk_buff *skb) +int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb) { memset(IPCB(skb), 0, sizeof(*IPCB(skb))); @@ -77,26 +77,26 @@ int xfrm4_output_finish(struct sk_buff *skb) IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; #endif - return xfrm_output(skb); + return xfrm_output(sk, skb); } -static int __xfrm4_output(struct sk_buff *skb) +static int __xfrm4_output(struct sock *sk, struct sk_buff *skb) { struct xfrm_state *x = skb_dst(skb)->xfrm; #ifdef CONFIG_NETFILTER if (!x) { IPCB(skb)->flags |= IPSKB_REROUTED; - return dst_output(skb); + return dst_output_sk(sk, skb); } #endif - return x->outer_mode->afinfo->output_finish(skb); + return x->outer_mode->afinfo->output_finish(sk, skb); } int xfrm4_output(struct sock *sk, struct sk_buff *skb) { - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, skb, + return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, sk, skb, NULL, skb_dst(skb)->dev, __xfrm4_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 033f17816ef4..871641bc1ed4 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -246,7 +246,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, *twp = tw; } else if (tw) { /* Silly. Should hash-dance instead... */ - inet_twsk_deschedule(tw, death_row); + inet_twsk_deschedule(tw); inet_twsk_put(tw); } diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index f724329d7436..b5e6cc1d4a73 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -760,7 +760,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, skb_set_inner_protocol(skb, protocol); - ip6tunnel_xmit(skb, dev); + ip6tunnel_xmit(NULL, skb, dev); if (ndst) ip6_tnl_dst_store(tunnel, ndst); return 0; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index fb97f7f8d4ed..f2e464eba5ef 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -46,8 +46,7 @@ #include <net/xfrm.h> #include <net/inet_ecn.h> - -int ip6_rcv_finish(struct sk_buff *skb) +int ip6_rcv_finish(struct sock *sk, struct sk_buff *skb) { if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { const struct inet6_protocol *ipprot; @@ -183,7 +182,8 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt /* Must drop socket now because of tproxy. */ skb_orphan(skb); - return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, dev, NULL, + return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, NULL, skb, + dev, NULL, ip6_rcv_finish); err: IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); @@ -198,7 +198,7 @@ drop: */ -static int ip6_input_finish(struct sk_buff *skb) +static int ip6_input_finish(struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(skb_dst(skb)->dev); const struct inet6_protocol *ipprot; @@ -277,7 +277,8 @@ discard: int ip6_input(struct sk_buff *skb) { - return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, skb, skb->dev, NULL, + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, NULL, skb, + skb->dev, NULL, ip6_input_finish); } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 654f245aa930..7fde1f265c90 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -56,7 +56,7 @@ #include <net/checksum.h> #include <linux/mroute6.h> -static int ip6_finish_output2(struct sk_buff *skb) +static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; @@ -70,7 +70,7 @@ static int ip6_finish_output2(struct sk_buff *skb) if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); - if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) && + if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) && ((mroute6_socket(dev_net(dev), skb) && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, @@ -82,7 +82,7 @@ static int ip6_finish_output2(struct sk_buff *skb) */ if (newskb) NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, - newskb, NULL, newskb->dev, + sk, newskb, NULL, newskb->dev, dev_loopback_xmit); if (ipv6_hdr(skb)->hop_limit == 0) { @@ -122,14 +122,14 @@ static int ip6_finish_output2(struct sk_buff *skb) return -EINVAL; } -static int ip6_finish_output(struct sk_buff *skb) +static int ip6_finish_output(struct sock *sk, struct sk_buff *skb) { if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || dst_allfrag(skb_dst(skb)) || (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) - return ip6_fragment(skb, ip6_finish_output2); + return ip6_fragment(sk, skb, ip6_finish_output2); else - return ip6_finish_output2(skb); + return ip6_finish_output2(sk, skb); } int ip6_output(struct sock *sk, struct sk_buff *skb) @@ -143,7 +143,8 @@ int ip6_output(struct sock *sk, struct sk_buff *skb) return 0; } - return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev, + return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb, + NULL, dev, ip6_finish_output, !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } @@ -223,8 +224,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUT, skb->len); - return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, - dst->dev, dst_output); + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, + NULL, dst->dev, dst_output_sk); } skb->dev = dst->dev; @@ -316,10 +317,10 @@ static int ip6_forward_proxy_check(struct sk_buff *skb) return 0; } -static inline int ip6_forward_finish(struct sk_buff *skb) +static inline int ip6_forward_finish(struct sock *sk, struct sk_buff *skb) { skb_sender_cpu_clear(skb); - return dst_output(skb); + return dst_output_sk(sk, skb); } static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) @@ -511,7 +512,8 @@ int ip6_forward(struct sk_buff *skb) IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); - return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev, + return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb, + skb->dev, dst->dev, ip6_forward_finish); error: @@ -538,7 +540,8 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) skb_copy_secmark(to, from); } -int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) +int ip6_fragment(struct sock *sk, struct sk_buff *skb, + int (*output)(struct sock *, struct sk_buff *)) { struct sk_buff *frag; struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); @@ -667,7 +670,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) ip6_copy_metadata(frag, skb); } - err = output(skb); + err = output(sk, skb); if (!err) IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGCREATES); @@ -800,7 +803,7 @@ slow_path: /* * Put this fragment into the sending queue. */ - err = output(frag); + err = output(sk, frag); if (err) goto fail; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index b6a211a150b2..5cafd92c2312 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1100,7 +1100,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; ipv6h->daddr = fl6->daddr; - ip6tunnel_xmit(skb, dev); + ip6tunnel_xmit(NULL, skb, dev); if (ndst) ip6_tnl_dst_store(t, ndst); return 0; diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index 32d9b268e7d8..bba8903e871f 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -62,7 +62,8 @@ error: } EXPORT_SYMBOL_GPL(udp_sock_create6); -int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb, +int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, struct net_device *dev, struct in6_addr *saddr, struct in6_addr *daddr, __u8 prio, __u8 ttl, __be16 src_port, @@ -97,7 +98,7 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb, ip6h->daddr = *daddr; ip6h->saddr = *saddr; - ip6tunnel_xmit(skb, dev); + ip6tunnel_xmit(sk, skb, dev); return 0; } EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index b53148444e15..ed9d681207fa 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -288,8 +288,7 @@ static struct ip6_tnl *vti6_locate(struct net *net, struct __ip6_tnl_parm *p, static void vti6_dev_uninit(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - struct net *net = dev_net(dev); - struct vti6_net *ip6n = net_generic(net, vti6_net_id); + struct vti6_net *ip6n = net_generic(t->net, vti6_net_id); if (dev == ip6n->fb_tnl_dev) RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 8493a22e74eb..74ceb73c1c9a 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1986,13 +1986,13 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) } #endif -static inline int ip6mr_forward2_finish(struct sk_buff *skb) +static inline int ip6mr_forward2_finish(struct sock *sk, struct sk_buff *skb) { IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTFORWDATAGRAMS); IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTOCTETS, skb->len); - return dst_output(skb); + return dst_output_sk(sk, skb); } /* @@ -2064,7 +2064,8 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt, IP6CB(skb)->flags |= IP6SKB_FORWARDED; - return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev, + return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb, + skb->dev, dev, ip6mr_forward2_finish); out_free: diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index fac1f27e428e..083b2927fc67 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1644,8 +1644,9 @@ static void mld_sendpack(struct sk_buff *skb) payload_len = skb->len; - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev, - dst_output); + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, + net->ipv6.igmp_sk, skb, NULL, skb->dev, + dst_output_sk); out: if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT); @@ -2007,8 +2008,8 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) } skb_dst_set(skb, dst); - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev, - dst_output); + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, + NULL, skb->dev, dst_output_sk); out: if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, type); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 71fde6cafb35..96f153c0846b 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -463,8 +463,9 @@ static void ndisc_send_skb(struct sk_buff *skb, idev = __in6_dev_get(dst->dev); IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, - dst_output); + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, + NULL, dst->dev, + dst_output_sk); if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, type); ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index e2b882056751..a45db0b4785c 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -75,7 +75,7 @@ static unsigned int ipv6_defrag(const struct nf_hook_ops *ops, nf_ct_frag6_consume_orig(reasm); - NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, reasm, + NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, state->sk, reasm, state->in, state->out, state->okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1); diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index 3afdce03d94e..94b4c6dfb400 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -13,6 +13,7 @@ #include <net/ip6_checksum.h> #include <net/netfilter/ipv6/nf_reject.h> #include <linux/netfilter_ipv6.h> +#include <linux/netfilter_bridge.h> #include <net/netfilter/ipv6/nf_reject.h> const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb, @@ -195,7 +196,8 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) */ if (oldskb->nf_bridge) { struct ethhdr *oeth = eth_hdr(oldskb); - nskb->dev = oldskb->nf_bridge->physindev; + + nskb->dev = nf_bridge_get_physindev(oldskb); nskb->protocol = htons(ETH_P_IPV6); ip6h->payload_len = htons(sizeof(struct tcphdr)); if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c index 529c119cbb14..cd1ac1637a05 100644 --- a/net/ipv6/netfilter/nft_masq_ipv6.c +++ b/net/ipv6/netfilter/nft_masq_ipv6.c @@ -18,19 +18,16 @@ #include <net/netfilter/ipv6/nf_nat_masquerade.h> static void nft_masq_ipv6_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_masq *priv = nft_expr_priv(expr); struct nf_nat_range range; - unsigned int verdict; memset(&range, 0, sizeof(range)); range.flags = priv->flags; - verdict = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out); - - data[NFT_REG_VERDICT].verdict = verdict; + regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out); } static struct nft_expr_type nft_masq_ipv6_type; diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c index 11820b6b3613..effd393bd517 100644 --- a/net/ipv6/netfilter/nft_redir_ipv6.c +++ b/net/ipv6/netfilter/nft_redir_ipv6.c @@ -18,26 +18,25 @@ #include <net/netfilter/nf_nat_redirect.h> static void nft_redir_ipv6_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_redir *priv = nft_expr_priv(expr); struct nf_nat_range range; - unsigned int verdict; memset(&range, 0, sizeof(range)); if (priv->sreg_proto_min) { range.min_proto.all = - *(__be16 *)&data[priv->sreg_proto_min].data[0]; + *(__be16 *)®s->data[priv->sreg_proto_min], range.max_proto.all = - *(__be16 *)&data[priv->sreg_proto_max].data[0]; + *(__be16 *)®s->data[priv->sreg_proto_max], range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } range.flags |= priv->flags; - verdict = nf_nat_redirect_ipv6(pkt->skb, &range, pkt->ops->hooknum); - data[NFT_REG_VERDICT].verdict = verdict; + regs->verdict.code = nf_nat_redirect_ipv6(pkt->skb, &range, + pkt->ops->hooknum); } static struct nft_expr_type nft_redir_ipv6_type; diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c index f73285924144..d0d1540ecf87 100644 --- a/net/ipv6/netfilter/nft_reject_ipv6.c +++ b/net/ipv6/netfilter/nft_reject_ipv6.c @@ -20,7 +20,7 @@ #include <net/netfilter/ipv6/nf_reject.h> static void nft_reject_ipv6_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); @@ -34,9 +34,11 @@ static void nft_reject_ipv6_eval(const struct nft_expr *expr, case NFT_REJECT_TCP_RST: nf_send_reset6(net, pkt->skb, pkt->ops->hooknum); break; + default: + break; } - data[NFT_REG_VERDICT].verdict = NF_DROP; + regs->verdict.code = NF_DROP; } static struct nft_expr_type nft_reject_ipv6_type; diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 4016a6ef9d61..85892af57364 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -136,7 +136,7 @@ int ip6_dst_hoplimit(struct dst_entry *dst) EXPORT_SYMBOL(ip6_dst_hoplimit); #endif -int __ip6_local_out(struct sk_buff *skb) +static int __ip6_local_out_sk(struct sock *sk, struct sk_buff *skb) { int len; @@ -146,19 +146,30 @@ int __ip6_local_out(struct sk_buff *skb) ipv6_hdr(skb)->payload_len = htons(len); IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); - return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, - skb_dst(skb)->dev, dst_output); + return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, + NULL, skb_dst(skb)->dev, dst_output_sk); +} + +int __ip6_local_out(struct sk_buff *skb) +{ + return __ip6_local_out_sk(skb->sk, skb); } EXPORT_SYMBOL_GPL(__ip6_local_out); -int ip6_local_out(struct sk_buff *skb) +int ip6_local_out_sk(struct sock *sk, struct sk_buff *skb) { int err; - err = __ip6_local_out(skb); + err = __ip6_local_out_sk(sk, skb); if (likely(err == 1)) - err = dst_output(skb); + err = dst_output_sk(sk, skb); return err; } +EXPORT_SYMBOL_GPL(ip6_local_out_sk); + +int ip6_local_out(struct sk_buff *skb) +{ + return ip6_local_out_sk(skb->sk, skb); +} EXPORT_SYMBOL_GPL(ip6_local_out); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 79ccdb4c1b33..8072bd4139b7 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -652,8 +652,8 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, goto error_fault; IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, - rt->dst.dev, dst_output); + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, + NULL, rt->dst.dev, dst_output_sk); if (err > 0) err = net_xmit_errno(err); if (err) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 6cf2026a9cea..ac35a28599be 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -983,7 +983,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, skb_set_inner_ipproto(skb, IPPROTO_IPV6); - err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, + err = iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); iptunnel_xmit_stats(err, &dev->stats, dev->tstats); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f73a97f6e68e..ad51df85aa00 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1486,7 +1486,7 @@ do_time_wait: ntohs(th->dest), tcp_v6_iif(skb)); if (sk2) { struct inet_timewait_sock *tw = inet_twsk(sk); - inet_twsk_deschedule(tw, &tcp_death_row); + inet_twsk_deschedule(tw); inet_twsk_put(tw); sk = sk2; tcp_v6_restore_cb(skb); @@ -1728,9 +1728,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) static void get_timewait6_sock(struct seq_file *seq, struct inet_timewait_sock *tw, int i) { + long delta = tw->tw_timer.expires - jiffies; const struct in6_addr *dest, *src; __u16 destp, srcp; - s32 delta = tw->tw_ttd - inet_tw_time_stamp(); dest = &tw->tw_v6_daddr; src = &tw->tw_v6_rcv_saddr; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 120aff9aa010..3477c919fcc8 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -120,7 +120,6 @@ static u32 udp6_portaddr_hash(const struct net *net, return hash ^ port; } - int udp_v6_get_port(struct sock *sk, unsigned short snum) { unsigned int hash2_nulladdr = @@ -385,7 +384,6 @@ struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be } EXPORT_SYMBOL_GPL(udp6_lib_lookup); - /* * This should be easy, if there is something there we * return it, otherwise we block. @@ -1555,7 +1553,6 @@ static struct inet_protosw udpv6_protosw = { .flags = INET_PROTOSW_PERMANENT, }; - int __init udpv6_init(void) { int ret; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index f48fbe4d16f5..74bd17882a2f 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -42,7 +42,8 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) ipv6_hdr(skb)->payload_len = htons(skb->len); __skb_push(skb, skb->data - skb_network_header(skb)); - NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, NULL, skb, + skb->dev, NULL, ip6_rcv_finish); return -1; } diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 010f8bd2d577..09c76a7b474d 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -120,7 +120,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) } EXPORT_SYMBOL(xfrm6_prepare_output); -int xfrm6_output_finish(struct sk_buff *skb) +int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb) { memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); @@ -128,10 +128,10 @@ int xfrm6_output_finish(struct sk_buff *skb) IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; #endif - return xfrm_output(skb); + return xfrm_output(sk, skb); } -static int __xfrm6_output(struct sk_buff *skb) +static int __xfrm6_output(struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct xfrm_state *x = dst->xfrm; @@ -140,7 +140,7 @@ static int __xfrm6_output(struct sk_buff *skb) #ifdef CONFIG_NETFILTER if (!x) { IP6CB(skb)->flags |= IP6SKB_REROUTED; - return dst_output(skb); + return dst_output_sk(sk, skb); } #endif @@ -160,14 +160,15 @@ static int __xfrm6_output(struct sk_buff *skb) if (x->props.mode == XFRM_MODE_TUNNEL && ((skb->len > mtu && !skb_is_gso(skb)) || dst_allfrag(skb_dst(skb)))) { - return ip6_fragment(skb, x->outer_mode->afinfo->output_finish); + return ip6_fragment(sk, skb, + x->outer_mode->afinfo->output_finish); } - return x->outer_mode->afinfo->output_finish(skb); + return x->outer_mode->afinfo->output_finish(sk, skb); } int xfrm6_output(struct sock *sk, struct sk_buff *skb) { - return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, + return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb, NULL, skb_dst(skb)->dev, __xfrm6_output, !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 20522492d8cc..cce9d425c718 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -188,6 +188,43 @@ ieee80211_wake_queue_agg(struct ieee80211_sub_if_data *sdata, int tid) __release(agg_queue); } +static void +ieee80211_agg_stop_txq(struct sta_info *sta, int tid) +{ + struct ieee80211_txq *txq = sta->sta.txq[tid]; + struct txq_info *txqi; + + if (!txq) + return; + + txqi = to_txq_info(txq); + + /* Lock here to protect against further seqno updates on dequeue */ + spin_lock_bh(&txqi->queue.lock); + set_bit(IEEE80211_TXQ_STOP, &txqi->flags); + spin_unlock_bh(&txqi->queue.lock); +} + +static void +ieee80211_agg_start_txq(struct sta_info *sta, int tid, bool enable) +{ + struct ieee80211_txq *txq = sta->sta.txq[tid]; + struct txq_info *txqi; + + if (!txq) + return; + + txqi = to_txq_info(txq); + + if (enable) + set_bit(IEEE80211_TXQ_AMPDU, &txqi->flags); + else + clear_bit(IEEE80211_TXQ_AMPDU, &txqi->flags); + + clear_bit(IEEE80211_TXQ_STOP, &txqi->flags); + drv_wake_tx_queue(sta->sdata->local, txqi); +} + /* * splice packets from the STA's pending to the local pending, * requires a call to ieee80211_agg_splice_finish later @@ -247,6 +284,7 @@ static void ieee80211_remove_tid_tx(struct sta_info *sta, int tid) ieee80211_assign_tid_tx(sta, tid, NULL); ieee80211_agg_splice_finish(sta->sdata, tid); + ieee80211_agg_start_txq(sta, tid, false); kfree_rcu(tid_tx, rcu_head); } @@ -418,6 +456,8 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) */ clear_bit(HT_AGG_STATE_WANT_START, &tid_tx->state); + ieee80211_agg_stop_txq(sta, tid); + /* * Make sure no packets are being processed. This ensures that * we have a valid starting sequence number and that in-flight @@ -440,6 +480,8 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) ieee80211_agg_splice_finish(sdata, tid); spin_unlock_bh(&sta->lock); + ieee80211_agg_start_txq(sta, tid, false); + kfree_rcu(tid_tx, rcu_head); return; } @@ -669,6 +711,8 @@ static void ieee80211_agg_tx_operational(struct ieee80211_local *local, ieee80211_agg_splice_finish(sta->sdata, tid); spin_unlock_bh(&sta->lock); + + ieee80211_agg_start_txq(sta, tid, true); } void ieee80211_start_tx_ba_cb(struct ieee80211_vif *vif, u8 *ra, u16 tid) diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 0a39d3db951a..26e1ca8a474a 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1367,4 +1367,16 @@ drv_tdls_recv_channel_switch(struct ieee80211_local *local, trace_drv_return_void(local); } +static inline void drv_wake_tx_queue(struct ieee80211_local *local, + struct txq_info *txq) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->txq.vif); + + if (!check_sdata_in_driver(sdata)) + return; + + trace_drv_wake_tx_queue(local, sdata, txq); + local->ops->wake_tx_queue(&local->hw, &txq->txq); +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 487f5e2a9283..ab46ab4a7249 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -26,6 +26,7 @@ #include <linux/etherdevice.h> #include <linux/leds.h> #include <linux/idr.h> +#include <linux/rhashtable.h> #include <net/ieee80211_radiotap.h> #include <net/cfg80211.h> #include <net/mac80211.h> @@ -810,6 +811,19 @@ struct mac80211_qos_map { struct rcu_head rcu_head; }; +enum txq_info_flags { + IEEE80211_TXQ_STOP, + IEEE80211_TXQ_AMPDU, +}; + +struct txq_info { + struct sk_buff_head queue; + unsigned long flags; + + /* keep last! */ + struct ieee80211_txq txq; +}; + struct ieee80211_sub_if_data { struct list_head list; @@ -852,6 +866,7 @@ struct ieee80211_sub_if_data { bool control_port_no_encrypt; int encrypt_headroom; + atomic_t txqs_len[IEEE80211_NUM_ACS]; struct ieee80211_tx_queue_params tx_conf[IEEE80211_NUM_ACS]; struct mac80211_qos_map __rcu *qos_map; @@ -1187,7 +1202,7 @@ struct ieee80211_local { spinlock_t tim_lock; unsigned long num_sta; struct list_head sta_list; - struct sta_info __rcu *sta_hash[STA_HASH_SIZE]; + struct rhashtable sta_hash; struct timer_list sta_cleanup; int sta_generation; @@ -1449,6 +1464,10 @@ static inline struct ieee80211_local *hw_to_local( return container_of(hw, struct ieee80211_local, hw); } +static inline struct txq_info *to_txq_info(struct ieee80211_txq *txq) +{ + return container_of(txq, struct txq_info, txq); +} static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr) { @@ -1905,6 +1924,9 @@ static inline bool ieee80211_can_run_worker(struct ieee80211_local *local) return true; } +void ieee80211_init_tx_queue(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct txq_info *txq, int tid); void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, u16 status, const u8 *extra, size_t extra_len, const u8 *bssid, @@ -1943,10 +1965,6 @@ int __ieee80211_request_smps_ap(struct ieee80211_sub_if_data *sdata, void ieee80211_recalc_smps(struct ieee80211_sub_if_data *sdata); void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata); -size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, - const u8 *ids, int n_ids, - const u8 *after_ric, int n_after_ric, - size_t offset); size_t ieee80211_ie_split_vendor(const u8 *ies, size_t ielen, size_t offset); u8 *ieee80211_ie_build_ht_cap(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap, u16 cap); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index a0cd97fd0c49..b4ac596a7cb7 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -969,6 +969,13 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, } spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); + if (sdata->vif.txq) { + struct txq_info *txqi = to_txq_info(sdata->vif.txq); + + ieee80211_purge_tx_queue(&local->hw, &txqi->queue); + atomic_set(&sdata->txqs_len[txqi->txq.ac], 0); + } + if (local->open_count == 0) ieee80211_clear_tx_pending(local); @@ -1654,6 +1661,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, { struct net_device *ndev = NULL; struct ieee80211_sub_if_data *sdata = NULL; + struct txq_info *txqi; int ret, i; int txqs = 1; @@ -1673,10 +1681,18 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ieee80211_assign_perm_addr(local, wdev->address, type); memcpy(sdata->vif.addr, wdev->address, ETH_ALEN); } else { + int size = ALIGN(sizeof(*sdata) + local->hw.vif_data_size, + sizeof(void *)); + int txq_size = 0; + + if (local->ops->wake_tx_queue) + txq_size += sizeof(struct txq_info) + + local->hw.txq_data_size; + if (local->hw.queues >= IEEE80211_NUM_ACS) txqs = IEEE80211_NUM_ACS; - ndev = alloc_netdev_mqs(sizeof(*sdata) + local->hw.vif_data_size, + ndev = alloc_netdev_mqs(size + txq_size, name, name_assign_type, ieee80211_if_setup, txqs, 1); if (!ndev) @@ -1711,6 +1727,11 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, memcpy(sdata->vif.addr, ndev->dev_addr, ETH_ALEN); memcpy(sdata->name, ndev->name, IFNAMSIZ); + if (txq_size) { + txqi = netdev_priv(ndev) + size; + ieee80211_init_tx_queue(sdata, NULL, txqi, 0); + } + sdata->dev = ndev; } diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 4977967c8b00..df3051d96aff 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -557,6 +557,9 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, local = wiphy_priv(wiphy); + if (sta_info_init(local)) + goto err_free; + local->hw.wiphy = wiphy; local->hw.priv = (char *)local + ALIGN(sizeof(*local), NETDEV_ALIGN); @@ -629,8 +632,6 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, spin_lock_init(&local->ack_status_lock); idr_init(&local->ack_status_frames); - sta_info_init(local); - for (i = 0; i < IEEE80211_MAX_QUEUES; i++) { skb_queue_head_init(&local->pending[i]); atomic_set(&local->agg_queue_stop[i], 0); @@ -650,6 +651,9 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, ieee80211_roc_setup(local); return &local->hw; + err_free: + wiphy_free(wiphy); + return NULL; } EXPORT_SYMBOL(ieee80211_alloc_hw_nm); @@ -1035,6 +1039,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) local->dynamic_ps_forced_timeout = -1; + if (!local->hw.txq_ac_max_pending) + local->hw.txq_ac_max_pending = 64; + result = ieee80211_wep_init(local); if (result < 0) wiphy_debug(local->hw.wiphy, "Failed to initialize wep: %d\n", @@ -1173,7 +1180,6 @@ void ieee80211_unregister_hw(struct ieee80211_hw *hw) destroy_workqueue(local->workqueue); wiphy_unregister(local->hw.wiphy); - sta_info_stop(local); ieee80211_wep_free(local); ieee80211_led_exit(local); kfree(local->int_scan_req); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 00103f36dcbf..26053bf2faa8 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1348,15 +1348,15 @@ static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, */ if (has_80211h_pwr && (!has_cisco_pwr || pwr_level_80211h <= pwr_level_cisco)) { - sdata_info(sdata, - "Limiting TX power to %d (%d - %d) dBm as advertised by %pM\n", - pwr_level_80211h, chan_pwr, pwr_reduction_80211h, - sdata->u.mgd.bssid); + sdata_dbg(sdata, + "Limiting TX power to %d (%d - %d) dBm as advertised by %pM\n", + pwr_level_80211h, chan_pwr, pwr_reduction_80211h, + sdata->u.mgd.bssid); new_ap_level = pwr_level_80211h; } else { /* has_cisco_pwr is always true here. */ - sdata_info(sdata, - "Limiting TX power to %d dBm as advertised by %pM\n", - pwr_level_cisco, sdata->u.mgd.bssid); + sdata_dbg(sdata, + "Limiting TX power to %d dBm as advertised by %pM\n", + pwr_level_cisco, sdata->u.mgd.bssid); new_ap_level = pwr_level_cisco; } diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index ef6e8a6c4253..247552a7f6c2 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -69,14 +69,39 @@ rix_to_ndx(struct minstrel_sta_info *mi, int rix) return i; } +/* return current EMWA throughput */ +int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_ewma) +{ + int usecs; + + usecs = mr->perfect_tx_time; + if (!usecs) + usecs = 1000000; + + /* reset thr. below 10% success */ + if (mr->stats.prob_ewma < MINSTREL_FRAC(10, 100)) + return 0; + + if (prob_ewma > MINSTREL_FRAC(90, 100)) + return MINSTREL_TRUNC(100000 * (MINSTREL_FRAC(90, 100) / usecs)); + else + return MINSTREL_TRUNC(100000 * (prob_ewma / usecs)); +} + /* find & sort topmost throughput rates */ static inline void minstrel_sort_best_tp_rates(struct minstrel_sta_info *mi, int i, u8 *tp_list) { int j = MAX_THR_RATES; + struct minstrel_rate_stats *tmp_mrs = &mi->r[j - 1].stats; + struct minstrel_rate_stats *cur_mrs = &mi->r[i].stats; - while (j > 0 && mi->r[i].stats.cur_tp > mi->r[tp_list[j - 1]].stats.cur_tp) + while (j > 0 && (minstrel_get_tp_avg(&mi->r[i], cur_mrs->prob_ewma) > + minstrel_get_tp_avg(&mi->r[tp_list[j - 1]], tmp_mrs->prob_ewma))) { j--; + tmp_mrs = &mi->r[tp_list[j - 1]].stats; + } + if (j < MAX_THR_RATES - 1) memmove(&tp_list[j + 1], &tp_list[j], MAX_THR_RATES - (j + 1)); if (j < MAX_THR_RATES) @@ -127,13 +152,47 @@ minstrel_update_rates(struct minstrel_priv *mp, struct minstrel_sta_info *mi) rate_control_set_rates(mp->hw, mi->sta, ratetbl); } +/* +* Recalculate statistics and counters of a given rate +*/ +void +minstrel_calc_rate_stats(struct minstrel_rate_stats *mrs) +{ + if (unlikely(mrs->attempts > 0)) { + mrs->sample_skipped = 0; + mrs->cur_prob = MINSTREL_FRAC(mrs->success, mrs->attempts); + if (unlikely(!mrs->att_hist)) { + mrs->prob_ewma = mrs->cur_prob; + } else { + /* update exponential weighted moving variance */ + mrs->prob_ewmsd = minstrel_ewmsd(mrs->prob_ewmsd, + mrs->cur_prob, + mrs->prob_ewma, + EWMA_LEVEL); + + /*update exponential weighted moving avarage */ + mrs->prob_ewma = minstrel_ewma(mrs->prob_ewma, + mrs->cur_prob, + EWMA_LEVEL); + } + mrs->att_hist += mrs->attempts; + mrs->succ_hist += mrs->success; + } else { + mrs->sample_skipped++; + } + + mrs->last_success = mrs->success; + mrs->last_attempts = mrs->attempts; + mrs->success = 0; + mrs->attempts = 0; +} + static void minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) { u8 tmp_tp_rate[MAX_THR_RATES]; u8 tmp_prob_rate = 0; - u32 usecs; - int i; + int i, tmp_cur_tp, tmp_prob_tp; for (i = 0; i < MAX_THR_RATES; i++) tmp_tp_rate[i] = 0; @@ -141,38 +200,15 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) for (i = 0; i < mi->n_rates; i++) { struct minstrel_rate *mr = &mi->r[i]; struct minstrel_rate_stats *mrs = &mi->r[i].stats; + struct minstrel_rate_stats *tmp_mrs = &mi->r[tmp_prob_rate].stats; - usecs = mr->perfect_tx_time; - if (!usecs) - usecs = 1000000; - - if (unlikely(mrs->attempts > 0)) { - mrs->sample_skipped = 0; - mrs->cur_prob = MINSTREL_FRAC(mrs->success, - mrs->attempts); - mrs->succ_hist += mrs->success; - mrs->att_hist += mrs->attempts; - mrs->probability = minstrel_ewma(mrs->probability, - mrs->cur_prob, - EWMA_LEVEL); - } else - mrs->sample_skipped++; - - mrs->last_success = mrs->success; - mrs->last_attempts = mrs->attempts; - mrs->success = 0; - mrs->attempts = 0; - - /* Update throughput per rate, reset thr. below 10% success */ - if (mrs->probability < MINSTREL_FRAC(10, 100)) - mrs->cur_tp = 0; - else - mrs->cur_tp = mrs->probability * (1000000 / usecs); + /* Update statistics of success probability per rate */ + minstrel_calc_rate_stats(mrs); /* Sample less often below the 10% chance of success. * Sample less often above the 95% chance of success. */ - if (mrs->probability > MINSTREL_FRAC(95, 100) || - mrs->probability < MINSTREL_FRAC(10, 100)) { + if (mrs->prob_ewma > MINSTREL_FRAC(95, 100) || + mrs->prob_ewma < MINSTREL_FRAC(10, 100)) { mr->adjusted_retry_count = mrs->retry_count >> 1; if (mr->adjusted_retry_count > 2) mr->adjusted_retry_count = 2; @@ -192,11 +228,14 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) * choose the maximum throughput rate as max_prob_rate * (2) if all success probabilities < 95%, the rate with * highest success probability is chosen as max_prob_rate */ - if (mrs->probability >= MINSTREL_FRAC(95, 100)) { - if (mrs->cur_tp >= mi->r[tmp_prob_rate].stats.cur_tp) + if (mrs->prob_ewma >= MINSTREL_FRAC(95, 100)) { + tmp_cur_tp = minstrel_get_tp_avg(mr, mrs->prob_ewma); + tmp_prob_tp = minstrel_get_tp_avg(&mi->r[tmp_prob_rate], + tmp_mrs->prob_ewma); + if (tmp_cur_tp >= tmp_prob_tp) tmp_prob_rate = i; } else { - if (mrs->probability >= mi->r[tmp_prob_rate].stats.probability) + if (mrs->prob_ewma >= tmp_mrs->prob_ewma) tmp_prob_rate = i; } } @@ -215,7 +254,7 @@ minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) #endif /* Reset update timer */ - mi->stats_update = jiffies; + mi->last_stats_update = jiffies; minstrel_update_rates(mp, mi); } @@ -253,7 +292,7 @@ minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband, if (mi->sample_deferred > 0) mi->sample_deferred--; - if (time_after(jiffies, mi->stats_update + + if (time_after(jiffies, mi->last_stats_update + (mp->update_interval * HZ) / 1000)) minstrel_update_stats(mp, mi); } @@ -385,7 +424,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, * has a probability of >95%, we shouldn't be attempting * to use it, as this only wastes precious airtime */ if (!mrr_capable && - (mi->r[ndx].stats.probability > MINSTREL_FRAC(95, 100))) + (mi->r[ndx].stats.prob_ewma > MINSTREL_FRAC(95, 100))) return; mi->prev_sample = true; @@ -519,7 +558,7 @@ minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband, } mi->n_rates = n; - mi->stats_update = jiffies; + mi->last_stats_update = jiffies; init_sample_table(mi); minstrel_update_rates(mp, mi); @@ -553,7 +592,7 @@ minstrel_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp) if (!mi->sample_table) goto error1; - mi->stats_update = jiffies; + mi->last_stats_update = jiffies; return mi; error1: @@ -663,12 +702,18 @@ minstrel_free(void *priv) static u32 minstrel_get_expected_throughput(void *priv_sta) { struct minstrel_sta_info *mi = priv_sta; + struct minstrel_rate_stats *tmp_mrs; int idx = mi->max_tp_rate[0]; + int tmp_cur_tp; /* convert pkt per sec in kbps (1200 is the average pkt size used for * computing cur_tp */ - return MINSTREL_TRUNC(mi->r[idx].stats.cur_tp) * 1200 * 8 / 1024; + tmp_mrs = &mi->r[idx].stats; + tmp_cur_tp = minstrel_get_tp_avg(&mi->r[idx], tmp_mrs->prob_ewma); + tmp_cur_tp = tmp_cur_tp * 1200 * 8 / 1024; + + return tmp_cur_tp; } const struct rate_control_ops mac80211_minstrel = { diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h index 410efe620c57..c230bbe93262 100644 --- a/net/mac80211/rc80211_minstrel.h +++ b/net/mac80211/rc80211_minstrel.h @@ -13,7 +13,6 @@ #define EWMA_DIV 128 #define SAMPLE_COLUMNS 10 /* number of columns in sample table */ - /* scaled fraction values */ #define MINSTREL_SCALE 16 #define MINSTREL_FRAC(val, div) (((val) << MINSTREL_SCALE) / div) @@ -24,11 +23,34 @@ /* * Perform EWMA (Exponentially Weighted Moving Average) calculation - */ + */ static inline int minstrel_ewma(int old, int new, int weight) { - return (new * (EWMA_DIV - weight) + old * weight) / EWMA_DIV; + int diff, incr; + + diff = new - old; + incr = (EWMA_DIV - weight) * diff / EWMA_DIV; + + return old + incr; +} + +/* + * Perform EWMSD (Exponentially Weighted Moving Standard Deviation) calculation + */ +static inline int +minstrel_ewmsd(int old_ewmsd, int cur_prob, int prob_ewma, int weight) +{ + int diff, incr, tmp_var; + + /* calculate exponential weighted moving variance */ + diff = MINSTREL_TRUNC((cur_prob - prob_ewma) * 1000000); + incr = (EWMA_DIV - weight) * diff / EWMA_DIV; + tmp_var = old_ewmsd * old_ewmsd; + tmp_var = weight * (tmp_var + diff * incr / 1000000) / EWMA_DIV; + + /* return standard deviation */ + return (u16) int_sqrt(tmp_var); } struct minstrel_rate_stats { @@ -39,11 +61,13 @@ struct minstrel_rate_stats { /* total attempts/success counters */ u64 att_hist, succ_hist; - /* current throughput */ - unsigned int cur_tp; - - /* packet delivery probabilities */ - unsigned int cur_prob, probability; + /* statistis of packet delivery probability + * cur_prob - current prob within last update intervall + * prob_ewma - exponential weighted moving average of prob + * prob_ewmsd - exp. weighted moving standard deviation of prob */ + unsigned int cur_prob; + unsigned int prob_ewma; + u16 prob_ewmsd; /* maximum retry counts */ u8 retry_count; @@ -71,7 +95,7 @@ struct minstrel_rate { struct minstrel_sta_info { struct ieee80211_sta *sta; - unsigned long stats_update; + unsigned long last_stats_update; unsigned int sp_ack_dur; unsigned int rate_avg; @@ -95,6 +119,7 @@ struct minstrel_sta_info { #ifdef CONFIG_MAC80211_DEBUGFS struct dentry *dbg_stats; + struct dentry *dbg_stats_csv; #endif }; @@ -121,7 +146,6 @@ struct minstrel_priv { u32 fixed_rate_idx; struct dentry *dbg_fixed_rate; #endif - }; struct minstrel_debugfs_info { @@ -133,8 +157,13 @@ extern const struct rate_control_ops mac80211_minstrel; void minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir); void minstrel_remove_sta_debugfs(void *priv, void *priv_sta); +/* Recalculate success probabilities and counters for a given rate using EWMA */ +void minstrel_calc_rate_stats(struct minstrel_rate_stats *mrs); +int minstrel_get_tp_avg(struct minstrel_rate *mr, int prob_ewma); + /* debugfs */ int minstrel_stats_open(struct inode *inode, struct file *file); +int minstrel_stats_csv_open(struct inode *inode, struct file *file); ssize_t minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *ppos); int minstrel_stats_release(struct inode *inode, struct file *file); diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c index 2acab1bcaa4b..1db5f7c3318a 100644 --- a/net/mac80211/rc80211_minstrel_debugfs.c +++ b/net/mac80211/rc80211_minstrel_debugfs.c @@ -54,12 +54,28 @@ #include <net/mac80211.h> #include "rc80211_minstrel.h" +ssize_t +minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) +{ + struct minstrel_debugfs_info *ms; + + ms = file->private_data; + return simple_read_from_buffer(buf, len, ppos, ms->buf, ms->len); +} + +int +minstrel_stats_release(struct inode *inode, struct file *file) +{ + kfree(file->private_data); + return 0; +} + int minstrel_stats_open(struct inode *inode, struct file *file) { struct minstrel_sta_info *mi = inode->i_private; struct minstrel_debugfs_info *ms; - unsigned int i, tp, prob, eprob; + unsigned int i, tp_max, tp_avg, prob, eprob; char *p; ms = kmalloc(2048, GFP_KERNEL); @@ -68,8 +84,14 @@ minstrel_stats_open(struct inode *inode, struct file *file) file->private_data = ms; p = ms->buf; - p += sprintf(p, "rate tpt eprob *prob" - " *ok(*cum) ok( cum)\n"); + p += sprintf(p, "\n"); + p += sprintf(p, "best __________rate_________ ______" + "statistics______ ________last_______ " + "______sum-of________\n"); + p += sprintf(p, "rate [name idx airtime max_tp] [ ø(tp) ø(prob) " + "sd(prob)] [prob.|retry|suc|att] " + "[#success | #attempts]\n"); + for (i = 0; i < mi->n_rates; i++) { struct minstrel_rate *mr = &mi->r[i]; struct minstrel_rate_stats *mrs = &mi->r[i].stats; @@ -79,18 +101,26 @@ minstrel_stats_open(struct inode *inode, struct file *file) *(p++) = (i == mi->max_tp_rate[2]) ? 'C' : ' '; *(p++) = (i == mi->max_tp_rate[3]) ? 'D' : ' '; *(p++) = (i == mi->max_prob_rate) ? 'P' : ' '; - p += sprintf(p, "%3u%s", mr->bitrate / 2, + + p += sprintf(p, " %3u%s ", mr->bitrate / 2, (mr->bitrate & 1 ? ".5" : " ")); + p += sprintf(p, "%3u ", i); + p += sprintf(p, "%6u ", mr->perfect_tx_time); - tp = MINSTREL_TRUNC(mrs->cur_tp / 10); + tp_max = minstrel_get_tp_avg(mr, MINSTREL_FRAC(100,100)); + tp_avg = minstrel_get_tp_avg(mr, mrs->prob_ewma); prob = MINSTREL_TRUNC(mrs->cur_prob * 1000); - eprob = MINSTREL_TRUNC(mrs->probability * 1000); + eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000); - p += sprintf(p, " %4u.%1u %3u.%1u %3u.%1u" - " %4u(%4u) %9llu(%9llu)\n", - tp / 10, tp % 10, + p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u %3u.%1u" + " %3u.%1u %3u %3u %-3u " + "%9llu %-9llu\n", + tp_max / 10, tp_max % 10, + tp_avg / 10, tp_avg % 10, eprob / 10, eprob % 10, + mrs->prob_ewmsd / 10, mrs->prob_ewmsd % 10, prob / 10, prob % 10, + mrs->retry_count, mrs->last_success, mrs->last_attempts, (unsigned long long)mrs->succ_hist, @@ -107,25 +137,75 @@ minstrel_stats_open(struct inode *inode, struct file *file) return 0; } -ssize_t -minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) +static const struct file_operations minstrel_stat_fops = { + .owner = THIS_MODULE, + .open = minstrel_stats_open, + .read = minstrel_stats_read, + .release = minstrel_stats_release, + .llseek = default_llseek, +}; + +int +minstrel_stats_csv_open(struct inode *inode, struct file *file) { + struct minstrel_sta_info *mi = inode->i_private; struct minstrel_debugfs_info *ms; + unsigned int i, tp_max, tp_avg, prob, eprob; + char *p; - ms = file->private_data; - return simple_read_from_buffer(buf, len, ppos, ms->buf, ms->len); -} + ms = kmalloc(2048, GFP_KERNEL); + if (!ms) + return -ENOMEM; + + file->private_data = ms; + p = ms->buf; + + for (i = 0; i < mi->n_rates; i++) { + struct minstrel_rate *mr = &mi->r[i]; + struct minstrel_rate_stats *mrs = &mi->r[i].stats; + + p += sprintf(p, "%s" ,((i == mi->max_tp_rate[0]) ? "A" : "")); + p += sprintf(p, "%s" ,((i == mi->max_tp_rate[1]) ? "B" : "")); + p += sprintf(p, "%s" ,((i == mi->max_tp_rate[2]) ? "C" : "")); + p += sprintf(p, "%s" ,((i == mi->max_tp_rate[3]) ? "D" : "")); + p += sprintf(p, "%s" ,((i == mi->max_prob_rate) ? "P" : "")); + + p += sprintf(p, ",%u%s", mr->bitrate / 2, + (mr->bitrate & 1 ? ".5," : ",")); + p += sprintf(p, "%u,", i); + p += sprintf(p, "%u,",mr->perfect_tx_time); + + tp_max = minstrel_get_tp_avg(mr, MINSTREL_FRAC(100,100)); + tp_avg = minstrel_get_tp_avg(mr, mrs->prob_ewma); + prob = MINSTREL_TRUNC(mrs->cur_prob * 1000); + eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000); + + p += sprintf(p, "%u.%u,%u.%u,%u.%u,%u.%u,%u.%u,%u,%u,%u," + "%llu,%llu,%d,%d\n", + tp_max / 10, tp_max % 10, + tp_avg / 10, tp_avg % 10, + eprob / 10, eprob % 10, + mrs->prob_ewmsd / 10, mrs->prob_ewmsd % 10, + prob / 10, prob % 10, + mrs->retry_count, + mrs->last_success, + mrs->last_attempts, + (unsigned long long)mrs->succ_hist, + (unsigned long long)mrs->att_hist, + mi->total_packets - mi->sample_packets, + mi->sample_packets); + + } + ms->len = p - ms->buf; + + WARN_ON(ms->len + sizeof(*ms) > 2048); -int -minstrel_stats_release(struct inode *inode, struct file *file) -{ - kfree(file->private_data); return 0; } -static const struct file_operations minstrel_stat_fops = { +static const struct file_operations minstrel_stat_csv_fops = { .owner = THIS_MODULE, - .open = minstrel_stats_open, + .open = minstrel_stats_csv_open, .read = minstrel_stats_read, .release = minstrel_stats_release, .llseek = default_llseek, @@ -138,6 +218,9 @@ minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir) mi->dbg_stats = debugfs_create_file("rc_stats", S_IRUGO, dir, mi, &minstrel_stat_fops); + + mi->dbg_stats_csv = debugfs_create_file("rc_stats_csv", S_IRUGO, dir, + mi, &minstrel_stat_csv_fops); } void @@ -146,4 +229,6 @@ minstrel_remove_sta_debugfs(void *priv, void *priv_sta) struct minstrel_sta_info *mi = priv_sta; debugfs_remove(mi->dbg_stats); + + debugfs_remove(mi->dbg_stats_csv); } diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 60698fc7042e..7430a1df2ab1 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -313,67 +313,35 @@ minstrel_get_ratestats(struct minstrel_ht_sta *mi, int index) return &mi->groups[index / MCS_GROUP_RATES].rates[index % MCS_GROUP_RATES]; } - /* - * Recalculate success probabilities and counters for a rate using EWMA + * Return current throughput based on the average A-MPDU length, taking into + * account the expected number of retransmissions and their expected length */ -static void -minstrel_calc_rate_ewma(struct minstrel_rate_stats *mr) +int +minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate, + int prob_ewma) { - if (unlikely(mr->attempts > 0)) { - mr->sample_skipped = 0; - mr->cur_prob = MINSTREL_FRAC(mr->success, mr->attempts); - if (!mr->att_hist) - mr->probability = mr->cur_prob; - else - mr->probability = minstrel_ewma(mr->probability, - mr->cur_prob, EWMA_LEVEL); - mr->att_hist += mr->attempts; - mr->succ_hist += mr->success; - } else { - mr->sample_skipped++; - } - mr->last_success = mr->success; - mr->last_attempts = mr->attempts; - mr->success = 0; - mr->attempts = 0; -} - -/* - * Calculate throughput based on the average A-MPDU length, taking into account - * the expected number of retransmissions and their expected length - */ -static void -minstrel_ht_calc_tp(struct minstrel_ht_sta *mi, int group, int rate) -{ - struct minstrel_rate_stats *mr; unsigned int nsecs = 0; - unsigned int tp; - unsigned int prob; - mr = &mi->groups[group].rates[rate]; - prob = mr->probability; - - if (prob < MINSTREL_FRAC(1, 10)) { - mr->cur_tp = 0; - return; - } - - /* - * For the throughput calculation, limit the probability value to 90% to - * account for collision related packet error rate fluctuation - */ - if (prob > MINSTREL_FRAC(9, 10)) - prob = MINSTREL_FRAC(9, 10); + /* do not account throughput if sucess prob is below 10% */ + if (prob_ewma < MINSTREL_FRAC(10, 100)) + return 0; if (group != MINSTREL_CCK_GROUP) nsecs = 1000 * mi->overhead / MINSTREL_TRUNC(mi->avg_ampdu_len); nsecs += minstrel_mcs_groups[group].duration[rate]; - /* prob is scaled - see MINSTREL_FRAC above */ - tp = 1000000 * ((prob * 1000) / nsecs); - mr->cur_tp = MINSTREL_TRUNC(tp); + /* + * For the throughput calculation, limit the probability value to 90% to + * account for collision related packet error rate fluctuation + * (prob is scaled - see MINSTREL_FRAC above) + */ + if (prob_ewma > MINSTREL_FRAC(90, 100)) + return MINSTREL_TRUNC(100000 * ((MINSTREL_FRAC(90, 100) * 1000) + / nsecs)); + else + return MINSTREL_TRUNC(100000 * ((prob_ewma * 1000) / nsecs)); } /* @@ -387,22 +355,23 @@ static void minstrel_ht_sort_best_tp_rates(struct minstrel_ht_sta *mi, u16 index, u16 *tp_list) { - int cur_group, cur_idx, cur_thr, cur_prob; - int tmp_group, tmp_idx, tmp_thr, tmp_prob; + int cur_group, cur_idx, cur_tp_avg, cur_prob; + int tmp_group, tmp_idx, tmp_tp_avg, tmp_prob; int j = MAX_THR_RATES; cur_group = index / MCS_GROUP_RATES; cur_idx = index % MCS_GROUP_RATES; - cur_thr = mi->groups[cur_group].rates[cur_idx].cur_tp; - cur_prob = mi->groups[cur_group].rates[cur_idx].probability; + cur_prob = mi->groups[cur_group].rates[cur_idx].prob_ewma; + cur_tp_avg = minstrel_ht_get_tp_avg(mi, cur_group, cur_idx, cur_prob); do { tmp_group = tp_list[j - 1] / MCS_GROUP_RATES; tmp_idx = tp_list[j - 1] % MCS_GROUP_RATES; - tmp_thr = mi->groups[tmp_group].rates[tmp_idx].cur_tp; - tmp_prob = mi->groups[tmp_group].rates[tmp_idx].probability; - if (cur_thr < tmp_thr || - (cur_thr == tmp_thr && cur_prob <= tmp_prob)) + tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma; + tmp_tp_avg = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, + tmp_prob); + if (cur_tp_avg < tmp_tp_avg || + (cur_tp_avg == tmp_tp_avg && cur_prob <= tmp_prob)) break; j--; } while (j > 0); @@ -422,16 +391,21 @@ static void minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index) { struct minstrel_mcs_group_data *mg; - struct minstrel_rate_stats *mr; - int tmp_group, tmp_idx, tmp_tp, tmp_prob, max_tp_group; + struct minstrel_rate_stats *mrs; + int tmp_group, tmp_idx, tmp_tp_avg, tmp_prob; + int max_tp_group, cur_tp_avg, cur_group, cur_idx; + int max_gpr_group, max_gpr_idx; + int max_gpr_tp_avg, max_gpr_prob; + cur_group = index / MCS_GROUP_RATES; + cur_idx = index % MCS_GROUP_RATES; mg = &mi->groups[index / MCS_GROUP_RATES]; - mr = &mg->rates[index % MCS_GROUP_RATES]; + mrs = &mg->rates[index % MCS_GROUP_RATES]; tmp_group = mi->max_prob_rate / MCS_GROUP_RATES; tmp_idx = mi->max_prob_rate % MCS_GROUP_RATES; - tmp_tp = mi->groups[tmp_group].rates[tmp_idx].cur_tp; - tmp_prob = mi->groups[tmp_group].rates[tmp_idx].probability; + tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma; + tmp_tp_avg = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob); /* if max_tp_rate[0] is from MCS_GROUP max_prob_rate get selected from * MCS_GROUP as well as CCK_GROUP rates do not allow aggregation */ @@ -440,15 +414,24 @@ minstrel_ht_set_best_prob_rate(struct minstrel_ht_sta *mi, u16 index) (max_tp_group != MINSTREL_CCK_GROUP)) return; - if (mr->probability > MINSTREL_FRAC(75, 100)) { - if (mr->cur_tp > tmp_tp) + if (mrs->prob_ewma > MINSTREL_FRAC(75, 100)) { + cur_tp_avg = minstrel_ht_get_tp_avg(mi, cur_group, cur_idx, + mrs->prob_ewma); + if (cur_tp_avg > tmp_tp_avg) mi->max_prob_rate = index; - if (mr->cur_tp > mg->rates[mg->max_group_prob_rate].cur_tp) + + max_gpr_group = mg->max_group_prob_rate / MCS_GROUP_RATES; + max_gpr_idx = mg->max_group_prob_rate % MCS_GROUP_RATES; + max_gpr_prob = mi->groups[max_gpr_group].rates[max_gpr_idx].prob_ewma; + max_gpr_tp_avg = minstrel_ht_get_tp_avg(mi, max_gpr_group, + max_gpr_idx, + max_gpr_prob); + if (cur_tp_avg > max_gpr_tp_avg) mg->max_group_prob_rate = index; } else { - if (mr->probability > tmp_prob) + if (mrs->prob_ewma > tmp_prob) mi->max_prob_rate = index; - if (mr->probability > mg->rates[mg->max_group_prob_rate].probability) + if (mrs->prob_ewma > mg->rates[mg->max_group_prob_rate].prob_ewma) mg->max_group_prob_rate = index; } } @@ -465,16 +448,18 @@ minstrel_ht_assign_best_tp_rates(struct minstrel_ht_sta *mi, u16 tmp_mcs_tp_rate[MAX_THR_RATES], u16 tmp_cck_tp_rate[MAX_THR_RATES]) { - unsigned int tmp_group, tmp_idx, tmp_cck_tp, tmp_mcs_tp; + unsigned int tmp_group, tmp_idx, tmp_cck_tp, tmp_mcs_tp, tmp_prob; int i; tmp_group = tmp_cck_tp_rate[0] / MCS_GROUP_RATES; tmp_idx = tmp_cck_tp_rate[0] % MCS_GROUP_RATES; - tmp_cck_tp = mi->groups[tmp_group].rates[tmp_idx].cur_tp; + tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma; + tmp_cck_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob); tmp_group = tmp_mcs_tp_rate[0] / MCS_GROUP_RATES; tmp_idx = tmp_mcs_tp_rate[0] % MCS_GROUP_RATES; - tmp_mcs_tp = mi->groups[tmp_group].rates[tmp_idx].cur_tp; + tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma; + tmp_mcs_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob); if (tmp_cck_tp > tmp_mcs_tp) { for(i = 0; i < MAX_THR_RATES; i++) { @@ -493,8 +478,7 @@ static inline void minstrel_ht_prob_rate_reduce_streams(struct minstrel_ht_sta *mi) { struct minstrel_mcs_group_data *mg; - struct minstrel_rate_stats *mr; - int tmp_max_streams, group; + int tmp_max_streams, group, tmp_idx, tmp_prob; int tmp_tp = 0; tmp_max_streams = minstrel_mcs_groups[mi->max_tp_rate[0] / @@ -503,11 +487,16 @@ minstrel_ht_prob_rate_reduce_streams(struct minstrel_ht_sta *mi) mg = &mi->groups[group]; if (!mg->supported || group == MINSTREL_CCK_GROUP) continue; - mr = minstrel_get_ratestats(mi, mg->max_group_prob_rate); - if (tmp_tp < mr->cur_tp && + + tmp_idx = mg->max_group_prob_rate % MCS_GROUP_RATES; + tmp_prob = mi->groups[group].rates[tmp_idx].prob_ewma; + + if (tmp_tp < minstrel_ht_get_tp_avg(mi, group, tmp_idx, tmp_prob) && (minstrel_mcs_groups[group].streams < tmp_max_streams)) { mi->max_prob_rate = mg->max_group_prob_rate; - tmp_tp = mr->cur_tp; + tmp_tp = minstrel_ht_get_tp_avg(mi, group, + tmp_idx, + tmp_prob); } } } @@ -525,8 +514,8 @@ static void minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) { struct minstrel_mcs_group_data *mg; - struct minstrel_rate_stats *mr; - int group, i, j; + struct minstrel_rate_stats *mrs; + int group, i, j, cur_prob; u16 tmp_mcs_tp_rate[MAX_THR_RATES], tmp_group_tp_rate[MAX_THR_RATES]; u16 tmp_cck_tp_rate[MAX_THR_RATES], index; @@ -565,12 +554,12 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) index = MCS_GROUP_RATES * group + i; - mr = &mg->rates[i]; - mr->retry_updated = false; - minstrel_calc_rate_ewma(mr); - minstrel_ht_calc_tp(mi, group, i); + mrs = &mg->rates[i]; + mrs->retry_updated = false; + minstrel_calc_rate_stats(mrs); + cur_prob = mrs->prob_ewma; - if (!mr->cur_tp) + if (minstrel_ht_get_tp_avg(mi, group, i, cur_prob) == 0) continue; /* Find max throughput rate set */ @@ -614,7 +603,7 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) #endif /* Reset update timer */ - mi->stats_update = jiffies; + mi->last_stats_update = jiffies; } static bool @@ -637,7 +626,7 @@ minstrel_ht_txstat_valid(struct minstrel_priv *mp, struct ieee80211_tx_rate *rat } static void -minstrel_next_sample_idx(struct minstrel_ht_sta *mi) +minstrel_set_next_sample_idx(struct minstrel_ht_sta *mi) { struct minstrel_mcs_group_data *mg; @@ -778,7 +767,8 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, update = true; } - if (time_after(jiffies, mi->stats_update + (mp->update_interval / 2 * HZ) / 1000)) { + if (time_after(jiffies, mi->last_stats_update + + (mp->update_interval / 2 * HZ) / 1000)) { update = true; minstrel_ht_update_stats(mp, mi); } @@ -791,7 +781,7 @@ static void minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, int index) { - struct minstrel_rate_stats *mr; + struct minstrel_rate_stats *mrs; const struct mcs_group *group; unsigned int tx_time, tx_time_rtscts, tx_time_data; unsigned int cw = mp->cw_min; @@ -800,16 +790,16 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, unsigned int ampdu_len = MINSTREL_TRUNC(mi->avg_ampdu_len); unsigned int overhead = 0, overhead_rtscts = 0; - mr = minstrel_get_ratestats(mi, index); - if (mr->probability < MINSTREL_FRAC(1, 10)) { - mr->retry_count = 1; - mr->retry_count_rtscts = 1; + mrs = minstrel_get_ratestats(mi, index); + if (mrs->prob_ewma < MINSTREL_FRAC(1, 10)) { + mrs->retry_count = 1; + mrs->retry_count_rtscts = 1; return; } - mr->retry_count = 2; - mr->retry_count_rtscts = 2; - mr->retry_updated = true; + mrs->retry_count = 2; + mrs->retry_count_rtscts = 2; + mrs->retry_updated = true; group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; tx_time_data = group->duration[index % MCS_GROUP_RATES] * ampdu_len / 1000; @@ -840,9 +830,9 @@ minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, tx_time_rtscts += ctime + overhead_rtscts + tx_time_data; if (tx_time_rtscts < mp->segment_size) - mr->retry_count_rtscts++; + mrs->retry_count_rtscts++; } while ((tx_time < mp->segment_size) && - (++mr->retry_count < mp->max_retry)); + (++mrs->retry_count < mp->max_retry)); } @@ -851,22 +841,22 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, struct ieee80211_sta_rates *ratetbl, int offset, int index) { const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; - struct minstrel_rate_stats *mr; + struct minstrel_rate_stats *mrs; u8 idx; u16 flags = group->flags; - mr = minstrel_get_ratestats(mi, index); - if (!mr->retry_updated) + mrs = minstrel_get_ratestats(mi, index); + if (!mrs->retry_updated) minstrel_calc_retransmit(mp, mi, index); - if (mr->probability < MINSTREL_FRAC(20, 100) || !mr->retry_count) { + if (mrs->prob_ewma < MINSTREL_FRAC(20, 100) || !mrs->retry_count) { ratetbl->rate[offset].count = 2; ratetbl->rate[offset].count_rts = 2; ratetbl->rate[offset].count_cts = 2; } else { - ratetbl->rate[offset].count = mr->retry_count; - ratetbl->rate[offset].count_cts = mr->retry_count; - ratetbl->rate[offset].count_rts = mr->retry_count_rtscts; + ratetbl->rate[offset].count = mrs->retry_count; + ratetbl->rate[offset].count_cts = mrs->retry_count; + ratetbl->rate[offset].count_rts = mrs->retry_count_rtscts; } if (index / MCS_GROUP_RATES == MINSTREL_CCK_GROUP) @@ -924,7 +914,7 @@ minstrel_get_duration(int index) static int minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) { - struct minstrel_rate_stats *mr; + struct minstrel_rate_stats *mrs; struct minstrel_mcs_group_data *mg; unsigned int sample_dur, sample_group, cur_max_tp_streams; int sample_idx = 0; @@ -940,12 +930,12 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) sample_group = mi->sample_group; mg = &mi->groups[sample_group]; sample_idx = sample_table[mg->column][mg->index]; - minstrel_next_sample_idx(mi); + minstrel_set_next_sample_idx(mi); if (!(mg->supported & BIT(sample_idx))) return -1; - mr = &mg->rates[sample_idx]; + mrs = &mg->rates[sample_idx]; sample_idx += sample_group * MCS_GROUP_RATES; /* @@ -962,7 +952,7 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) * Do not sample if the probability is already higher than 95% * to avoid wasting airtime. */ - if (mr->probability > MINSTREL_FRAC(95, 100)) + if (mrs->prob_ewma > MINSTREL_FRAC(95, 100)) return -1; /* @@ -977,7 +967,7 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) (cur_max_tp_streams - 1 < minstrel_mcs_groups[sample_group].streams || sample_dur >= minstrel_get_duration(mi->max_prob_rate))) { - if (mr->sample_skipped < 20) + if (mrs->sample_skipped < 20) return -1; if (mi->sample_slow++ > 2) @@ -1131,7 +1121,7 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, memset(mi, 0, sizeof(*mi)); mi->sta = sta; - mi->stats_update = jiffies; + mi->last_stats_update = jiffies; ack_dur = ieee80211_frame_duration(sband->band, 10, 60, 1, 1, 0); mi->overhead = ieee80211_frame_duration(sband->band, 0, 60, 1, 1, 0); @@ -1328,16 +1318,19 @@ static u32 minstrel_ht_get_expected_throughput(void *priv_sta) { struct minstrel_ht_sta_priv *msp = priv_sta; struct minstrel_ht_sta *mi = &msp->ht; - int i, j; + int i, j, prob, tp_avg; if (!msp->is_ht) return mac80211_minstrel.get_expected_throughput(priv_sta); i = mi->max_tp_rate[0] / MCS_GROUP_RATES; j = mi->max_tp_rate[0] % MCS_GROUP_RATES; + prob = mi->groups[i].rates[j].prob_ewma; + + /* convert tp_avg from pkt per second in kbps */ + tp_avg = minstrel_ht_get_tp_avg(mi, i, j, prob) * AVG_PKT_SIZE * 8 / 1024; - /* convert cur_tp from pkt per second in kbps */ - return mi->groups[i].rates[j].cur_tp * AVG_PKT_SIZE * 8 / 1024; + return tp_avg; } static const struct rate_control_ops mac80211_minstrel_ht = { diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h index f2217d6aa0c2..e8b52a94d24b 100644 --- a/net/mac80211/rc80211_minstrel_ht.h +++ b/net/mac80211/rc80211_minstrel_ht.h @@ -78,7 +78,7 @@ struct minstrel_ht_sta { u16 max_prob_rate; /* time of last status update */ - unsigned long stats_update; + unsigned long last_stats_update; /* overhead time in usec for each frame */ unsigned int overhead; @@ -112,6 +112,7 @@ struct minstrel_ht_sta_priv { }; #ifdef CONFIG_MAC80211_DEBUGFS struct dentry *dbg_stats; + struct dentry *dbg_stats_csv; #endif void *ratelist; void *sample_table; @@ -120,5 +121,7 @@ struct minstrel_ht_sta_priv { void minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir); void minstrel_ht_remove_sta_debugfs(void *priv, void *priv_sta); +int minstrel_ht_get_tp_avg(struct minstrel_ht_sta *mi, int group, int rate, + int prob_ewma); #endif diff --git a/net/mac80211/rc80211_minstrel_ht_debugfs.c b/net/mac80211/rc80211_minstrel_ht_debugfs.c index 20c676b8e5b6..6822ce0f95e5 100644 --- a/net/mac80211/rc80211_minstrel_ht_debugfs.c +++ b/net/mac80211/rc80211_minstrel_ht_debugfs.c @@ -19,7 +19,7 @@ static char * minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p) { const struct mcs_group *mg; - unsigned int j, tp, prob, eprob; + unsigned int j, tp_max, tp_avg, prob, eprob, tx_time; char htmode = '2'; char gimode = 'L'; u32 gflags; @@ -38,19 +38,26 @@ minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p) gimode = 'S'; for (j = 0; j < MCS_GROUP_RATES; j++) { - struct minstrel_rate_stats *mr = &mi->groups[i].rates[j]; + struct minstrel_rate_stats *mrs = &mi->groups[i].rates[j]; static const int bitrates[4] = { 10, 20, 55, 110 }; int idx = i * MCS_GROUP_RATES + j; if (!(mi->groups[i].supported & BIT(j))) continue; - if (gflags & IEEE80211_TX_RC_MCS) - p += sprintf(p, " HT%c0/%cGI ", htmode, gimode); - else if (gflags & IEEE80211_TX_RC_VHT_MCS) - p += sprintf(p, "VHT%c0/%cGI ", htmode, gimode); - else - p += sprintf(p, " CCK/%cP ", j < 4 ? 'L' : 'S'); + if (gflags & IEEE80211_TX_RC_MCS) { + p += sprintf(p, "HT%c0 ", htmode); + p += sprintf(p, "%cGI ", gimode); + p += sprintf(p, "%d ", mg->streams); + } else if (gflags & IEEE80211_TX_RC_VHT_MCS) { + p += sprintf(p, "VHT%c0 ", htmode); + p += sprintf(p, "%cGI ", gimode); + p += sprintf(p, "%d ", mg->streams); + } else { + p += sprintf(p, "CCK "); + p += sprintf(p, "%cP ", j < 4 ? 'L' : 'S'); + p += sprintf(p, "1 "); + } *(p++) = (idx == mi->max_tp_rate[0]) ? 'A' : ' '; *(p++) = (idx == mi->max_tp_rate[1]) ? 'B' : ' '; @@ -59,29 +66,39 @@ minstrel_ht_stats_dump(struct minstrel_ht_sta *mi, int i, char *p) *(p++) = (idx == mi->max_prob_rate) ? 'P' : ' '; if (gflags & IEEE80211_TX_RC_MCS) { - p += sprintf(p, " MCS%-2u ", (mg->streams - 1) * 8 + j); + p += sprintf(p, " MCS%-2u", (mg->streams - 1) * 8 + j); } else if (gflags & IEEE80211_TX_RC_VHT_MCS) { - p += sprintf(p, " MCS%-1u/%1u", j, mg->streams); + p += sprintf(p, " MCS%-1u/%1u", j, mg->streams); } else { int r = bitrates[j % 4]; - p += sprintf(p, " %2u.%1uM ", r / 10, r % 10); + p += sprintf(p, " %2u.%1uM", r / 10, r % 10); } - tp = mr->cur_tp / 10; - prob = MINSTREL_TRUNC(mr->cur_prob * 1000); - eprob = MINSTREL_TRUNC(mr->probability * 1000); + p += sprintf(p, " %3u ", idx); - p += sprintf(p, " %4u.%1u %3u.%1u %3u.%1u " - "%3u %4u(%4u) %9llu(%9llu)\n", - tp / 10, tp % 10, + /* tx_time[rate(i)] in usec */ + tx_time = DIV_ROUND_CLOSEST(mg->duration[j], 1000); + p += sprintf(p, "%6u ", tx_time); + + tp_max = minstrel_ht_get_tp_avg(mi, i, j, MINSTREL_FRAC(100, 100)); + tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_ewma); + prob = MINSTREL_TRUNC(mrs->cur_prob * 1000); + eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000); + + p += sprintf(p, "%4u.%1u %4u.%1u %3u.%1u %3u.%1u" + " %3u.%1u %3u %3u %-3u " + "%9llu %-9llu\n", + tp_max / 10, tp_max % 10, + tp_avg / 10, tp_avg % 10, eprob / 10, eprob % 10, + mrs->prob_ewmsd / 10, mrs->prob_ewmsd % 10, prob / 10, prob % 10, - mr->retry_count, - mr->last_success, - mr->last_attempts, - (unsigned long long)mr->succ_hist, - (unsigned long long)mr->att_hist); + mrs->retry_count, + mrs->last_success, + mrs->last_attempts, + (unsigned long long)mrs->succ_hist, + (unsigned long long)mrs->att_hist); } return p; @@ -94,8 +111,8 @@ minstrel_ht_stats_open(struct inode *inode, struct file *file) struct minstrel_ht_sta *mi = &msp->ht; struct minstrel_debugfs_info *ms; unsigned int i; - char *p; int ret; + char *p; if (!msp->is_ht) { inode->i_private = &msp->legacy; @@ -110,8 +127,14 @@ minstrel_ht_stats_open(struct inode *inode, struct file *file) file->private_data = ms; p = ms->buf; - p += sprintf(p, " type rate tpt eprob *prob " - "ret *ok(*cum) ok( cum)\n"); + + p += sprintf(p, "\n"); + p += sprintf(p, " best ____________rate__________ " + "______statistics______ ________last_______ " + "______sum-of________\n"); + p += sprintf(p, "mode guard # rate [name idx airtime max_tp] " + "[ ø(tp) ø(prob) sd(prob)] [prob.|retry|suc|att] [#success | " + "#attempts]\n"); p = minstrel_ht_stats_dump(mi, MINSTREL_CCK_GROUP, p); for (i = 0; i < MINSTREL_CCK_GROUP; i++) @@ -123,11 +146,10 @@ minstrel_ht_stats_open(struct inode *inode, struct file *file) "lookaround %d\n", max(0, (int) mi->total_packets - (int) mi->sample_packets), mi->sample_packets); - p += sprintf(p, "Average A-MPDU length: %d.%d\n", + p += sprintf(p, "Average # of aggregated frames per A-MPDU: %d.%d\n", MINSTREL_TRUNC(mi->avg_ampdu_len), MINSTREL_TRUNC(mi->avg_ampdu_len * 10) % 10); ms->len = p - ms->buf; - WARN_ON(ms->len + sizeof(*ms) > 32768); return nonseekable_open(inode, file); @@ -141,6 +163,143 @@ static const struct file_operations minstrel_ht_stat_fops = { .llseek = no_llseek, }; +static char * +minstrel_ht_stats_csv_dump(struct minstrel_ht_sta *mi, int i, char *p) +{ + const struct mcs_group *mg; + unsigned int j, tp_max, tp_avg, prob, eprob, tx_time; + char htmode = '2'; + char gimode = 'L'; + u32 gflags; + + if (!mi->groups[i].supported) + return p; + + mg = &minstrel_mcs_groups[i]; + gflags = mg->flags; + + if (gflags & IEEE80211_TX_RC_40_MHZ_WIDTH) + htmode = '4'; + else if (gflags & IEEE80211_TX_RC_80_MHZ_WIDTH) + htmode = '8'; + if (gflags & IEEE80211_TX_RC_SHORT_GI) + gimode = 'S'; + + for (j = 0; j < MCS_GROUP_RATES; j++) { + struct minstrel_rate_stats *mrs = &mi->groups[i].rates[j]; + static const int bitrates[4] = { 10, 20, 55, 110 }; + int idx = i * MCS_GROUP_RATES + j; + + if (!(mi->groups[i].supported & BIT(j))) + continue; + + if (gflags & IEEE80211_TX_RC_MCS) { + p += sprintf(p, "HT%c0,", htmode); + p += sprintf(p, "%cGI,", gimode); + p += sprintf(p, "%d,", mg->streams); + } else if (gflags & IEEE80211_TX_RC_VHT_MCS) { + p += sprintf(p, "VHT%c0,", htmode); + p += sprintf(p, "%cGI,", gimode); + p += sprintf(p, "%d,", mg->streams); + } else { + p += sprintf(p, "CCK,"); + p += sprintf(p, "%cP,", j < 4 ? 'L' : 'S'); + p += sprintf(p, "1,"); + } + + p += sprintf(p, "%s" ,((idx == mi->max_tp_rate[0]) ? "A" : "")); + p += sprintf(p, "%s" ,((idx == mi->max_tp_rate[1]) ? "B" : "")); + p += sprintf(p, "%s" ,((idx == mi->max_tp_rate[2]) ? "C" : "")); + p += sprintf(p, "%s" ,((idx == mi->max_tp_rate[3]) ? "D" : "")); + p += sprintf(p, "%s" ,((idx == mi->max_prob_rate) ? "P" : "")); + + if (gflags & IEEE80211_TX_RC_MCS) { + p += sprintf(p, ",MCS%-2u,", (mg->streams - 1) * 8 + j); + } else if (gflags & IEEE80211_TX_RC_VHT_MCS) { + p += sprintf(p, ",MCS%-1u/%1u,", j, mg->streams); + } else { + int r = bitrates[j % 4]; + p += sprintf(p, ",%2u.%1uM,", r / 10, r % 10); + } + + p += sprintf(p, "%u,", idx); + tx_time = DIV_ROUND_CLOSEST(mg->duration[j], 1000); + p += sprintf(p, "%u,", tx_time); + + tp_max = minstrel_ht_get_tp_avg(mi, i, j, MINSTREL_FRAC(100, 100)); + tp_avg = minstrel_ht_get_tp_avg(mi, i, j, mrs->prob_ewma); + prob = MINSTREL_TRUNC(mrs->cur_prob * 1000); + eprob = MINSTREL_TRUNC(mrs->prob_ewma * 1000); + + p += sprintf(p, "%u.%u,%u.%u,%u.%u,%u.%u,%u.%u,%u,%u," + "%u,%llu,%llu,", + tp_max / 10, tp_max % 10, + tp_avg / 10, tp_avg % 10, + eprob / 10, eprob % 10, + mrs->prob_ewmsd / 10, mrs->prob_ewmsd % 10, + prob / 10, prob % 10, + mrs->retry_count, + mrs->last_success, + mrs->last_attempts, + (unsigned long long)mrs->succ_hist, + (unsigned long long)mrs->att_hist); + p += sprintf(p, "%d,%d,%d.%d\n", + max(0, (int) mi->total_packets - + (int) mi->sample_packets), + mi->sample_packets, + MINSTREL_TRUNC(mi->avg_ampdu_len), + MINSTREL_TRUNC(mi->avg_ampdu_len * 10) % 10); + } + + return p; +} + +static int +minstrel_ht_stats_csv_open(struct inode *inode, struct file *file) +{ + struct minstrel_ht_sta_priv *msp = inode->i_private; + struct minstrel_ht_sta *mi = &msp->ht; + struct minstrel_debugfs_info *ms; + unsigned int i; + int ret; + char *p; + + if (!msp->is_ht) { + inode->i_private = &msp->legacy; + ret = minstrel_stats_csv_open(inode, file); + inode->i_private = msp; + return ret; + } + + ms = kmalloc(32768, GFP_KERNEL); + + if (!ms) + return -ENOMEM; + + file->private_data = ms; + + p = ms->buf; + + p = minstrel_ht_stats_csv_dump(mi, MINSTREL_CCK_GROUP, p); + for (i = 0; i < MINSTREL_CCK_GROUP; i++) + p = minstrel_ht_stats_csv_dump(mi, i, p); + for (i++; i < ARRAY_SIZE(mi->groups); i++) + p = minstrel_ht_stats_csv_dump(mi, i, p); + + ms->len = p - ms->buf; + WARN_ON(ms->len + sizeof(*ms) > 32768); + + return nonseekable_open(inode, file); +} + +static const struct file_operations minstrel_ht_stat_csv_fops = { + .owner = THIS_MODULE, + .open = minstrel_ht_stats_csv_open, + .read = minstrel_stats_read, + .release = minstrel_stats_release, + .llseek = no_llseek, +}; + void minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir) { @@ -148,6 +307,8 @@ minstrel_ht_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir) msp->dbg_stats = debugfs_create_file("rc_stats", S_IRUGO, dir, msp, &minstrel_ht_stat_fops); + msp->dbg_stats_csv = debugfs_create_file("rc_stats_csv", S_IRUGO, + dir, msp, &minstrel_ht_stat_csv_fops); } void @@ -156,4 +317,5 @@ minstrel_ht_remove_sta_debugfs(void *priv, void *priv_sta) struct minstrel_ht_sta_priv *msp = priv_sta; debugfs_remove(msp->dbg_stats); + debugfs_remove(msp->dbg_stats_csv); } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 2cd02278d4d4..260eed45b6d2 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1185,6 +1185,7 @@ static void sta_ps_start(struct sta_info *sta) struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; struct ps_data *ps; + int tid; if (sta->sdata->vif.type == NL80211_IFTYPE_AP || sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) @@ -1198,6 +1199,18 @@ static void sta_ps_start(struct sta_info *sta) drv_sta_notify(local, sdata, STA_NOTIFY_SLEEP, &sta->sta); ps_dbg(sdata, "STA %pM aid %d enters power save mode\n", sta->sta.addr, sta->sta.aid); + + if (!sta->sta.txq[0]) + return; + + for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) { + struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]); + + if (!skb_queue_len(&txqi->queue)) + set_bit(tid, &sta->txq_buffered_tids); + else + clear_bit(tid, &sta->txq_buffered_tids); + } } static void sta_ps_end(struct sta_info *sta) @@ -3424,7 +3437,8 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, __le16 fc; struct ieee80211_rx_data rx; struct ieee80211_sub_if_data *prev; - struct sta_info *sta, *tmp, *prev_sta; + struct sta_info *sta, *prev_sta; + struct rhash_head *tmp; int err = 0; fc = ((struct ieee80211_hdr *)skb->data)->frame_control; @@ -3459,9 +3473,13 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, ieee80211_scan_rx(local, skb); if (ieee80211_is_data(fc)) { + const struct bucket_table *tbl; + prev_sta = NULL; - for_each_sta_info(local, hdr->addr2, sta, tmp) { + tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash); + + for_each_sta_info(local, tbl, hdr->addr2, sta, tmp) { if (!prev_sta) { prev_sta = sta; continue; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index aacaa1a85e63..12971b71d0fa 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -64,32 +64,20 @@ * freed before they are done using it. */ +static const struct rhashtable_params sta_rht_params = { + .nelem_hint = 3, /* start small */ + .head_offset = offsetof(struct sta_info, hash_node), + .key_offset = offsetof(struct sta_info, sta.addr), + .key_len = ETH_ALEN, + .hashfn = sta_addr_hash, +}; + /* Caller must hold local->sta_mtx */ static int sta_info_hash_del(struct ieee80211_local *local, struct sta_info *sta) { - struct sta_info *s; - - s = rcu_dereference_protected(local->sta_hash[STA_HASH(sta->sta.addr)], - lockdep_is_held(&local->sta_mtx)); - if (!s) - return -ENOENT; - if (s == sta) { - rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)], - s->hnext); - return 0; - } - - while (rcu_access_pointer(s->hnext) && - rcu_access_pointer(s->hnext) != sta) - s = rcu_dereference_protected(s->hnext, - lockdep_is_held(&local->sta_mtx)); - if (rcu_access_pointer(s->hnext)) { - rcu_assign_pointer(s->hnext, sta->hnext); - return 0; - } - - return -ENOENT; + return rhashtable_remove_fast(&local->sta_hash, &sta->hash_node, + sta_rht_params); } static void __cleanup_single_sta(struct sta_info *sta) @@ -118,6 +106,16 @@ static void __cleanup_single_sta(struct sta_info *sta) atomic_dec(&ps->num_sta_ps); } + if (sta->sta.txq[0]) { + for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { + struct txq_info *txqi = to_txq_info(sta->sta.txq[i]); + int n = skb_queue_len(&txqi->queue); + + ieee80211_purge_tx_queue(&local->hw, &txqi->queue); + atomic_sub(n, &sdata->txqs_len[txqi->txq.ac]); + } + } + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { local->total_ps_buffered -= skb_queue_len(&sta->ps_tx_buf[ac]); ieee80211_purge_tx_queue(&local->hw, &sta->ps_tx_buf[ac]); @@ -159,18 +157,8 @@ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct ieee80211_local *local = sdata->local; - struct sta_info *sta; - sta = rcu_dereference_check(local->sta_hash[STA_HASH(addr)], - lockdep_is_held(&local->sta_mtx)); - while (sta) { - if (sta->sdata == sdata && - ether_addr_equal(sta->sta.addr, addr)) - break; - sta = rcu_dereference_check(sta->hnext, - lockdep_is_held(&local->sta_mtx)); - } - return sta; + return rhashtable_lookup_fast(&local->sta_hash, addr, sta_rht_params); } /* @@ -182,18 +170,24 @@ struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; struct sta_info *sta; + struct rhash_head *tmp; + const struct bucket_table *tbl; - sta = rcu_dereference_check(local->sta_hash[STA_HASH(addr)], - lockdep_is_held(&local->sta_mtx)); - while (sta) { - if ((sta->sdata == sdata || - (sta->sdata->bss && sta->sdata->bss == sdata->bss)) && - ether_addr_equal(sta->sta.addr, addr)) - break; - sta = rcu_dereference_check(sta->hnext, - lockdep_is_held(&local->sta_mtx)); + rcu_read_lock(); + tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash); + + for_each_sta_info(local, tbl, addr, sta, tmp) { + if (sta->sdata == sdata || + (sta->sdata->bss && sta->sdata->bss == sdata->bss)) { + rcu_read_unlock(); + /* this is safe as the caller must already hold + * another rcu read section or the mutex + */ + return sta; + } } - return sta; + rcu_read_unlock(); + return NULL; } struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata, @@ -234,6 +228,8 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) sta_dbg(sta->sdata, "Destroyed STA %pM\n", sta->sta.addr); + if (sta->sta.txq[0]) + kfree(to_txq_info(sta->sta.txq[0])); kfree(rcu_dereference_raw(sta->sta.rates)); kfree(sta); } @@ -242,9 +238,8 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) static void sta_info_hash_add(struct ieee80211_local *local, struct sta_info *sta) { - lockdep_assert_held(&local->sta_mtx); - sta->hnext = local->sta_hash[STA_HASH(sta->sta.addr)]; - rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)], sta); + rhashtable_insert_fast(&local->sta_hash, &sta->hash_node, + sta_rht_params); } static void sta_deliver_ps_frames(struct work_struct *wk) @@ -285,11 +280,12 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, const u8 *addr, gfp_t gfp) { struct ieee80211_local *local = sdata->local; + struct ieee80211_hw *hw = &local->hw; struct sta_info *sta; struct timespec uptime; int i; - sta = kzalloc(sizeof(*sta) + local->hw.sta_data_size, gfp); + sta = kzalloc(sizeof(*sta) + hw->sta_data_size, gfp); if (!sta) return NULL; @@ -321,11 +317,25 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, for (i = 0; i < ARRAY_SIZE(sta->chain_signal_avg); i++) ewma_init(&sta->chain_signal_avg[i], 1024, 8); - if (sta_prepare_rate_control(local, sta, gfp)) { - kfree(sta); - return NULL; + if (local->ops->wake_tx_queue) { + void *txq_data; + int size = sizeof(struct txq_info) + + ALIGN(hw->txq_data_size, sizeof(void *)); + + txq_data = kcalloc(ARRAY_SIZE(sta->sta.txq), size, gfp); + if (!txq_data) + goto free; + + for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { + struct txq_info *txq = txq_data + i * size; + + ieee80211_init_tx_queue(sdata, sta, txq, i); + } } + if (sta_prepare_rate_control(local, sta, gfp)) + goto free_txq; + for (i = 0; i < IEEE80211_NUM_TIDS; i++) { /* * timer_to_tid must be initialized with identity mapping @@ -346,7 +356,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, if (sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { struct ieee80211_supported_band *sband = - local->hw.wiphy->bands[ieee80211_get_sdata_band(sdata)]; + hw->wiphy->bands[ieee80211_get_sdata_band(sdata)]; u8 smps = (sband->ht_cap.cap & IEEE80211_HT_CAP_SM_PS) >> IEEE80211_HT_CAP_SM_PS_SHIFT; /* @@ -371,6 +381,13 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta_dbg(sdata, "Allocated STA %pM\n", sta->sta.addr); return sta; + +free_txq: + if (sta->sta.txq[0]) + kfree(to_txq_info(sta->sta.txq[0])); +free: + kfree(sta); + return NULL; } static int sta_info_insert_check(struct sta_info *sta) @@ -640,6 +657,8 @@ static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending) indicate_tim |= sta->driver_buffered_tids & tids; + indicate_tim |= + sta->txq_buffered_tids & tids; } done: @@ -948,19 +967,32 @@ static void sta_info_cleanup(unsigned long data) round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL)); } -void sta_info_init(struct ieee80211_local *local) +u32 sta_addr_hash(const void *key, u32 length, u32 seed) +{ + return jhash(key, ETH_ALEN, seed); +} + +int sta_info_init(struct ieee80211_local *local) { + int err; + + err = rhashtable_init(&local->sta_hash, &sta_rht_params); + if (err) + return err; + spin_lock_init(&local->tim_lock); mutex_init(&local->sta_mtx); INIT_LIST_HEAD(&local->sta_list); setup_timer(&local->sta_cleanup, sta_info_cleanup, (unsigned long)local); + return 0; } void sta_info_stop(struct ieee80211_local *local) { del_timer_sync(&local->sta_cleanup); + rhashtable_destroy(&local->sta_hash); } @@ -1024,16 +1056,21 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, } struct ieee80211_sta *ieee80211_find_sta_by_ifaddr(struct ieee80211_hw *hw, - const u8 *addr, - const u8 *localaddr) + const u8 *addr, + const u8 *localaddr) { - struct sta_info *sta, *nxt; + struct ieee80211_local *local = hw_to_local(hw); + struct sta_info *sta; + struct rhash_head *tmp; + const struct bucket_table *tbl; + + tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash); /* * Just return a random station if localaddr is NULL * ... first in list. */ - for_each_sta_info(hw_to_local(hw), addr, sta, nxt) { + for_each_sta_info(local, tbl, addr, sta, tmp) { if (localaddr && !ether_addr_equal(sta->sdata->vif.addr, localaddr)) continue; @@ -1071,7 +1108,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; struct sk_buff_head pending; - int filtered = 0, buffered = 0, ac; + int filtered = 0, buffered = 0, ac, i; unsigned long flags; struct ps_data *ps; @@ -1090,10 +1127,22 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) BUILD_BUG_ON(BITS_TO_LONGS(IEEE80211_NUM_TIDS) > 1); sta->driver_buffered_tids = 0; + sta->txq_buffered_tids = 0; if (!(local->hw.flags & IEEE80211_HW_AP_LINK_PS)) drv_sta_notify(local, sdata, STA_NOTIFY_AWAKE, &sta->sta); + if (sta->sta.txq[0]) { + for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { + struct txq_info *txqi = to_txq_info(sta->sta.txq[i]); + + if (!skb_queue_len(&txqi->queue)) + continue; + + drv_wake_tx_queue(local, txqi); + } + } + skb_queue_head_init(&pending); /* sync with ieee80211_tx_h_unicast_ps_buf */ @@ -1275,8 +1324,10 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, /* if we already have frames from software, then we can't also * release from hardware queues */ - if (skb_queue_empty(&frames)) + if (skb_queue_empty(&frames)) { driver_release_tids |= sta->driver_buffered_tids & tids; + driver_release_tids |= sta->txq_buffered_tids & tids; + } if (driver_release_tids) { /* If the driver has data on more than one TID then @@ -1447,6 +1498,9 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, sta_info_recalc_tim(sta); } else { + unsigned long tids = sta->txq_buffered_tids & driver_release_tids; + int tid; + /* * We need to release a frame that is buffered somewhere in the * driver ... it'll have to handle that. @@ -1466,8 +1520,22 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, * that the TID(s) became empty before returning here from the * release function. * Either way, however, when the driver tells us that the TID(s) - * became empty we'll do the TIM recalculation. + * became empty or we find that a txq became empty, we'll do the + * TIM recalculation. */ + + if (!sta->sta.txq[0]) + return; + + for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) { + struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]); + + if (!(tids & BIT(tid)) || skb_queue_len(&txqi->queue)) + continue; + + sta_info_recalc_tim(sta); + break; + } } } diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 7e2fa4018d41..5c164fb3f6c5 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -16,6 +16,7 @@ #include <linux/workqueue.h> #include <linux/average.h> #include <linux/etherdevice.h> +#include <linux/rhashtable.h> #include "key.h" /** @@ -248,7 +249,7 @@ struct sta_ampdu_mlme { * * @list: global linked list entry * @free_list: list entry for keeping track of stations to free - * @hnext: hash table linked list pointer + * @hash_node: hash node for rhashtable * @local: pointer to the global information * @sdata: virtual interface this station belongs to * @ptk: peer keys negotiated with this station, if any @@ -276,6 +277,7 @@ struct sta_ampdu_mlme { * entered power saving state, these are also delivered to * the station when it leaves powersave or polls for frames * @driver_buffered_tids: bitmap of TIDs the driver has data buffered on + * @txq_buffered_tids: bitmap of TIDs that mac80211 has txq data buffered on * @rx_packets: Number of MSDUs received from this STA * @rx_bytes: Number of bytes received from this STA * @last_rx: time (in jiffies) when last frame was received from this STA @@ -341,7 +343,7 @@ struct sta_info { /* General information, mostly static */ struct list_head list, free_list; struct rcu_head rcu_head; - struct sta_info __rcu *hnext; + struct rhash_head hash_node; struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; struct ieee80211_key __rcu *gtk[NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS]; @@ -370,6 +372,7 @@ struct sta_info { struct sk_buff_head ps_tx_buf[IEEE80211_NUM_ACS]; struct sk_buff_head tx_filtered[IEEE80211_NUM_ACS]; unsigned long driver_buffered_tids; + unsigned long txq_buffered_tids; /* Updated from RX path only, no locking requirements */ unsigned long rx_packets; @@ -537,10 +540,6 @@ rcu_dereference_protected_tid_tx(struct sta_info *sta, int tid) lockdep_is_held(&sta->ampdu_mlme.mtx)); } -#define STA_HASH_SIZE 256 -#define STA_HASH(sta) (sta[5]) - - /* Maximum number of frames to buffer per power saving station per AC */ #define STA_MAX_TX_BUFFER 64 @@ -561,26 +560,15 @@ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr); -static inline -void for_each_sta_info_type_check(struct ieee80211_local *local, - const u8 *addr, - struct sta_info *sta, - struct sta_info *nxt) -{ -} +u32 sta_addr_hash(const void *key, u32 length, u32 seed); + +#define _sta_bucket_idx(_tbl, _a) \ + rht_bucket_index(_tbl, sta_addr_hash(_a, ETH_ALEN, (_tbl)->hash_rnd)) -#define for_each_sta_info(local, _addr, _sta, nxt) \ - for ( /* initialise loop */ \ - _sta = rcu_dereference(local->sta_hash[STA_HASH(_addr)]),\ - nxt = _sta ? rcu_dereference(_sta->hnext) : NULL; \ - /* typecheck */ \ - for_each_sta_info_type_check(local, (_addr), _sta, nxt),\ - /* continue condition */ \ - _sta; \ - /* advance loop */ \ - _sta = nxt, \ - nxt = _sta ? rcu_dereference(_sta->hnext) : NULL \ - ) \ +#define for_each_sta_info(local, tbl, _addr, _sta, _tmp) \ + rht_for_each_entry_rcu(_sta, _tmp, tbl, \ + _sta_bucket_idx(tbl, _addr), \ + hash_node) \ /* compare address and run code only if it matches */ \ if (ether_addr_equal(_sta->sta.addr, (_addr))) @@ -617,7 +605,7 @@ int sta_info_destroy_addr_bss(struct ieee80211_sub_if_data *sdata, void sta_info_recalc_tim(struct sta_info *sta); -void sta_info_init(struct ieee80211_local *local); +int sta_info_init(struct ieee80211_local *local); void sta_info_stop(struct ieee80211_local *local); /** diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 2c51742428d5..005fdbe39a8b 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -654,7 +654,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) struct ieee80211_supported_band *sband; struct ieee80211_sub_if_data *sdata; struct net_device *prev_dev = NULL; - struct sta_info *sta, *tmp; + struct sta_info *sta; + struct rhash_head *tmp; int retry_count; int rates_idx; bool send_to_cooked; @@ -663,6 +664,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) int rtap_len; int shift = 0; int tid = IEEE80211_NUM_TIDS; + const struct bucket_table *tbl; rates_idx = ieee80211_tx_get_rates(hw, info, &retry_count); @@ -671,7 +673,9 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) sband = local->hw.wiphy->bands[info->band]; fc = hdr->frame_control; - for_each_sta_info(local, hdr->addr1, sta, tmp) { + tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash); + + for_each_sta_info(local, tbl, hdr->addr1, sta, tmp) { /* skip wrong virtual interface */ if (!ether_addr_equal(hdr->addr2, sta->sdata->vif.addr)) continue; diff --git a/net/mac80211/trace.c b/net/mac80211/trace.c index 386e45d8a958..edfe0c170a1c 100644 --- a/net/mac80211/trace.c +++ b/net/mac80211/trace.c @@ -8,6 +8,7 @@ #include "debug.h" #define CREATE_TRACE_POINTS #include "trace.h" +#include "trace_msg.h" #ifdef CONFIG_MAC80211_MESSAGE_TRACING void __sdata_info(const char *fmt, ...) diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index e9e462b349e5..4c2e7690226a 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -2312,43 +2312,36 @@ TRACE_EVENT(drv_tdls_recv_channel_switch, ) ); -#ifdef CONFIG_MAC80211_MESSAGE_TRACING -#undef TRACE_SYSTEM -#define TRACE_SYSTEM mac80211_msg - -#define MAX_MSG_LEN 100 - -DECLARE_EVENT_CLASS(mac80211_msg_event, - TP_PROTO(struct va_format *vaf), +TRACE_EVENT(drv_wake_tx_queue, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct txq_info *txq), - TP_ARGS(vaf), + TP_ARGS(local, sdata, txq), TP_STRUCT__entry( - __dynamic_array(char, msg, MAX_MSG_LEN) + LOCAL_ENTRY + VIF_ENTRY + STA_ENTRY + __field(u8, ac) + __field(u8, tid) ), TP_fast_assign( - WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), - MAX_MSG_LEN, vaf->fmt, - *vaf->va) >= MAX_MSG_LEN); - ), + struct ieee80211_sta *sta = txq->txq.sta; - TP_printk("%s", __get_str(msg)) -); + LOCAL_ASSIGN; + VIF_ASSIGN; + STA_ASSIGN; + __entry->ac = txq->txq.ac; + __entry->tid = txq->txq.tid; + ), -DEFINE_EVENT(mac80211_msg_event, mac80211_info, - TP_PROTO(struct va_format *vaf), - TP_ARGS(vaf) -); -DEFINE_EVENT(mac80211_msg_event, mac80211_dbg, - TP_PROTO(struct va_format *vaf), - TP_ARGS(vaf) -); -DEFINE_EVENT(mac80211_msg_event, mac80211_err, - TP_PROTO(struct va_format *vaf), - TP_ARGS(vaf) + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " ac:%d tid:%d", + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, __entry->ac, __entry->tid + ) ); -#endif #endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */ diff --git a/net/mac80211/trace_msg.h b/net/mac80211/trace_msg.h new file mode 100644 index 000000000000..768f7c22a190 --- /dev/null +++ b/net/mac80211/trace_msg.h @@ -0,0 +1,53 @@ +#ifdef CONFIG_MAC80211_MESSAGE_TRACING + +#if !defined(__MAC80211_MSG_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ) +#define __MAC80211_MSG_DRIVER_TRACE + +#include <linux/tracepoint.h> +#include <net/mac80211.h> +#include "ieee80211_i.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mac80211_msg + +#define MAX_MSG_LEN 100 + +DECLARE_EVENT_CLASS(mac80211_msg_event, + TP_PROTO(struct va_format *vaf), + + TP_ARGS(vaf), + + TP_STRUCT__entry( + __dynamic_array(char, msg, MAX_MSG_LEN) + ), + + TP_fast_assign( + WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg), + MAX_MSG_LEN, vaf->fmt, + *vaf->va) >= MAX_MSG_LEN); + ), + + TP_printk("%s", __get_str(msg)) +); + +DEFINE_EVENT(mac80211_msg_event, mac80211_info, + TP_PROTO(struct va_format *vaf), + TP_ARGS(vaf) +); +DEFINE_EVENT(mac80211_msg_event, mac80211_dbg, + TP_PROTO(struct va_format *vaf), + TP_ARGS(vaf) +); +DEFINE_EVENT(mac80211_msg_event, mac80211_err, + TP_PROTO(struct va_format *vaf), + TP_ARGS(vaf) +); +#endif /* !__MAC80211_MSG_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace_msg +#include <trace/define_trace.h> + +#endif diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 9f7fb4eec37b..667111ee6a20 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -767,12 +767,22 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) return TX_CONTINUE; } +static __le16 ieee80211_tx_next_seq(struct sta_info *sta, int tid) +{ + u16 *seq = &sta->tid_seq[tid]; + __le16 ret = cpu_to_le16(*seq); + + /* Increase the sequence number. */ + *seq = (*seq + 0x10) & IEEE80211_SCTL_SEQ; + + return ret; +} + static ieee80211_tx_result debug_noinline ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; - u16 *seq; u8 *qc; int tid; @@ -823,13 +833,10 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) qc = ieee80211_get_qos_ctl(hdr); tid = *qc & IEEE80211_QOS_CTL_TID_MASK; - seq = &tx->sta->tid_seq[tid]; tx->sta->tx_msdu[tid]++; - hdr->seq_ctrl = cpu_to_le16(*seq); - - /* Increase the sequence number. */ - *seq = (*seq + 0x10) & IEEE80211_SCTL_SEQ; + if (!tx->sta->sta.txq[0]) + hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid); return TX_CONTINUE; } @@ -1070,7 +1077,7 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx, * nothing -- this aggregation session is being started * but that might still fail with the driver */ - } else { + } else if (!tx->sta->sta.txq[tid]) { spin_lock(&tx->sta->lock); /* * Need to re-check now, because we may get here @@ -1211,13 +1218,102 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, return TX_CONTINUE; } +static void ieee80211_drv_tx(struct ieee80211_local *local, + struct ieee80211_vif *vif, + struct ieee80211_sta *pubsta, + struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_tx_control control = { + .sta = pubsta, + }; + struct ieee80211_txq *txq = NULL; + struct txq_info *txqi; + u8 ac; + + if (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE) + goto tx_normal; + + if (!ieee80211_is_data(hdr->frame_control)) + goto tx_normal; + + if (pubsta) { + u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; + + txq = pubsta->txq[tid]; + } else if (vif) { + txq = vif->txq; + } + + if (!txq) + goto tx_normal; + + ac = txq->ac; + txqi = to_txq_info(txq); + atomic_inc(&sdata->txqs_len[ac]); + if (atomic_read(&sdata->txqs_len[ac]) >= local->hw.txq_ac_max_pending) + netif_stop_subqueue(sdata->dev, ac); + + skb_queue_tail(&txqi->queue, skb); + drv_wake_tx_queue(local, txqi); + + return; + +tx_normal: + drv_tx(local, &control, skb); +} + +struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, + struct ieee80211_txq *txq) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata = vif_to_sdata(txq->vif); + struct txq_info *txqi = container_of(txq, struct txq_info, txq); + struct ieee80211_hdr *hdr; + struct sk_buff *skb = NULL; + u8 ac = txq->ac; + + spin_lock_bh(&txqi->queue.lock); + + if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags)) + goto out; + + skb = __skb_dequeue(&txqi->queue); + if (!skb) + goto out; + + atomic_dec(&sdata->txqs_len[ac]); + if (__netif_subqueue_stopped(sdata->dev, ac)) + ieee80211_propagate_queue_wake(local, sdata->vif.hw_queue[ac]); + + hdr = (struct ieee80211_hdr *)skb->data; + if (txq->sta && ieee80211_is_data_qos(hdr->frame_control)) { + struct sta_info *sta = container_of(txq->sta, struct sta_info, + sta); + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + + hdr->seq_ctrl = ieee80211_tx_next_seq(sta, txq->tid); + if (test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags)) + info->flags |= IEEE80211_TX_CTL_AMPDU; + else + info->flags &= ~IEEE80211_TX_CTL_AMPDU; + } + +out: + spin_unlock_bh(&txqi->queue.lock); + + return skb; +} +EXPORT_SYMBOL(ieee80211_tx_dequeue); + static bool ieee80211_tx_frags(struct ieee80211_local *local, struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct sk_buff_head *skbs, bool txpending) { - struct ieee80211_tx_control control; struct sk_buff *skb, *tmp; unsigned long flags; @@ -1275,10 +1371,9 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local, spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); info->control.vif = vif; - control.sta = sta; __skb_unlink(skb, skbs); - drv_tx(local, &control, skb); + ieee80211_drv_tx(local, vif, sta, skb); } return true; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index d1742a7d9ea4..79412f16b61d 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -308,6 +308,11 @@ void ieee80211_propagate_queue_wake(struct ieee80211_local *local, int queue) for (ac = 0; ac < n_acs; ac++) { int ac_queue = sdata->vif.hw_queue[ac]; + if (local->ops->wake_tx_queue && + (atomic_read(&sdata->txqs_len[ac]) > + local->hw.txq_ac_max_pending)) + continue; + if (ac_queue == queue || (sdata->vif.cab_queue == queue && local->queue_stop_reasons[ac_queue] == 0 && @@ -2189,46 +2194,6 @@ void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata) mutex_unlock(&local->chanctx_mtx); } -static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id) -{ - int i; - - for (i = 0; i < n_ids; i++) - if (ids[i] == id) - return true; - return false; -} - -size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, - const u8 *ids, int n_ids, - const u8 *after_ric, int n_after_ric, - size_t offset) -{ - size_t pos = offset; - - while (pos < ielen && ieee80211_id_in_list(ids, n_ids, ies[pos])) { - if (ies[pos] == WLAN_EID_RIC_DATA && n_after_ric) { - pos += 2 + ies[pos + 1]; - - while (pos < ielen && - !ieee80211_id_in_list(after_ric, n_after_ric, - ies[pos])) - pos += 2 + ies[pos + 1]; - } else { - pos += 2 + ies[pos + 1]; - } - } - - return pos; -} - -size_t ieee80211_ie_split(const u8 *ies, size_t ielen, - const u8 *ids, int n_ids, size_t offset) -{ - return ieee80211_ie_split_ric(ies, ielen, ids, n_ids, NULL, 0, offset); -} -EXPORT_SYMBOL(ieee80211_ie_split); - size_t ieee80211_ie_split_vendor(const u8 *ies, size_t ielen, size_t offset) { size_t pos = offset; @@ -3352,3 +3317,20 @@ u8 *ieee80211_add_wmm_info_ie(u8 *buf, u8 qosinfo) return buf; } + +void ieee80211_init_tx_queue(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct txq_info *txqi, int tid) +{ + skb_queue_head_init(&txqi->queue); + txqi->txq.vif = &sdata->vif; + + if (sta) { + txqi->txq.sta = &sta->sta; + sta->sta.txq[tid] = &txqi->txq; + txqi->txq.ac = ieee802_1d_to_ac[tid & 7]; + } else { + sdata->vif.txq = &txqi->txq; + txqi->txq.ac = IEEE80211_AC_BE; + } +} diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 89f73a9e9874..a87d8b8ec730 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -70,7 +70,7 @@ obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o # nf_tables nf_tables-objs += nf_tables_core.o nf_tables_api.o -nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o +nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o obj-$(CONFIG_NF_TABLES) += nf_tables.o diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 758b002130d9..380ef5148ea1 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -19,6 +19,7 @@ #include <net/netlink.h> #include <linux/netfilter.h> +#include <linux/netfilter_bridge.h> #include <linux/netfilter/ipset/pfxlen.h> #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_hash.h> @@ -211,6 +212,22 @@ hash_netiface4_data_next(struct hash_netiface4_elem *next, #define HKEY_DATALEN sizeof(struct hash_netiface4_elem_hashed) #include "ip_set_hash_gen.h" +#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) +static const char *get_physindev_name(const struct sk_buff *skb) +{ + struct net_device *dev = nf_bridge_get_physindev(skb); + + return dev ? dev->name : NULL; +} + +static const char *get_phyoutdev_name(const struct sk_buff *skb) +{ + struct net_device *dev = nf_bridge_get_physoutdev(skb); + + return dev ? dev->name : NULL; +} +#endif + static int hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, @@ -234,16 +251,15 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb, e.ip &= ip_set_netmask(e.cidr); #define IFACE(dir) (par->dir ? par->dir->name : NULL) -#define PHYSDEV(dir) (nf_bridge->dir ? nf_bridge->dir->name : NULL) #define SRCDIR (opt->flags & IPSET_DIM_TWO_SRC) if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - const struct nf_bridge_info *nf_bridge = skb->nf_bridge; + e.iface = SRCDIR ? get_physindev_name(skb) : + get_phyoutdev_name(skb); - if (!nf_bridge) + if (!e.iface) return -EINVAL; - e.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev); e.physdev = 1; #else e.iface = NULL; @@ -476,11 +492,11 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb, if (opt->cmdflags & IPSET_FLAG_PHYSDEV) { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) - const struct nf_bridge_info *nf_bridge = skb->nf_bridge; - - if (!nf_bridge) + e.iface = SRCDIR ? get_physindev_name(skb) : + get_phyoutdev_name(skb); + if (!e.iface) return -EINVAL; - e.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev); + e.physdev = 1; #else e.iface = NULL; diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index bf02932b7188..19986ec5f21a 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -536,8 +536,8 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb, ip_vs_update_conntrack(skb, cp, 1); if (!local) { skb_forward_csum(skb); - NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, - dst_output); + NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, + NULL, skb_dst(skb)->dev, dst_output_sk); } else ret = NF_ACCEPT; return ret; @@ -554,8 +554,8 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb, ip_vs_notrack(skb); if (!local) { skb_forward_csum(skb); - NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, - dst_output); + NF_HOOK(pf, NF_INET_LOCAL_OUT, NULL, skb, + NULL, skb_dst(skb)->dev, dst_output_sk); } else ret = NF_ACCEPT; return ret; diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c index 2631876ac55b..a5aa5967b8e1 100644 --- a/net/netfilter/nf_log_common.c +++ b/net/netfilter/nf_log_common.c @@ -17,6 +17,7 @@ #include <net/route.h> #include <linux/netfilter.h> +#include <linux/netfilter_bridge.h> #include <linux/netfilter/xt_LOG.h> #include <net/netfilter/nf_log.h> @@ -163,10 +164,10 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf, const struct net_device *physindev; const struct net_device *physoutdev; - physindev = skb->nf_bridge->physindev; + physindev = nf_bridge_get_physindev(skb); if (physindev && in != physindev) nf_log_buf_add(m, "PHYSIN=%s ", physindev->name); - physoutdev = skb->nf_bridge->physoutdev; + physoutdev = nf_bridge_get_physoutdev(skb); if (physoutdev && out != physoutdev) nf_log_buf_add(m, "PHYSOUT=%s ", physoutdev->name); } diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index d3cd37edca18..2e88032cd5ad 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -10,6 +10,7 @@ #include <linux/proc_fs.h> #include <linux/skbuff.h> #include <linux/netfilter.h> +#include <linux/netfilter_bridge.h> #include <linux/seq_file.h> #include <linux/rcupdate.h> #include <net/protocol.h> @@ -54,14 +55,18 @@ void nf_queue_entry_release_refs(struct nf_queue_entry *entry) dev_put(state->in); if (state->out) dev_put(state->out); + if (state->sk) + sock_put(state->sk); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (entry->skb->nf_bridge) { - struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge; + struct net_device *physdev; - if (nf_bridge->physindev) - dev_put(nf_bridge->physindev); - if (nf_bridge->physoutdev) - dev_put(nf_bridge->physoutdev); + physdev = nf_bridge_get_physindev(entry->skb); + if (physdev) + dev_put(physdev); + physdev = nf_bridge_get_physoutdev(entry->skb); + if (physdev) + dev_put(physdev); } #endif /* Drop reference to owner of hook which queued us. */ @@ -81,15 +86,16 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) dev_hold(state->in); if (state->out) dev_hold(state->out); + if (state->sk) + sock_hold(state->sk); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (entry->skb->nf_bridge) { - struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge; struct net_device *physdev; - physdev = nf_bridge->physindev; + physdev = nf_bridge_get_physindev(entry->skb); if (physdev) dev_hold(physdev); - physdev = nf_bridge->physoutdev; + physdev = nf_bridge_get_physoutdev(entry->skb); if (physdev) dev_hold(physdev); } @@ -198,7 +204,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) case NF_ACCEPT: case NF_STOP: local_bh_disable(); - entry->state.okfn(skb); + entry->state.okfn(entry->state.sk, skb); local_bh_enable(); break; case NF_QUEUE: diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 5604c2df05d1..78af83bc9c8e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1545,6 +1545,23 @@ nla_put_failure: return -1; }; +int nft_expr_dump(struct sk_buff *skb, unsigned int attr, + const struct nft_expr *expr) +{ + struct nlattr *nest; + + nest = nla_nest_start(skb, attr); + if (!nest) + goto nla_put_failure; + if (nf_tables_fill_expr_info(skb, expr) < 0) + goto nla_put_failure; + nla_nest_end(skb, nest); + return 0; + +nla_put_failure: + return -1; +} + struct nft_expr_info { const struct nft_expr_ops *ops; struct nlattr *tb[NFT_EXPR_MAXATTR + 1]; @@ -1622,6 +1639,39 @@ static void nf_tables_expr_destroy(const struct nft_ctx *ctx, module_put(expr->ops->type->owner); } +struct nft_expr *nft_expr_init(const struct nft_ctx *ctx, + const struct nlattr *nla) +{ + struct nft_expr_info info; + struct nft_expr *expr; + int err; + + err = nf_tables_expr_parse(ctx, nla, &info); + if (err < 0) + goto err1; + + err = -ENOMEM; + expr = kzalloc(info.ops->size, GFP_KERNEL); + if (expr == NULL) + goto err2; + + err = nf_tables_newexpr(ctx, &info, expr); + if (err < 0) + goto err2; + + return expr; +err2: + module_put(info.ops->type->owner); +err1: + return ERR_PTR(err); +} + +void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr) +{ + nf_tables_expr_destroy(ctx, expr); + kfree(expr); +} + /* * Rules */ @@ -1703,12 +1753,8 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, if (list == NULL) goto nla_put_failure; nft_rule_for_each_expr(expr, next, rule) { - struct nlattr *elem = nla_nest_start(skb, NFTA_LIST_ELEM); - if (elem == NULL) - goto nla_put_failure; - if (nf_tables_fill_expr_info(skb, expr) < 0) + if (nft_expr_dump(skb, NFTA_LIST_ELEM, expr) < 0) goto nla_put_failure; - nla_nest_end(skb, elem); } nla_nest_end(skb, list); @@ -2159,7 +2205,7 @@ nft_select_set_ops(const struct nlattr * const nla[], features = 0; if (nla[NFTA_SET_FLAGS] != NULL) { features = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); - features &= NFT_SET_INTERVAL | NFT_SET_MAP; + features &= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_TIMEOUT; } bops = NULL; @@ -2216,6 +2262,8 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { [NFTA_SET_POLICY] = { .type = NLA_U32 }, [NFTA_SET_DESC] = { .type = NLA_NESTED }, [NFTA_SET_ID] = { .type = NLA_U32 }, + [NFTA_SET_TIMEOUT] = { .type = NLA_U64 }, + [NFTA_SET_GC_INTERVAL] = { .type = NLA_U32 }, }; static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = { @@ -2366,6 +2414,13 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, goto nla_put_failure; } + if (set->timeout && + nla_put_be64(skb, NFTA_SET_TIMEOUT, cpu_to_be64(set->timeout))) + goto nla_put_failure; + if (set->gc_int && + nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(set->gc_int))) + goto nla_put_failure; + if (set->policy != NFT_SET_POL_PERFORMANCE) { if (nla_put_be32(skb, NFTA_SET_POLICY, htonl(set->policy))) goto nla_put_failure; @@ -2578,7 +2633,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, char name[IFNAMSIZ]; unsigned int size; bool create; - u32 ktype, dtype, flags, policy; + u64 timeout; + u32 ktype, dtype, flags, policy, gc_int; struct nft_set_desc desc; int err; @@ -2598,15 +2654,20 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, } desc.klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN])); - if (desc.klen == 0 || desc.klen > FIELD_SIZEOF(struct nft_data, data)) + if (desc.klen == 0 || desc.klen > NFT_DATA_VALUE_MAXLEN) return -EINVAL; flags = 0; if (nla[NFTA_SET_FLAGS] != NULL) { flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS])); if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT | - NFT_SET_INTERVAL | NFT_SET_MAP)) + NFT_SET_INTERVAL | NFT_SET_TIMEOUT | + NFT_SET_MAP | NFT_SET_EVAL)) return -EINVAL; + /* Only one of both operations is supported */ + if ((flags & (NFT_SET_MAP | NFT_SET_EVAL)) == + (NFT_SET_MAP | NFT_SET_EVAL)) + return -EOPNOTSUPP; } dtype = 0; @@ -2623,14 +2684,26 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, if (nla[NFTA_SET_DATA_LEN] == NULL) return -EINVAL; desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN])); - if (desc.dlen == 0 || - desc.dlen > FIELD_SIZEOF(struct nft_data, data)) + if (desc.dlen == 0 || desc.dlen > NFT_DATA_VALUE_MAXLEN) return -EINVAL; } else - desc.dlen = sizeof(struct nft_data); + desc.dlen = sizeof(struct nft_verdict); } else if (flags & NFT_SET_MAP) return -EINVAL; + timeout = 0; + if (nla[NFTA_SET_TIMEOUT] != NULL) { + if (!(flags & NFT_SET_TIMEOUT)) + return -EINVAL; + timeout = be64_to_cpu(nla_get_be64(nla[NFTA_SET_TIMEOUT])); + } + gc_int = 0; + if (nla[NFTA_SET_GC_INTERVAL] != NULL) { + if (!(flags & NFT_SET_TIMEOUT)) + return -EINVAL; + gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL])); + } + policy = NFT_SET_POL_PERFORMANCE; if (nla[NFTA_SET_POLICY] != NULL) policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY])); @@ -2699,6 +2772,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb, set->flags = flags; set->size = desc.size; set->policy = policy; + set->timeout = timeout; + set->gc_int = gc_int; err = ops->init(set, &desc, nla); if (err < 0) @@ -2771,9 +2846,10 @@ static int nf_tables_bind_check_setelem(const struct nft_ctx *ctx, enum nft_registers dreg; dreg = nft_type_to_reg(set->dtype); - return nft_validate_data_load(ctx, dreg, nft_set_ext_data(ext), - set->dtype == NFT_DATA_VERDICT ? - NFT_DATA_VERDICT : NFT_DATA_VALUE); + return nft_validate_register_store(ctx, dreg, nft_set_ext_data(ext), + set->dtype == NFT_DATA_VERDICT ? + NFT_DATA_VERDICT : NFT_DATA_VALUE, + set->dlen); } int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, @@ -2785,12 +2861,13 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, if (!list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS) return -EBUSY; - if (set->flags & NFT_SET_MAP) { + if (binding->flags & NFT_SET_MAP) { /* If the set is already bound to the same chain all * jumps are already validated for that chain. */ list_for_each_entry(i, &set->bindings, list) { - if (i->chain == binding->chain) + if (binding->flags & NFT_SET_MAP && + i->chain == binding->chain) goto bind; } @@ -2826,17 +2903,30 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, const struct nft_set_ext_type nft_set_ext_types[] = { [NFT_SET_EXT_KEY] = { - .len = sizeof(struct nft_data), - .align = __alignof__(struct nft_data), + .align = __alignof__(u32), }, [NFT_SET_EXT_DATA] = { - .len = sizeof(struct nft_data), - .align = __alignof__(struct nft_data), + .align = __alignof__(u32), + }, + [NFT_SET_EXT_EXPR] = { + .align = __alignof__(struct nft_expr), }, [NFT_SET_EXT_FLAGS] = { .len = sizeof(u8), .align = __alignof__(u8), }, + [NFT_SET_EXT_TIMEOUT] = { + .len = sizeof(u64), + .align = __alignof__(u64), + }, + [NFT_SET_EXT_EXPIRATION] = { + .len = sizeof(unsigned long), + .align = __alignof__(unsigned long), + }, + [NFT_SET_EXT_USERDATA] = { + .len = sizeof(struct nft_userdata), + .align = __alignof__(struct nft_userdata), + }, }; EXPORT_SYMBOL_GPL(nft_set_ext_types); @@ -2848,6 +2938,9 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = { [NFTA_SET_ELEM_KEY] = { .type = NLA_NESTED }, [NFTA_SET_ELEM_DATA] = { .type = NLA_NESTED }, [NFTA_SET_ELEM_FLAGS] = { .type = NLA_U32 }, + [NFTA_SET_ELEM_TIMEOUT] = { .type = NLA_U64 }, + [NFTA_SET_ELEM_USERDATA] = { .type = NLA_BINARY, + .len = NFT_USERDATA_MAXLEN }, }; static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = { @@ -2904,11 +2997,43 @@ static int nf_tables_fill_setelem(struct sk_buff *skb, set->dlen) < 0) goto nla_put_failure; + if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR) && + nft_expr_dump(skb, NFTA_SET_ELEM_EXPR, nft_set_ext_expr(ext)) < 0) + goto nla_put_failure; + if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) && nla_put_be32(skb, NFTA_SET_ELEM_FLAGS, htonl(*nft_set_ext_flags(ext)))) goto nla_put_failure; + if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) && + nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT, + cpu_to_be64(*nft_set_ext_timeout(ext)))) + goto nla_put_failure; + + if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) { + unsigned long expires, now = jiffies; + + expires = *nft_set_ext_expiration(ext); + if (time_before(now, expires)) + expires -= now; + else + expires = 0; + + if (nla_put_be64(skb, NFTA_SET_ELEM_EXPIRATION, + cpu_to_be64(jiffies_to_msecs(expires)))) + goto nla_put_failure; + } + + if (nft_set_ext_exists(ext, NFT_SET_EXT_USERDATA)) { + struct nft_userdata *udata; + + udata = nft_set_ext_userdata(ext); + if (nla_put(skb, NFTA_SET_ELEM_USERDATA, + udata->len + 1, udata->data)) + goto nla_put_failure; + } + nla_nest_end(skb, nest); return 0; @@ -3128,11 +3253,10 @@ static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx, return trans; } -static void *nft_set_elem_init(const struct nft_set *set, - const struct nft_set_ext_tmpl *tmpl, - const struct nft_data *key, - const struct nft_data *data, - gfp_t gfp) +void *nft_set_elem_init(const struct nft_set *set, + const struct nft_set_ext_tmpl *tmpl, + const u32 *key, const u32 *data, + u64 timeout, gfp_t gfp) { struct nft_set_ext *ext; void *elem; @@ -3147,6 +3271,11 @@ static void *nft_set_elem_init(const struct nft_set *set, memcpy(nft_set_ext_key(ext), key, set->klen); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) memcpy(nft_set_ext_data(ext), data, set->dlen); + if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) + *nft_set_ext_expiration(ext) = + jiffies + msecs_to_jiffies(timeout); + if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT)) + *nft_set_ext_timeout(ext) = timeout; return elem; } @@ -3158,6 +3287,8 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem) nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE); if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA)) nft_data_uninit(nft_set_ext_data(ext), set->dtype); + if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) + nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext)); kfree(elem); } @@ -3172,15 +3303,15 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, struct nft_set_ext *ext; struct nft_set_elem elem; struct nft_set_binding *binding; + struct nft_userdata *udata; struct nft_data data; enum nft_registers dreg; struct nft_trans *trans; + u64 timeout; u32 flags; + u8 ulen; int err; - if (set->size && set->nelems == set->size) - return -ENFILE; - err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, nft_set_elem_policy); if (err < 0) @@ -3215,17 +3346,33 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, return -EINVAL; } - err = nft_data_init(ctx, &elem.key, &d1, nla[NFTA_SET_ELEM_KEY]); + timeout = 0; + if (nla[NFTA_SET_ELEM_TIMEOUT] != NULL) { + if (!(set->flags & NFT_SET_TIMEOUT)) + return -EINVAL; + timeout = be64_to_cpu(nla_get_be64(nla[NFTA_SET_ELEM_TIMEOUT])); + } else if (set->flags & NFT_SET_TIMEOUT) { + timeout = set->timeout; + } + + err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &d1, + nla[NFTA_SET_ELEM_KEY]); if (err < 0) goto err1; err = -EINVAL; if (d1.type != NFT_DATA_VALUE || d1.len != set->klen) goto err2; - nft_set_ext_add(&tmpl, NFT_SET_EXT_KEY); + nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, d1.len); + if (timeout > 0) { + nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION); + if (timeout != set->timeout) + nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT); + } if (nla[NFTA_SET_ELEM_DATA] != NULL) { - err = nft_data_init(ctx, &data, &d2, nla[NFTA_SET_ELEM_DATA]); + err = nft_data_init(ctx, &data, sizeof(data), &d2, + nla[NFTA_SET_ELEM_DATA]); if (err < 0) goto err2; @@ -3241,29 +3388,51 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, .chain = (struct nft_chain *)binding->chain, }; - err = nft_validate_data_load(&bind_ctx, dreg, - &data, d2.type); + if (!(binding->flags & NFT_SET_MAP)) + continue; + + err = nft_validate_register_store(&bind_ctx, dreg, + &data, + d2.type, d2.len); if (err < 0) goto err3; } - nft_set_ext_add(&tmpl, NFT_SET_EXT_DATA); + nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, d2.len); + } + + /* The full maximum length of userdata can exceed the maximum + * offset value (U8_MAX) for following extensions, therefor it + * must be the last extension added. + */ + ulen = 0; + if (nla[NFTA_SET_ELEM_USERDATA] != NULL) { + ulen = nla_len(nla[NFTA_SET_ELEM_USERDATA]); + if (ulen > 0) + nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA, + ulen); } err = -ENOMEM; - elem.priv = nft_set_elem_init(set, &tmpl, &elem.key, &data, GFP_KERNEL); + elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, data.data, + timeout, GFP_KERNEL); if (elem.priv == NULL) goto err3; ext = nft_set_elem_ext(set, elem.priv); if (flags) *nft_set_ext_flags(ext) = flags; + if (ulen > 0) { + udata = nft_set_ext_userdata(ext); + udata->len = ulen - 1; + nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen); + } trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set); if (trans == NULL) goto err4; - ext->genmask = nft_genmask_cur(ctx->net); + ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK; err = set->ops->insert(set, &elem); if (err < 0) goto err5; @@ -3280,7 +3449,7 @@ err3: if (nla[NFTA_SET_ELEM_DATA] != NULL) nft_data_uninit(&data, d2.type); err2: - nft_data_uninit(&elem.key, d1.type); + nft_data_uninit(&elem.key.val, d1.type); err1: return err; } @@ -3316,11 +3485,15 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb, return -EBUSY; nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { + if (set->size && + !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact)) + return -ENFILE; + err = nft_add_set_elem(&ctx, set, attr); - if (err < 0) + if (err < 0) { + atomic_dec(&set->nelems); break; - - set->nelems++; + } } return err; } @@ -3343,7 +3516,8 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, if (nla[NFTA_SET_ELEM_KEY] == NULL) goto err1; - err = nft_data_init(ctx, &elem.key, &desc, nla[NFTA_SET_ELEM_KEY]); + err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc, + nla[NFTA_SET_ELEM_KEY]); if (err < 0) goto err1; @@ -3370,7 +3544,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set, err3: kfree(trans); err2: - nft_data_uninit(&elem.key, desc.type); + nft_data_uninit(&elem.key.val, desc.type); err1: return err; } @@ -3402,11 +3576,36 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb, if (err < 0) break; - set->nelems--; + set->ndeact++; } return err; } +void nft_set_gc_batch_release(struct rcu_head *rcu) +{ + struct nft_set_gc_batch *gcb; + unsigned int i; + + gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu); + for (i = 0; i < gcb->head.cnt; i++) + nft_set_elem_destroy(gcb->head.set, gcb->elems[i]); + kfree(gcb); +} +EXPORT_SYMBOL_GPL(nft_set_gc_batch_release); + +struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set, + gfp_t gfp) +{ + struct nft_set_gc_batch *gcb; + + gcb = kzalloc(sizeof(*gcb), gfp); + if (gcb == NULL) + return gcb; + gcb->head.set = set; + return gcb; +} +EXPORT_SYMBOL_GPL(nft_set_gc_batch_alloc); + static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, u32 portid, u32 seq) { @@ -3710,6 +3909,8 @@ static int nf_tables_commit(struct sk_buff *skb) &te->elem, NFT_MSG_DELSETELEM, 0); te->set->ops->remove(te->set, &te->elem); + atomic_dec(&te->set->nelems); + te->set->ndeact--; break; } } @@ -3813,16 +4014,16 @@ static int nf_tables_abort(struct sk_buff *skb) nft_trans_destroy(trans); break; case NFT_MSG_NEWSETELEM: - nft_trans_elem_set(trans)->nelems--; te = (struct nft_trans_elem *)trans->data; te->set->ops->remove(te->set, &te->elem); + atomic_dec(&te->set->nelems); break; case NFT_MSG_DELSETELEM: te = (struct nft_trans_elem *)trans->data; - nft_trans_elem_set(trans)->nelems++; te->set->ops->activate(te->set, &te->elem); + te->set->ndeact--; nft_trans_destroy(trans); break; @@ -3906,10 +4107,10 @@ static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx, return 0; data = nft_set_ext_data(ext); - switch (data->verdict) { + switch (data->verdict.code) { case NFT_JUMP: case NFT_GOTO: - return nf_tables_check_loops(ctx, data->chain); + return nf_tables_check_loops(ctx, data->verdict.chain); default: return 0; } @@ -3942,10 +4143,11 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, if (data == NULL) continue; - switch (data->verdict) { + switch (data->verdict.code) { case NFT_JUMP: case NFT_GOTO: - err = nf_tables_check_loops(ctx, data->chain); + err = nf_tables_check_loops(ctx, + data->verdict.chain); if (err < 0) return err; default: @@ -3960,7 +4162,8 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, continue; list_for_each_entry(binding, &set->bindings, list) { - if (binding->chain != chain) + if (!(binding->flags & NFT_SET_MAP) || + binding->chain != chain) continue; iter.skip = 0; @@ -3978,85 +4181,129 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, } /** - * nft_validate_input_register - validate an expressions' input register + * nft_parse_register - parse a register value from a netlink attribute * - * @reg: the register number + * @attr: netlink attribute * - * Validate that the input register is one of the general purpose - * registers. + * Parse and translate a register value from a netlink attribute. + * Registers used to be 128 bit wide, these register numbers will be + * mapped to the corresponding 32 bit register numbers. */ -int nft_validate_input_register(enum nft_registers reg) +unsigned int nft_parse_register(const struct nlattr *attr) { - if (reg <= NFT_REG_VERDICT) - return -EINVAL; - if (reg > NFT_REG_MAX) - return -ERANGE; - return 0; + unsigned int reg; + + reg = ntohl(nla_get_be32(attr)); + switch (reg) { + case NFT_REG_VERDICT...NFT_REG_4: + return reg * NFT_REG_SIZE / NFT_REG32_SIZE; + default: + return reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00; + } } -EXPORT_SYMBOL_GPL(nft_validate_input_register); +EXPORT_SYMBOL_GPL(nft_parse_register); /** - * nft_validate_output_register - validate an expressions' output register + * nft_dump_register - dump a register value to a netlink attribute + * + * @skb: socket buffer + * @attr: attribute number + * @reg: register number + * + * Construct a netlink attribute containing the register number. For + * compatibility reasons, register numbers being a multiple of 4 are + * translated to the corresponding 128 bit register numbers. + */ +int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg) +{ + if (reg % (NFT_REG_SIZE / NFT_REG32_SIZE) == 0) + reg = reg / (NFT_REG_SIZE / NFT_REG32_SIZE); + else + reg = reg - NFT_REG_SIZE / NFT_REG32_SIZE + NFT_REG32_00; + + return nla_put_be32(skb, attr, htonl(reg)); +} +EXPORT_SYMBOL_GPL(nft_dump_register); + +/** + * nft_validate_register_load - validate a load from a register * * @reg: the register number + * @len: the length of the data * - * Validate that the output register is one of the general purpose - * registers or the verdict register. + * Validate that the input register is one of the general purpose + * registers and that the length of the load is within the bounds. */ -int nft_validate_output_register(enum nft_registers reg) +int nft_validate_register_load(enum nft_registers reg, unsigned int len) { - if (reg < NFT_REG_VERDICT) + if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE) + return -EINVAL; + if (len == 0) return -EINVAL; - if (reg > NFT_REG_MAX) + if (reg * NFT_REG32_SIZE + len > FIELD_SIZEOF(struct nft_regs, data)) return -ERANGE; + return 0; } -EXPORT_SYMBOL_GPL(nft_validate_output_register); +EXPORT_SYMBOL_GPL(nft_validate_register_load); /** - * nft_validate_data_load - validate an expressions' data load + * nft_validate_register_store - validate an expressions' register store * * @ctx: context of the expression performing the load * @reg: the destination register number * @data: the data to load * @type: the data type + * @len: the length of the data * * Validate that a data load uses the appropriate data type for - * the destination register. A value of NULL for the data means - * that its runtime gathered data, which is always of type - * NFT_DATA_VALUE. + * the destination register and the length is within the bounds. + * A value of NULL for the data means that its runtime gathered + * data. */ -int nft_validate_data_load(const struct nft_ctx *ctx, enum nft_registers reg, - const struct nft_data *data, - enum nft_data_types type) +int nft_validate_register_store(const struct nft_ctx *ctx, + enum nft_registers reg, + const struct nft_data *data, + enum nft_data_types type, unsigned int len) { int err; switch (reg) { case NFT_REG_VERDICT: - if (data == NULL || type != NFT_DATA_VERDICT) + if (type != NFT_DATA_VERDICT) return -EINVAL; - if (data->verdict == NFT_GOTO || data->verdict == NFT_JUMP) { - err = nf_tables_check_loops(ctx, data->chain); + if (data != NULL && + (data->verdict.code == NFT_GOTO || + data->verdict.code == NFT_JUMP)) { + err = nf_tables_check_loops(ctx, data->verdict.chain); if (err < 0) return err; - if (ctx->chain->level + 1 > data->chain->level) { + if (ctx->chain->level + 1 > + data->verdict.chain->level) { if (ctx->chain->level + 1 == NFT_JUMP_STACK_SIZE) return -EMLINK; - data->chain->level = ctx->chain->level + 1; + data->verdict.chain->level = ctx->chain->level + 1; } } return 0; default: + if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE) + return -EINVAL; + if (len == 0) + return -EINVAL; + if (reg * NFT_REG32_SIZE + len > + FIELD_SIZEOF(struct nft_regs, data)) + return -ERANGE; + if (data != NULL && type != NFT_DATA_VALUE) return -EINVAL; return 0; } } -EXPORT_SYMBOL_GPL(nft_validate_data_load); +EXPORT_SYMBOL_GPL(nft_validate_register_store); static const struct nla_policy nft_verdict_policy[NFTA_VERDICT_MAX + 1] = { [NFTA_VERDICT_CODE] = { .type = NLA_U32 }, @@ -4077,11 +4324,11 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, if (!tb[NFTA_VERDICT_CODE]) return -EINVAL; - data->verdict = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE])); + data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE])); - switch (data->verdict) { + switch (data->verdict.code) { default: - switch (data->verdict & NF_VERDICT_MASK) { + switch (data->verdict.code & NF_VERDICT_MASK) { case NF_ACCEPT: case NF_DROP: case NF_QUEUE: @@ -4107,7 +4354,7 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, return -EOPNOTSUPP; chain->use++; - data->chain = chain; + data->verdict.chain = chain; desc->len = sizeof(data); break; } @@ -4118,10 +4365,10 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data, static void nft_verdict_uninit(const struct nft_data *data) { - switch (data->verdict) { + switch (data->verdict.code) { case NFT_JUMP: case NFT_GOTO: - data->chain->use--; + data->verdict.chain->use--; break; } } @@ -4134,13 +4381,14 @@ static int nft_verdict_dump(struct sk_buff *skb, const struct nft_data *data) if (!nest) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict))) + if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict.code))) goto nla_put_failure; - switch (data->verdict) { + switch (data->verdict.code) { case NFT_JUMP: case NFT_GOTO: - if (nla_put_string(skb, NFTA_VERDICT_CHAIN, data->chain->name)) + if (nla_put_string(skb, NFTA_VERDICT_CHAIN, + data->verdict.chain->name)) goto nla_put_failure; } nla_nest_end(skb, nest); @@ -4150,7 +4398,8 @@ nla_put_failure: return -1; } -static int nft_value_init(const struct nft_ctx *ctx, struct nft_data *data, +static int nft_value_init(const struct nft_ctx *ctx, + struct nft_data *data, unsigned int size, struct nft_data_desc *desc, const struct nlattr *nla) { unsigned int len; @@ -4158,10 +4407,10 @@ static int nft_value_init(const struct nft_ctx *ctx, struct nft_data *data, len = nla_len(nla); if (len == 0) return -EINVAL; - if (len > sizeof(data->data)) + if (len > size) return -EOVERFLOW; - nla_memcpy(data->data, nla, sizeof(data->data)); + nla_memcpy(data->data, nla, len); desc->type = NFT_DATA_VALUE; desc->len = len; return 0; @@ -4174,8 +4423,7 @@ static int nft_value_dump(struct sk_buff *skb, const struct nft_data *data, } static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = { - [NFTA_DATA_VALUE] = { .type = NLA_BINARY, - .len = FIELD_SIZEOF(struct nft_data, data) }, + [NFTA_DATA_VALUE] = { .type = NLA_BINARY }, [NFTA_DATA_VERDICT] = { .type = NLA_NESTED }, }; @@ -4184,6 +4432,7 @@ static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = { * * @ctx: context of the expression using the data * @data: destination struct nft_data + * @size: maximum data length * @desc: data description * @nla: netlink attribute containing data * @@ -4193,7 +4442,8 @@ static const struct nla_policy nft_data_policy[NFTA_DATA_MAX + 1] = { * The caller can indicate that it only wants to accept data of type * NFT_DATA_VALUE by passing NULL for the ctx argument. */ -int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, +int nft_data_init(const struct nft_ctx *ctx, + struct nft_data *data, unsigned int size, struct nft_data_desc *desc, const struct nlattr *nla) { struct nlattr *tb[NFTA_DATA_MAX + 1]; @@ -4204,7 +4454,8 @@ int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, return err; if (tb[NFTA_DATA_VALUE]) - return nft_value_init(ctx, data, desc, tb[NFTA_DATA_VALUE]); + return nft_value_init(ctx, data, size, desc, + tb[NFTA_DATA_VALUE]); if (tb[NFTA_DATA_VERDICT] && ctx != NULL) return nft_verdict_init(ctx, data, desc, tb[NFTA_DATA_VERDICT]); return -EINVAL; diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index ef4dfcbaf149..f153b07073af 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -65,23 +65,23 @@ static inline void nft_trace_packet(const struct nft_pktinfo *pkt, } static void nft_cmp_fast_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1]) + struct nft_regs *regs) { const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); u32 mask = nft_cmp_fast_mask(priv->len); - if ((data[priv->sreg].data[0] & mask) == priv->data) + if ((regs->data[priv->sreg] & mask) == priv->data) return; - data[NFT_REG_VERDICT].verdict = NFT_BREAK; + regs->verdict.code = NFT_BREAK; } static bool nft_payload_fast_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_payload *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; - struct nft_data *dest = &data[priv->dreg]; + u32 *dest = ®s->data[priv->dreg]; unsigned char *ptr; if (priv->base == NFT_PAYLOAD_NETWORK_HEADER) @@ -94,12 +94,13 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr, if (unlikely(ptr + priv->len >= skb_tail_pointer(skb))) return false; + *dest = 0; if (priv->len == 2) - *(u16 *)dest->data = *(u16 *)ptr; + *(u16 *)dest = *(u16 *)ptr; else if (priv->len == 4) - *(u32 *)dest->data = *(u32 *)ptr; + *(u32 *)dest = *(u32 *)ptr; else - *(u8 *)dest->data = *(u8 *)ptr; + *(u8 *)dest = *(u8 *)ptr; return true; } @@ -116,7 +117,7 @@ nft_do_chain(struct nft_pktinfo *pkt, const struct nf_hook_ops *ops) const struct net *net = read_pnet(&nft_base_chain(basechain)->pnet); const struct nft_rule *rule; const struct nft_expr *expr, *last; - struct nft_data data[NFT_REG_MAX + 1]; + struct nft_regs regs; unsigned int stackptr = 0; struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; struct nft_stats *stats; @@ -127,7 +128,7 @@ do_chain: rulenum = 0; rule = list_entry(&chain->rules, struct nft_rule, list); next_rule: - data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; + regs.verdict.code = NFT_CONTINUE; list_for_each_entry_continue_rcu(rule, &chain->rules, list) { /* This rule is not active, skip. */ @@ -138,18 +139,18 @@ next_rule: nft_rule_for_each_expr(expr, last, rule) { if (expr->ops == &nft_cmp_fast_ops) - nft_cmp_fast_eval(expr, data); + nft_cmp_fast_eval(expr, ®s); else if (expr->ops != &nft_payload_fast_ops || - !nft_payload_fast_eval(expr, data, pkt)) - expr->ops->eval(expr, data, pkt); + !nft_payload_fast_eval(expr, ®s, pkt)) + expr->ops->eval(expr, ®s, pkt); - if (data[NFT_REG_VERDICT].verdict != NFT_CONTINUE) + if (regs.verdict.code != NFT_CONTINUE) break; } - switch (data[NFT_REG_VERDICT].verdict) { + switch (regs.verdict.code) { case NFT_BREAK: - data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; + regs.verdict.code = NFT_CONTINUE; continue; case NFT_CONTINUE: nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); @@ -158,15 +159,15 @@ next_rule: break; } - switch (data[NFT_REG_VERDICT].verdict & NF_VERDICT_MASK) { + switch (regs.verdict.code & NF_VERDICT_MASK) { case NF_ACCEPT: case NF_DROP: case NF_QUEUE: nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); - return data[NFT_REG_VERDICT].verdict; + return regs.verdict.code; } - switch (data[NFT_REG_VERDICT].verdict) { + switch (regs.verdict.code) { case NFT_JUMP: BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE); jumpstack[stackptr].chain = chain; @@ -177,7 +178,7 @@ next_rule: case NFT_GOTO: nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE); - chain = data[NFT_REG_VERDICT].chain; + chain = regs.verdict.chain; goto do_chain; case NFT_CONTINUE: rulenum++; @@ -239,8 +240,14 @@ int __init nf_tables_core_module_init(void) if (err < 0) goto err6; + err = nft_dynset_module_init(); + if (err < 0) + goto err7; + return 0; +err7: + nft_payload_module_exit(); err6: nft_byteorder_module_exit(); err5: @@ -257,6 +264,7 @@ err1: void nf_tables_core_module_exit(void) { + nft_dynset_module_exit(); nft_payload_module_exit(); nft_byteorder_module_exit(); nft_bitwise_module_exit(); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 957b83a0223b..3ad91266c821 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -23,6 +23,7 @@ #include <linux/ipv6.h> #include <linux/netdevice.h> #include <linux/netfilter.h> +#include <linux/netfilter_bridge.h> #include <net/netlink.h> #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_log.h> @@ -62,7 +63,7 @@ struct nfulnl_instance { struct timer_list timer; struct net *net; struct user_namespace *peer_user_ns; /* User namespace of the peer process */ - int peer_portid; /* PORTID of the peer process */ + u32 peer_portid; /* PORTID of the peer process */ /* configurable parameters */ unsigned int flushtimeout; /* timeout until queue flush */ @@ -151,7 +152,7 @@ static void nfulnl_timer(unsigned long data); static struct nfulnl_instance * instance_create(struct net *net, u_int16_t group_num, - int portid, struct user_namespace *user_ns) + u32 portid, struct user_namespace *user_ns) { struct nfulnl_instance *inst; struct nfnl_log_net *log = nfnl_log_pernet(net); @@ -448,14 +449,18 @@ __build_packet_message(struct nfnl_log_net *log, htonl(br_port_get_rcu(indev)->br->dev->ifindex))) goto nla_put_failure; } else { + struct net_device *physindev; + /* Case 2: indev is bridge group, we need to look for * physical device (when called from ipv4) */ if (nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV, htonl(indev->ifindex))) goto nla_put_failure; - if (skb->nf_bridge && skb->nf_bridge->physindev && + + physindev = nf_bridge_get_physindev(skb); + if (physindev && nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV, - htonl(skb->nf_bridge->physindev->ifindex))) + htonl(physindev->ifindex))) goto nla_put_failure; } #endif @@ -479,14 +484,18 @@ __build_packet_message(struct nfnl_log_net *log, htonl(br_port_get_rcu(outdev)->br->dev->ifindex))) goto nla_put_failure; } else { + struct net_device *physoutdev; + /* Case 2: indev is a bridge group, we need to look * for physical device (when called from ipv4) */ if (nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV, htonl(outdev->ifindex))) goto nla_put_failure; - if (skb->nf_bridge && skb->nf_bridge->physoutdev && + + physoutdev = nf_bridge_get_physoutdev(skb); + if (physoutdev && nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV, - htonl(skb->nf_bridge->physoutdev->ifindex))) + htonl(physoutdev->ifindex))) goto nla_put_failure; } #endif @@ -998,7 +1007,7 @@ static int seq_show(struct seq_file *s, void *v) { const struct nfulnl_instance *inst = v; - seq_printf(s, "%5d %6d %5d %1d %5d %6d %2d\n", + seq_printf(s, "%5u %6u %5u %1u %5u %6u %2u\n", inst->group_num, inst->peer_portid, inst->qlen, inst->copy_mode, inst->copy_range, diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index 6e74655a8d4f..0b98c7420239 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -25,6 +25,7 @@ #include <linux/proc_fs.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv6.h> +#include <linux/netfilter_bridge.h> #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_queue.h> #include <linux/list.h> @@ -54,7 +55,7 @@ struct nfqnl_instance { struct hlist_node hlist; /* global list of queues */ struct rcu_head rcu; - int peer_portid; + u32 peer_portid; unsigned int queue_maxlen; unsigned int copy_range; unsigned int queue_dropped; @@ -109,8 +110,7 @@ instance_lookup(struct nfnl_queue_net *q, u_int16_t queue_num) } static struct nfqnl_instance * -instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, - int portid) +instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid) { struct nfqnl_instance *inst; unsigned int h; @@ -396,14 +396,18 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, htonl(br_port_get_rcu(indev)->br->dev->ifindex))) goto nla_put_failure; } else { + int physinif; + /* Case 2: indev is bridge group, we need to look for * physical device (when called from ipv4) */ if (nla_put_be32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex))) goto nla_put_failure; - if (entskb->nf_bridge && entskb->nf_bridge->physindev && + + physinif = nf_bridge_get_physinif(entskb); + if (physinif && nla_put_be32(skb, NFQA_IFINDEX_PHYSINDEV, - htonl(entskb->nf_bridge->physindev->ifindex))) + htonl(physinif))) goto nla_put_failure; } #endif @@ -426,14 +430,18 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, htonl(br_port_get_rcu(outdev)->br->dev->ifindex))) goto nla_put_failure; } else { + int physoutif; + /* Case 2: outdev is bridge group, we need to look for * physical output device (when called from ipv4) */ if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex))) goto nla_put_failure; - if (entskb->nf_bridge && entskb->nf_bridge->physoutdev && + + physoutif = nf_bridge_get_physoutif(entskb); + if (physoutif && nla_put_be32(skb, NFQA_IFINDEX_PHYSOUTDEV, - htonl(entskb->nf_bridge->physoutdev->ifindex))) + htonl(physoutif))) goto nla_put_failure; } #endif @@ -765,11 +773,12 @@ dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex) return 1; #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (entry->skb->nf_bridge) { - if (entry->skb->nf_bridge->physindev && - entry->skb->nf_bridge->physindev->ifindex == ifindex) - return 1; - if (entry->skb->nf_bridge->physoutdev && - entry->skb->nf_bridge->physoutdev->ifindex == ifindex) + int physinif, physoutif; + + physinif = nf_bridge_get_physinif(entry->skb); + physoutif = nf_bridge_get_physoutif(entry->skb); + + if (physinif == ifindex || physoutif == ifindex) return 1; } #endif @@ -860,7 +869,7 @@ static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = { }; static struct nfqnl_instance * -verdict_instance_lookup(struct nfnl_queue_net *q, u16 queue_num, int nlportid) +verdict_instance_lookup(struct nfnl_queue_net *q, u16 queue_num, u32 nlportid) { struct nfqnl_instance *queue; @@ -1242,7 +1251,7 @@ static int seq_show(struct seq_file *s, void *v) { const struct nfqnl_instance *inst = v; - seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n", + seq_printf(s, "%5u %6u %5u %1u %5u %5u %5u %8u %2d\n", inst->queue_num, inst->peer_portid, inst->queue_total, inst->copy_mode, inst->copy_range, diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index 4fb6ee2c1106..d71cc18fa35d 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -26,18 +26,16 @@ struct nft_bitwise { }; static void nft_bitwise_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_bitwise *priv = nft_expr_priv(expr); - const struct nft_data *src = &data[priv->sreg]; - struct nft_data *dst = &data[priv->dreg]; + const u32 *src = ®s->data[priv->sreg]; + u32 *dst = ®s->data[priv->dreg]; unsigned int i; - for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++) { - dst->data[i] = (src->data[i] & priv->mask.data[i]) ^ - priv->xor.data[i]; - } + for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++) + dst[i] = (src[i] & priv->mask.data[i]) ^ priv->xor.data[i]; } static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = { @@ -63,28 +61,27 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, tb[NFTA_BITWISE_XOR] == NULL) return -EINVAL; - priv->sreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_SREG])); - err = nft_validate_input_register(priv->sreg); + priv->len = ntohl(nla_get_be32(tb[NFTA_BITWISE_LEN])); + priv->sreg = nft_parse_register(tb[NFTA_BITWISE_SREG]); + err = nft_validate_register_load(priv->sreg, priv->len); if (err < 0) return err; - priv->dreg = ntohl(nla_get_be32(tb[NFTA_BITWISE_DREG])); - err = nft_validate_output_register(priv->dreg); + priv->dreg = nft_parse_register(tb[NFTA_BITWISE_DREG]); + err = nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, priv->len); if (err < 0) return err; - err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); - if (err < 0) - return err; - - priv->len = ntohl(nla_get_be32(tb[NFTA_BITWISE_LEN])); - err = nft_data_init(NULL, &priv->mask, &d1, tb[NFTA_BITWISE_MASK]); + err = nft_data_init(NULL, &priv->mask, sizeof(priv->mask), &d1, + tb[NFTA_BITWISE_MASK]); if (err < 0) return err; if (d1.len != priv->len) return -EINVAL; - err = nft_data_init(NULL, &priv->xor, &d2, tb[NFTA_BITWISE_XOR]); + err = nft_data_init(NULL, &priv->xor, sizeof(priv->xor), &d2, + tb[NFTA_BITWISE_XOR]); if (err < 0) return err; if (d2.len != priv->len) @@ -97,9 +94,9 @@ static int nft_bitwise_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_bitwise *priv = nft_expr_priv(expr); - if (nla_put_be32(skb, NFTA_BITWISE_SREG, htonl(priv->sreg))) + if (nft_dump_register(skb, NFTA_BITWISE_SREG, priv->sreg)) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_BITWISE_DREG, htonl(priv->dreg))) + if (nft_dump_register(skb, NFTA_BITWISE_DREG, priv->dreg)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_BITWISE_LEN, htonl(priv->len))) goto nla_put_failure; diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index c39ed8d29df1..fde5145f2e36 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -26,16 +26,17 @@ struct nft_byteorder { }; static void nft_byteorder_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_byteorder *priv = nft_expr_priv(expr); - struct nft_data *src = &data[priv->sreg], *dst = &data[priv->dreg]; + u32 *src = ®s->data[priv->sreg]; + u32 *dst = ®s->data[priv->dreg]; union { u32 u32; u16 u16; } *s, *d; unsigned int i; - s = (void *)src->data; - d = (void *)dst->data; + s = (void *)src; + d = (void *)dst; switch (priv->size) { case 4: @@ -87,19 +88,6 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, tb[NFTA_BYTEORDER_OP] == NULL) return -EINVAL; - priv->sreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SREG])); - err = nft_validate_input_register(priv->sreg); - if (err < 0) - return err; - - priv->dreg = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_DREG])); - err = nft_validate_output_register(priv->dreg); - if (err < 0) - return err; - err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); - if (err < 0) - return err; - priv->op = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_OP])); switch (priv->op) { case NFT_BYTEORDER_NTOH: @@ -109,10 +97,6 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, return -EINVAL; } - priv->len = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_LEN])); - if (priv->len == 0 || priv->len > FIELD_SIZEOF(struct nft_data, data)) - return -EINVAL; - priv->size = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SIZE])); switch (priv->size) { case 2: @@ -122,16 +106,24 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, return -EINVAL; } - return 0; + priv->sreg = nft_parse_register(tb[NFTA_BYTEORDER_SREG]); + priv->len = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_LEN])); + err = nft_validate_register_load(priv->sreg, priv->len); + if (err < 0) + return err; + + priv->dreg = nft_parse_register(tb[NFTA_BYTEORDER_DREG]); + return nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, priv->len); } static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_byteorder *priv = nft_expr_priv(expr); - if (nla_put_be32(skb, NFTA_BYTEORDER_SREG, htonl(priv->sreg))) + if (nft_dump_register(skb, NFTA_BYTEORDER_SREG, priv->sreg)) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_BYTEORDER_DREG, htonl(priv->dreg))) + if (nft_dump_register(skb, NFTA_BYTEORDER_DREG, priv->dreg)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_BYTEORDER_OP, htonl(priv->op))) goto nla_put_failure; diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index e2b3f51c81f1..e25b35d70e4d 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -25,13 +25,13 @@ struct nft_cmp_expr { }; static void nft_cmp_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_cmp_expr *priv = nft_expr_priv(expr); int d; - d = nft_data_cmp(&data[priv->sreg], &priv->data, priv->len); + d = memcmp(®s->data[priv->sreg], &priv->data, priv->len); switch (priv->op) { case NFT_CMP_EQ: if (d != 0) @@ -59,7 +59,7 @@ static void nft_cmp_eval(const struct nft_expr *expr, return; mismatch: - data[NFT_REG_VERDICT].verdict = NFT_BREAK; + regs->verdict.code = NFT_BREAK; } static const struct nla_policy nft_cmp_policy[NFTA_CMP_MAX + 1] = { @@ -75,12 +75,16 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, struct nft_data_desc desc; int err; - priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG])); - priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP])); - - err = nft_data_init(NULL, &priv->data, &desc, tb[NFTA_CMP_DATA]); + err = nft_data_init(NULL, &priv->data, sizeof(priv->data), &desc, + tb[NFTA_CMP_DATA]); BUG_ON(err < 0); + priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]); + err = nft_validate_register_load(priv->sreg, desc.len); + if (err < 0) + return err; + + priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP])); priv->len = desc.len; return 0; } @@ -89,7 +93,7 @@ static int nft_cmp_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_cmp_expr *priv = nft_expr_priv(expr); - if (nla_put_be32(skb, NFTA_CMP_SREG, htonl(priv->sreg))) + if (nft_dump_register(skb, NFTA_CMP_SREG, priv->sreg)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_CMP_OP, htonl(priv->op))) goto nla_put_failure; @@ -122,13 +126,18 @@ static int nft_cmp_fast_init(const struct nft_ctx *ctx, u32 mask; int err; - priv->sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG])); - - err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]); + err = nft_data_init(NULL, &data, sizeof(data), &desc, + tb[NFTA_CMP_DATA]); BUG_ON(err < 0); - desc.len *= BITS_PER_BYTE; + priv->sreg = nft_parse_register(tb[NFTA_CMP_SREG]); + err = nft_validate_register_load(priv->sreg, desc.len); + if (err < 0) + return err; + + desc.len *= BITS_PER_BYTE; mask = nft_cmp_fast_mask(desc.len); + priv->data = data.data[0] & mask; priv->len = desc.len; return 0; @@ -139,7 +148,7 @@ static int nft_cmp_fast_dump(struct sk_buff *skb, const struct nft_expr *expr) const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr); struct nft_data data; - if (nla_put_be32(skb, NFTA_CMP_SREG, htonl(priv->sreg))) + if (nft_dump_register(skb, NFTA_CMP_SREG, priv->sreg)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_CMP_OP, htonl(NFT_CMP_EQ))) goto nla_put_failure; @@ -167,7 +176,6 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_data_desc desc; struct nft_data data; - enum nft_registers sreg; enum nft_cmp_ops op; int err; @@ -176,11 +184,6 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) tb[NFTA_CMP_DATA] == NULL) return ERR_PTR(-EINVAL); - sreg = ntohl(nla_get_be32(tb[NFTA_CMP_SREG])); - err = nft_validate_input_register(sreg); - if (err < 0) - return ERR_PTR(err); - op = ntohl(nla_get_be32(tb[NFTA_CMP_OP])); switch (op) { case NFT_CMP_EQ: @@ -194,7 +197,8 @@ nft_cmp_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) return ERR_PTR(-EINVAL); } - err = nft_data_init(NULL, &data, &desc, tb[NFTA_CMP_DATA]); + err = nft_data_init(NULL, &data, sizeof(data), &desc, + tb[NFTA_CMP_DATA]); if (err < 0) return ERR_PTR(err); diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 589b8487cd08..7f29cfc76349 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -55,7 +55,7 @@ nft_compat_set_par(struct xt_action_param *par, void *xt, const void *xt_info) } static void nft_target_eval_xt(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { void *info = nft_expr_priv(expr); @@ -72,16 +72,16 @@ static void nft_target_eval_xt(const struct nft_expr *expr, switch (ret) { case XT_CONTINUE: - data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; + regs->verdict.code = NFT_CONTINUE; break; default: - data[NFT_REG_VERDICT].verdict = ret; + regs->verdict.code = ret; break; } } static void nft_target_eval_bridge(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { void *info = nft_expr_priv(expr); @@ -98,19 +98,19 @@ static void nft_target_eval_bridge(const struct nft_expr *expr, switch (ret) { case EBT_ACCEPT: - data[NFT_REG_VERDICT].verdict = NF_ACCEPT; + regs->verdict.code = NF_ACCEPT; break; case EBT_DROP: - data[NFT_REG_VERDICT].verdict = NF_DROP; + regs->verdict.code = NF_DROP; break; case EBT_CONTINUE: - data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; + regs->verdict.code = NFT_CONTINUE; break; case EBT_RETURN: - data[NFT_REG_VERDICT].verdict = NFT_RETURN; + regs->verdict.code = NFT_RETURN; break; default: - data[NFT_REG_VERDICT].verdict = ret; + regs->verdict.code = ret; break; } } @@ -304,7 +304,7 @@ static int nft_target_validate(const struct nft_ctx *ctx, } static void nft_match_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { void *info = nft_expr_priv(expr); @@ -317,16 +317,16 @@ static void nft_match_eval(const struct nft_expr *expr, ret = match->match(skb, (struct xt_action_param *)&pkt->xt); if (pkt->xt.hotdrop) { - data[NFT_REG_VERDICT].verdict = NF_DROP; + regs->verdict.code = NF_DROP; return; } - switch(ret) { - case true: - data[NFT_REG_VERDICT].verdict = NFT_CONTINUE; + switch (ret ? 1 : 0) { + case 1: + regs->verdict.code = NFT_CONTINUE; break; - case false: - data[NFT_REG_VERDICT].verdict = NFT_BREAK; + case 0: + regs->verdict.code = NFT_BREAK; break; } } diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index c89ee486ce54..17591239229f 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -24,7 +24,7 @@ struct nft_counter { }; static void nft_counter_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_counter *priv = nft_expr_priv(expr); @@ -92,6 +92,7 @@ static struct nft_expr_type nft_counter_type __read_mostly = { .ops = &nft_counter_ops, .policy = nft_counter_policy, .maxattr = NFTA_COUNTER_MAX, + .flags = NFT_EXPR_STATEFUL, .owner = THIS_MODULE, }; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index cc5603016242..8cbca3432f90 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -31,11 +31,11 @@ struct nft_ct { }; static void nft_ct_get_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_ct *priv = nft_expr_priv(expr); - struct nft_data *dest = &data[priv->dreg]; + u32 *dest = ®s->data[priv->dreg]; enum ip_conntrack_info ctinfo; const struct nf_conn *ct; const struct nf_conn_help *help; @@ -54,8 +54,10 @@ static void nft_ct_get_eval(const struct nft_expr *expr, state = NF_CT_STATE_UNTRACKED_BIT; else state = NF_CT_STATE_BIT(ctinfo); - dest->data[0] = state; + *dest = state; return; + default: + break; } if (ct == NULL) @@ -63,26 +65,26 @@ static void nft_ct_get_eval(const struct nft_expr *expr, switch (priv->key) { case NFT_CT_DIRECTION: - dest->data[0] = CTINFO2DIR(ctinfo); + *dest = CTINFO2DIR(ctinfo); return; case NFT_CT_STATUS: - dest->data[0] = ct->status; + *dest = ct->status; return; #ifdef CONFIG_NF_CONNTRACK_MARK case NFT_CT_MARK: - dest->data[0] = ct->mark; + *dest = ct->mark; return; #endif #ifdef CONFIG_NF_CONNTRACK_SECMARK case NFT_CT_SECMARK: - dest->data[0] = ct->secmark; + *dest = ct->secmark; return; #endif case NFT_CT_EXPIRATION: diff = (long)jiffies - (long)ct->timeout.expires; if (diff < 0) diff = 0; - dest->data[0] = jiffies_to_msecs(diff); + *dest = jiffies_to_msecs(diff); return; case NFT_CT_HELPER: if (ct->master == NULL) @@ -93,9 +95,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr, helper = rcu_dereference(help->helper); if (helper == NULL) goto err; - if (strlen(helper->name) >= sizeof(dest->data)) - goto err; - strncpy((char *)dest->data, helper->name, sizeof(dest->data)); + strncpy((char *)dest, helper->name, NF_CT_HELPER_NAME_LEN); return; #ifdef CONFIG_NF_CONNTRACK_LABELS case NFT_CT_LABELS: { @@ -103,58 +103,60 @@ static void nft_ct_get_eval(const struct nft_expr *expr, unsigned int size; if (!labels) { - memset(dest->data, 0, sizeof(dest->data)); + memset(dest, 0, NF_CT_LABELS_MAX_SIZE); return; } - BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE > sizeof(dest->data)); size = labels->words * sizeof(long); - - memcpy(dest->data, labels->bits, size); - if (size < sizeof(dest->data)) - memset(((char *) dest->data) + size, 0, - sizeof(dest->data) - size); + memcpy(dest, labels->bits, size); + if (size < NF_CT_LABELS_MAX_SIZE) + memset(((char *) dest) + size, 0, + NF_CT_LABELS_MAX_SIZE - size); return; } #endif + default: + break; } tuple = &ct->tuplehash[priv->dir].tuple; switch (priv->key) { case NFT_CT_L3PROTOCOL: - dest->data[0] = nf_ct_l3num(ct); + *dest = nf_ct_l3num(ct); return; case NFT_CT_SRC: - memcpy(dest->data, tuple->src.u3.all, + memcpy(dest, tuple->src.u3.all, nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16); return; case NFT_CT_DST: - memcpy(dest->data, tuple->dst.u3.all, + memcpy(dest, tuple->dst.u3.all, nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16); return; case NFT_CT_PROTOCOL: - dest->data[0] = nf_ct_protonum(ct); + *dest = nf_ct_protonum(ct); return; case NFT_CT_PROTO_SRC: - dest->data[0] = (__force __u16)tuple->src.u.all; + *dest = (__force __u16)tuple->src.u.all; return; case NFT_CT_PROTO_DST: - dest->data[0] = (__force __u16)tuple->dst.u.all; + *dest = (__force __u16)tuple->dst.u.all; return; + default: + break; } return; err: - data[NFT_REG_VERDICT].verdict = NFT_BREAK; + regs->verdict.code = NFT_BREAK; } static void nft_ct_set_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_ct *priv = nft_expr_priv(expr); struct sk_buff *skb = pkt->skb; #ifdef CONFIG_NF_CONNTRACK_MARK - u32 value = data[priv->sreg].data[0]; + u32 value = regs->data[priv->sreg]; #endif enum ip_conntrack_info ctinfo; struct nf_conn *ct; @@ -172,6 +174,8 @@ static void nft_ct_set_eval(const struct nft_expr *expr, } break; #endif + default: + break; } } @@ -220,12 +224,17 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_ct *priv = nft_expr_priv(expr); + unsigned int len; int err; priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY])); switch (priv->key) { - case NFT_CT_STATE: case NFT_CT_DIRECTION: + if (tb[NFTA_CT_DIRECTION] != NULL) + return -EINVAL; + len = sizeof(u8); + break; + case NFT_CT_STATE: case NFT_CT_STATUS: #ifdef CONFIG_NF_CONNTRACK_MARK case NFT_CT_MARK: @@ -233,22 +242,54 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, #ifdef CONFIG_NF_CONNTRACK_SECMARK case NFT_CT_SECMARK: #endif + case NFT_CT_EXPIRATION: + if (tb[NFTA_CT_DIRECTION] != NULL) + return -EINVAL; + len = sizeof(u32); + break; #ifdef CONFIG_NF_CONNTRACK_LABELS case NFT_CT_LABELS: + if (tb[NFTA_CT_DIRECTION] != NULL) + return -EINVAL; + len = NF_CT_LABELS_MAX_SIZE; + break; #endif - case NFT_CT_EXPIRATION: case NFT_CT_HELPER: if (tb[NFTA_CT_DIRECTION] != NULL) return -EINVAL; + len = NF_CT_HELPER_NAME_LEN; break; + case NFT_CT_L3PROTOCOL: case NFT_CT_PROTOCOL: + if (tb[NFTA_CT_DIRECTION] == NULL) + return -EINVAL; + len = sizeof(u8); + break; case NFT_CT_SRC: case NFT_CT_DST: + if (tb[NFTA_CT_DIRECTION] == NULL) + return -EINVAL; + + switch (ctx->afi->family) { + case NFPROTO_IPV4: + len = FIELD_SIZEOF(struct nf_conntrack_tuple, + src.u3.ip); + break; + case NFPROTO_IPV6: + case NFPROTO_INET: + len = FIELD_SIZEOF(struct nf_conntrack_tuple, + src.u3.ip6); + break; + default: + return -EAFNOSUPPORT; + } + break; case NFT_CT_PROTO_SRC: case NFT_CT_PROTO_DST: if (tb[NFTA_CT_DIRECTION] == NULL) return -EINVAL; + len = FIELD_SIZEOF(struct nf_conntrack_tuple, src.u.all); break; default: return -EOPNOTSUPP; @@ -265,12 +306,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, } } - priv->dreg = ntohl(nla_get_be32(tb[NFTA_CT_DREG])); - err = nft_validate_output_register(priv->dreg); - if (err < 0) - return err; - - err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + priv->dreg = nft_parse_register(tb[NFTA_CT_DREG]); + err = nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, len); if (err < 0) return err; @@ -286,20 +324,22 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_ct *priv = nft_expr_priv(expr); + unsigned int len; int err; priv->key = ntohl(nla_get_be32(tb[NFTA_CT_KEY])); switch (priv->key) { #ifdef CONFIG_NF_CONNTRACK_MARK case NFT_CT_MARK: + len = FIELD_SIZEOF(struct nf_conn, mark); break; #endif default: return -EOPNOTSUPP; } - priv->sreg = ntohl(nla_get_be32(tb[NFTA_CT_SREG])); - err = nft_validate_input_register(priv->sreg); + priv->sreg = nft_parse_register(tb[NFTA_CT_SREG]); + err = nft_validate_register_load(priv->sreg, len); if (err < 0) return err; @@ -320,7 +360,7 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_ct *priv = nft_expr_priv(expr); - if (nla_put_be32(skb, NFTA_CT_DREG, htonl(priv->dreg))) + if (nft_dump_register(skb, NFTA_CT_DREG, priv->dreg)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key))) goto nla_put_failure; @@ -347,7 +387,7 @@ static int nft_ct_set_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_ct *priv = nft_expr_priv(expr); - if (nla_put_be32(skb, NFTA_CT_SREG, htonl(priv->sreg))) + if (nft_dump_register(skb, NFTA_CT_SREG, priv->sreg)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key))) goto nla_put_failure; diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c new file mode 100644 index 000000000000..513a8ef60a59 --- /dev/null +++ b/net/netfilter/nft_dynset.c @@ -0,0 +1,265 @@ +/* + * Copyright (c) 2015 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> + +struct nft_dynset { + struct nft_set *set; + struct nft_set_ext_tmpl tmpl; + enum nft_dynset_ops op:8; + enum nft_registers sreg_key:8; + enum nft_registers sreg_data:8; + u64 timeout; + struct nft_expr *expr; + struct nft_set_binding binding; +}; + +static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr, + struct nft_regs *regs) +{ + const struct nft_dynset *priv = nft_expr_priv(expr); + struct nft_set_ext *ext; + u64 timeout; + void *elem; + + if (set->size && !atomic_add_unless(&set->nelems, 1, set->size)) + return NULL; + + timeout = priv->timeout ? : set->timeout; + elem = nft_set_elem_init(set, &priv->tmpl, + ®s->data[priv->sreg_key], + ®s->data[priv->sreg_data], + timeout, GFP_ATOMIC); + if (elem == NULL) { + if (set->size) + atomic_dec(&set->nelems); + return NULL; + } + + ext = nft_set_elem_ext(set, elem); + if (priv->expr != NULL) + nft_expr_clone(nft_set_ext_expr(ext), priv->expr); + + return elem; +} + +static void nft_dynset_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_dynset *priv = nft_expr_priv(expr); + struct nft_set *set = priv->set; + const struct nft_set_ext *ext; + const struct nft_expr *sexpr; + u64 timeout; + + if (set->ops->update(set, ®s->data[priv->sreg_key], nft_dynset_new, + expr, regs, &ext)) { + sexpr = NULL; + if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR)) + sexpr = nft_set_ext_expr(ext); + + if (priv->op == NFT_DYNSET_OP_UPDATE && + nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) { + timeout = priv->timeout ? : set->timeout; + *nft_set_ext_expiration(ext) = jiffies + timeout; + } else if (sexpr == NULL) + goto out; + + if (sexpr != NULL) + sexpr->ops->eval(sexpr, regs, pkt); + return; + } +out: + regs->verdict.code = NFT_BREAK; +} + +static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = { + [NFTA_DYNSET_SET_NAME] = { .type = NLA_STRING }, + [NFTA_DYNSET_SET_ID] = { .type = NLA_U32 }, + [NFTA_DYNSET_OP] = { .type = NLA_U32 }, + [NFTA_DYNSET_SREG_KEY] = { .type = NLA_U32 }, + [NFTA_DYNSET_SREG_DATA] = { .type = NLA_U32 }, + [NFTA_DYNSET_TIMEOUT] = { .type = NLA_U64 }, + [NFTA_DYNSET_EXPR] = { .type = NLA_NESTED }, +}; + +static int nft_dynset_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_dynset *priv = nft_expr_priv(expr); + struct nft_set *set; + u64 timeout; + int err; + + if (tb[NFTA_DYNSET_SET_NAME] == NULL || + tb[NFTA_DYNSET_OP] == NULL || + tb[NFTA_DYNSET_SREG_KEY] == NULL) + return -EINVAL; + + set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME]); + if (IS_ERR(set)) { + if (tb[NFTA_DYNSET_SET_ID]) + set = nf_tables_set_lookup_byid(ctx->net, + tb[NFTA_DYNSET_SET_ID]); + if (IS_ERR(set)) + return PTR_ERR(set); + } + + if (set->flags & NFT_SET_CONSTANT) + return -EBUSY; + + priv->op = ntohl(nla_get_be32(tb[NFTA_DYNSET_OP])); + switch (priv->op) { + case NFT_DYNSET_OP_ADD: + break; + case NFT_DYNSET_OP_UPDATE: + if (!(set->flags & NFT_SET_TIMEOUT)) + return -EOPNOTSUPP; + break; + default: + return -EOPNOTSUPP; + } + + timeout = 0; + if (tb[NFTA_DYNSET_TIMEOUT] != NULL) { + if (!(set->flags & NFT_SET_TIMEOUT)) + return -EINVAL; + timeout = be64_to_cpu(nla_get_be64(tb[NFTA_DYNSET_TIMEOUT])); + } + + priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]); + err = nft_validate_register_load(priv->sreg_key, set->klen);; + if (err < 0) + return err; + + if (tb[NFTA_DYNSET_SREG_DATA] != NULL) { + if (!(set->flags & NFT_SET_MAP)) + return -EINVAL; + if (set->dtype == NFT_DATA_VERDICT) + return -EOPNOTSUPP; + + priv->sreg_data = nft_parse_register(tb[NFTA_DYNSET_SREG_DATA]); + err = nft_validate_register_load(priv->sreg_data, set->dlen); + if (err < 0) + return err; + } else if (set->flags & NFT_SET_MAP) + return -EINVAL; + + if (tb[NFTA_DYNSET_EXPR] != NULL) { + if (!(set->flags & NFT_SET_EVAL)) + return -EINVAL; + if (!(set->flags & NFT_SET_ANONYMOUS)) + return -EOPNOTSUPP; + + priv->expr = nft_expr_init(ctx, tb[NFTA_DYNSET_EXPR]); + if (IS_ERR(priv->expr)) + return PTR_ERR(priv->expr); + + err = -EOPNOTSUPP; + if (!(priv->expr->ops->type->flags & NFT_EXPR_STATEFUL)) + goto err1; + } else if (set->flags & NFT_SET_EVAL) + return -EINVAL; + + nft_set_ext_prepare(&priv->tmpl); + nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_KEY, set->klen); + if (set->flags & NFT_SET_MAP) + nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_DATA, set->dlen); + if (priv->expr != NULL) + nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_EXPR, + priv->expr->ops->size); + if (set->flags & NFT_SET_TIMEOUT) { + if (timeout || set->timeout) + nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_EXPIRATION); + } + + priv->timeout = timeout; + + err = nf_tables_bind_set(ctx, set, &priv->binding); + if (err < 0) + goto err1; + + priv->set = set; + return 0; + +err1: + if (priv->expr != NULL) + nft_expr_destroy(ctx, priv->expr); + return err; +} + +static void nft_dynset_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_dynset *priv = nft_expr_priv(expr); + + nf_tables_unbind_set(ctx, priv->set, &priv->binding); + if (priv->expr != NULL) + nft_expr_destroy(ctx, priv->expr); +} + +static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_dynset *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_DYNSET_SREG_KEY, priv->sreg_key)) + goto nla_put_failure; + if (priv->set->flags & NFT_SET_MAP && + nft_dump_register(skb, NFTA_DYNSET_SREG_DATA, priv->sreg_data)) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_DYNSET_OP, htonl(priv->op))) + goto nla_put_failure; + if (nla_put_string(skb, NFTA_DYNSET_SET_NAME, priv->set->name)) + goto nla_put_failure; + if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, cpu_to_be64(priv->timeout))) + goto nla_put_failure; + if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_dynset_type; +static const struct nft_expr_ops nft_dynset_ops = { + .type = &nft_dynset_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_dynset)), + .eval = nft_dynset_eval, + .init = nft_dynset_init, + .destroy = nft_dynset_destroy, + .dump = nft_dynset_dump, +}; + +static struct nft_expr_type nft_dynset_type __read_mostly = { + .name = "dynset", + .ops = &nft_dynset_ops, + .policy = nft_dynset_policy, + .maxattr = NFTA_DYNSET_MAX, + .owner = THIS_MODULE, +}; + +int __init nft_dynset_module_init(void) +{ + return nft_register_expr(&nft_dynset_type); +} + +void nft_dynset_module_exit(void) +{ + nft_unregister_expr(&nft_dynset_type); +} diff --git a/net/netfilter/nft_expr_template.c b/net/netfilter/nft_expr_template.c deleted file mode 100644 index b6eed4d5a096..000000000000 --- a/net/netfilter/nft_expr_template.c +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Development of this code funded by Astaro AG (http://www.astaro.com/) - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/netlink.h> -#include <linux/netfilter.h> -#include <linux/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables.h> - -struct nft_template { - -}; - -static void nft_template_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], - const struct nft_pktinfo *pkt) -{ - struct nft_template *priv = nft_expr_priv(expr); - -} - -static const struct nla_policy nft_template_policy[NFTA_TEMPLATE_MAX + 1] = { - [NFTA_TEMPLATE_ATTR] = { .type = NLA_U32 }, -}; - -static int nft_template_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) -{ - struct nft_template *priv = nft_expr_priv(expr); - - return 0; -} - -static void nft_template_destroy(const struct nft_ctx *ctx, - const struct nft_expr *expr) -{ - struct nft_template *priv = nft_expr_priv(expr); - -} - -static int nft_template_dump(struct sk_buff *skb, const struct nft_expr *expr) -{ - const struct nft_template *priv = nft_expr_priv(expr); - - NLA_PUT_BE32(skb, NFTA_TEMPLATE_ATTR, priv->field); - return 0; - -nla_put_failure: - return -1; -} - -static struct nft_expr_type nft_template_type; -static const struct nft_expr_ops nft_template_ops = { - .type = &nft_template_type, - .size = NFT_EXPR_SIZE(sizeof(struct nft_template)), - .eval = nft_template_eval, - .init = nft_template_init, - .destroy = nft_template_destroy, - .dump = nft_template_dump, -}; - -static struct nft_expr_type nft_template_type __read_mostly = { - .name = "template", - .ops = &nft_template_ops, - .policy = nft_template_policy, - .maxattr = NFTA_TEMPLATE_MAX, - .owner = THIS_MODULE, -}; - -static int __init nft_template_module_init(void) -{ - return nft_register_expr(&nft_template_type); -} - -static void __exit nft_template_module_exit(void) -{ - nft_unregister_expr(&nft_template_type); -} - -module_init(nft_template_module_init); -module_exit(nft_template_module_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); -MODULE_ALIAS_NFT_EXPR("template"); diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 55c939f5371f..ba7aed13e174 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -26,11 +26,11 @@ struct nft_exthdr { }; static void nft_exthdr_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_exthdr *priv = nft_expr_priv(expr); - struct nft_data *dest = &data[priv->dreg]; + u32 *dest = ®s->data[priv->dreg]; unsigned int offset = 0; int err; @@ -39,11 +39,12 @@ static void nft_exthdr_eval(const struct nft_expr *expr, goto err; offset += priv->offset; - if (skb_copy_bits(pkt->skb, offset, dest->data, priv->len) < 0) + dest[priv->len / NFT_REG32_SIZE] = 0; + if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0) goto err; return; err: - data[NFT_REG_VERDICT].verdict = NFT_BREAK; + regs->verdict.code = NFT_BREAK; } static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = { @@ -58,7 +59,6 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_exthdr *priv = nft_expr_priv(expr); - int err; if (tb[NFTA_EXTHDR_DREG] == NULL || tb[NFTA_EXTHDR_TYPE] == NULL || @@ -69,22 +69,17 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); priv->offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET])); priv->len = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN])); - if (priv->len == 0 || - priv->len > FIELD_SIZEOF(struct nft_data, data)) - return -EINVAL; + priv->dreg = nft_parse_register(tb[NFTA_EXTHDR_DREG]); - priv->dreg = ntohl(nla_get_be32(tb[NFTA_EXTHDR_DREG])); - err = nft_validate_output_register(priv->dreg); - if (err < 0) - return err; - return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + return nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, priv->len); } static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_exthdr *priv = nft_expr_priv(expr); - if (nla_put_be32(skb, NFTA_EXTHDR_DREG, htonl(priv->dreg))) + if (nft_dump_register(skb, NFTA_EXTHDR_DREG, priv->dreg)) goto nla_put_failure; if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type)) goto nla_put_failure; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index c7e1a9d7d46f..3f9d45d3d9b7 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -15,6 +15,7 @@ #include <linux/log2.h> #include <linux/jhash.h> #include <linux/netlink.h> +#include <linux/workqueue.h> #include <linux/rhashtable.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> @@ -25,6 +26,7 @@ struct nft_hash { struct rhashtable ht; + struct delayed_work gc_work; }; struct nft_hash_elem { @@ -34,7 +36,7 @@ struct nft_hash_elem { struct nft_hash_cmp_arg { const struct nft_set *set; - const struct nft_data *key; + const u32 *key; u8 genmask; }; @@ -60,15 +62,16 @@ static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg, const struct nft_hash_cmp_arg *x = arg->key; const struct nft_hash_elem *he = ptr; - if (nft_data_cmp(nft_set_ext_key(&he->ext), x->key, x->set->klen)) + if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen)) + return 1; + if (nft_set_elem_expired(&he->ext)) return 1; if (!nft_set_elem_active(&he->ext, x->genmask)) return 1; return 0; } -static bool nft_hash_lookup(const struct nft_set *set, - const struct nft_data *key, +static bool nft_hash_lookup(const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext) { struct nft_hash *priv = nft_set_priv(set); @@ -86,6 +89,42 @@ static bool nft_hash_lookup(const struct nft_set *set, return !!he; } +static bool nft_hash_update(struct nft_set *set, const u32 *key, + void *(*new)(struct nft_set *, + const struct nft_expr *, + struct nft_regs *regs), + const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_set_ext **ext) +{ + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he; + struct nft_hash_cmp_arg arg = { + .genmask = NFT_GENMASK_ANY, + .set = set, + .key = key, + }; + + he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); + if (he != NULL) + goto out; + + he = new(set, expr, regs); + if (he == NULL) + goto err1; + if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node, + nft_hash_params)) + goto err2; +out: + *ext = &he->ext; + return true; + +err2: + nft_set_elem_destroy(set, he); +err1: + return false; +} + static int nft_hash_insert(const struct nft_set *set, const struct nft_set_elem *elem) { @@ -94,7 +133,7 @@ static int nft_hash_insert(const struct nft_set *set, struct nft_hash_cmp_arg arg = { .genmask = nft_genmask_next(read_pnet(&set->pnet)), .set = set, - .key = &elem->key, + .key = elem->key.val.data, }; return rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node, @@ -107,6 +146,7 @@ static void nft_hash_activate(const struct nft_set *set, struct nft_hash_elem *he = elem->priv; nft_set_elem_change_active(set, &he->ext); + nft_set_elem_clear_busy(&he->ext); } static void *nft_hash_deactivate(const struct nft_set *set, @@ -117,12 +157,18 @@ static void *nft_hash_deactivate(const struct nft_set *set, struct nft_hash_cmp_arg arg = { .genmask = nft_genmask_next(read_pnet(&set->pnet)), .set = set, - .key = &elem->key, + .key = elem->key.val.data, }; + rcu_read_lock(); he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); - if (he != NULL) - nft_set_elem_change_active(set, &he->ext); + if (he != NULL) { + if (!nft_set_elem_mark_busy(&he->ext)) + nft_set_elem_change_active(set, &he->ext); + else + he = NULL; + } + rcu_read_unlock(); return he; } @@ -170,6 +216,8 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, if (iter->count < iter->skip) goto cont; + if (nft_set_elem_expired(&he->ext)) + goto cont; if (!nft_set_elem_active(&he->ext, genmask)) goto cont; @@ -188,6 +236,55 @@ out: rhashtable_walk_exit(&hti); } +static void nft_hash_gc(struct work_struct *work) +{ + struct nft_set *set; + struct nft_hash_elem *he; + struct nft_hash *priv; + struct nft_set_gc_batch *gcb = NULL; + struct rhashtable_iter hti; + int err; + + priv = container_of(work, struct nft_hash, gc_work.work); + set = nft_set_container_of(priv); + + err = rhashtable_walk_init(&priv->ht, &hti); + if (err) + goto schedule; + + err = rhashtable_walk_start(&hti); + if (err && err != -EAGAIN) + goto out; + + while ((he = rhashtable_walk_next(&hti))) { + if (IS_ERR(he)) { + if (PTR_ERR(he) != -EAGAIN) + goto out; + continue; + } + + if (!nft_set_elem_expired(&he->ext)) + continue; + if (nft_set_elem_mark_busy(&he->ext)) + continue; + + gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); + if (gcb == NULL) + goto out; + rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params); + atomic_dec(&set->nelems); + nft_set_gc_batch_add(gcb, he); + } +out: + rhashtable_walk_stop(&hti); + rhashtable_walk_exit(&hti); + + nft_set_gc_batch_complete(gcb); +schedule: + queue_delayed_work(system_power_efficient_wq, &priv->gc_work, + nft_set_gc_interval(set)); +} + static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) { return sizeof(struct nft_hash); @@ -207,11 +304,20 @@ static int nft_hash_init(const struct nft_set *set, { struct nft_hash *priv = nft_set_priv(set); struct rhashtable_params params = nft_hash_params; + int err; params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT; params.key_len = set->klen; - return rhashtable_init(&priv->ht, ¶ms); + err = rhashtable_init(&priv->ht, ¶ms); + if (err < 0) + return err; + + INIT_DEFERRABLE_WORK(&priv->gc_work, nft_hash_gc); + if (set->flags & NFT_SET_TIMEOUT) + queue_delayed_work(system_power_efficient_wq, &priv->gc_work, + nft_set_gc_interval(set)); + return 0; } static void nft_hash_elem_destroy(void *ptr, void *arg) @@ -223,6 +329,7 @@ static void nft_hash_destroy(const struct nft_set *set) { struct nft_hash *priv = nft_set_priv(set); + cancel_delayed_work_sync(&priv->gc_work); rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy, (void *)set); } @@ -263,8 +370,9 @@ static struct nft_set_ops nft_hash_ops __read_mostly = { .deactivate = nft_hash_deactivate, .remove = nft_hash_remove, .lookup = nft_hash_lookup, + .update = nft_hash_update, .walk = nft_hash_walk, - .features = NFT_SET_MAP, + .features = NFT_SET_MAP | NFT_SET_TIMEOUT, .owner = THIS_MODULE, }; diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 810385eb7249..db3b746858e3 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -24,12 +24,12 @@ struct nft_immediate_expr { }; static void nft_immediate_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); - nft_data_copy(&data[priv->dreg], &priv->data); + nft_data_copy(®s->data[priv->dreg], &priv->data, priv->dlen); } static const struct nla_policy nft_immediate_policy[NFTA_IMMEDIATE_MAX + 1] = { @@ -49,17 +49,15 @@ static int nft_immediate_init(const struct nft_ctx *ctx, tb[NFTA_IMMEDIATE_DATA] == NULL) return -EINVAL; - priv->dreg = ntohl(nla_get_be32(tb[NFTA_IMMEDIATE_DREG])); - err = nft_validate_output_register(priv->dreg); - if (err < 0) - return err; - - err = nft_data_init(ctx, &priv->data, &desc, tb[NFTA_IMMEDIATE_DATA]); + err = nft_data_init(ctx, &priv->data, sizeof(priv->data), &desc, + tb[NFTA_IMMEDIATE_DATA]); if (err < 0) return err; priv->dlen = desc.len; - err = nft_validate_data_load(ctx, priv->dreg, &priv->data, desc.type); + priv->dreg = nft_parse_register(tb[NFTA_IMMEDIATE_DREG]); + err = nft_validate_register_store(ctx, priv->dreg, &priv->data, + desc.type, desc.len); if (err < 0) goto err1; @@ -81,7 +79,7 @@ static int nft_immediate_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); - if (nla_put_be32(skb, NFTA_IMMEDIATE_DREG, htonl(priv->dreg))) + if (nft_dump_register(skb, NFTA_IMMEDIATE_DREG, priv->dreg)) goto nla_put_failure; return nft_data_dump(skb, NFTA_IMMEDIATE_DATA, &priv->data, diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index 85da5bd02f64..435c1ccd6c0e 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -27,7 +27,7 @@ struct nft_limit { }; static void nft_limit_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_limit *priv = nft_expr_priv(expr); @@ -45,7 +45,7 @@ static void nft_limit_eval(const struct nft_expr *expr, } spin_unlock_bh(&limit_lock); - data[NFT_REG_VERDICT].verdict = NFT_BREAK; + regs->verdict.code = NFT_BREAK; } static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = { @@ -98,6 +98,7 @@ static struct nft_expr_type nft_limit_type __read_mostly = { .ops = &nft_limit_ops, .policy = nft_limit_policy, .maxattr = NFTA_LIMIT_MAX, + .flags = NFT_EXPR_STATEFUL, .owner = THIS_MODULE, }; diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index e18af9db2f04..a13d6a386d63 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -27,7 +27,7 @@ struct nft_log { }; static void nft_log_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_log *priv = nft_expr_priv(expr); diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index a5f30b8760ea..b3c31ef8015d 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -26,19 +26,20 @@ struct nft_lookup { }; static void nft_lookup_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_lookup *priv = nft_expr_priv(expr); const struct nft_set *set = priv->set; const struct nft_set_ext *ext; - if (set->ops->lookup(set, &data[priv->sreg], &ext)) { + if (set->ops->lookup(set, ®s->data[priv->sreg], &ext)) { if (set->flags & NFT_SET_MAP) - nft_data_copy(&data[priv->dreg], nft_set_ext_data(ext)); + nft_data_copy(®s->data[priv->dreg], + nft_set_ext_data(ext), set->dlen); return; } - data[NFT_REG_VERDICT].verdict = NFT_BREAK; + regs->verdict.code = NFT_BREAK; } static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = { @@ -70,8 +71,11 @@ static int nft_lookup_init(const struct nft_ctx *ctx, return PTR_ERR(set); } - priv->sreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_SREG])); - err = nft_validate_input_register(priv->sreg); + if (set->flags & NFT_SET_EVAL) + return -EOPNOTSUPP; + + priv->sreg = nft_parse_register(tb[NFTA_LOOKUP_SREG]); + err = nft_validate_register_load(priv->sreg, set->klen); if (err < 0) return err; @@ -79,19 +83,16 @@ static int nft_lookup_init(const struct nft_ctx *ctx, if (!(set->flags & NFT_SET_MAP)) return -EINVAL; - priv->dreg = ntohl(nla_get_be32(tb[NFTA_LOOKUP_DREG])); - err = nft_validate_output_register(priv->dreg); + priv->dreg = nft_parse_register(tb[NFTA_LOOKUP_DREG]); + err = nft_validate_register_store(ctx, priv->dreg, NULL, + set->dtype, set->dlen); if (err < 0) return err; - - if (priv->dreg == NFT_REG_VERDICT) { - if (set->dtype != NFT_DATA_VERDICT) - return -EINVAL; - } else if (set->dtype == NFT_DATA_VERDICT) - return -EINVAL; } else if (set->flags & NFT_SET_MAP) return -EINVAL; + priv->binding.flags = set->flags & NFT_SET_MAP; + err = nf_tables_bind_set(ctx, set, &priv->binding); if (err < 0) return err; @@ -114,10 +115,10 @@ static int nft_lookup_dump(struct sk_buff *skb, const struct nft_expr *expr) if (nla_put_string(skb, NFTA_LOOKUP_SET, priv->set->name)) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_LOOKUP_SREG, htonl(priv->sreg))) + if (nft_dump_register(skb, NFTA_LOOKUP_SREG, priv->sreg)) goto nla_put_failure; if (priv->set->flags & NFT_SET_MAP) - if (nla_put_be32(skb, NFTA_LOOKUP_DREG, htonl(priv->dreg))) + if (nft_dump_register(skb, NFTA_LOOKUP_DREG, priv->dreg)) goto nla_put_failure; return 0; diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 5197874372ec..52561e1c31e2 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -25,62 +25,65 @@ #include <net/netfilter/nft_meta.h> void nft_meta_get_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_meta *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; const struct net_device *in = pkt->in, *out = pkt->out; - struct nft_data *dest = &data[priv->dreg]; + u32 *dest = ®s->data[priv->dreg]; switch (priv->key) { case NFT_META_LEN: - dest->data[0] = skb->len; + *dest = skb->len; break; case NFT_META_PROTOCOL: - *(__be16 *)dest->data = skb->protocol; + *dest = 0; + *(__be16 *)dest = skb->protocol; break; case NFT_META_NFPROTO: - dest->data[0] = pkt->ops->pf; + *dest = pkt->ops->pf; break; case NFT_META_L4PROTO: - dest->data[0] = pkt->tprot; + *dest = pkt->tprot; break; case NFT_META_PRIORITY: - dest->data[0] = skb->priority; + *dest = skb->priority; break; case NFT_META_MARK: - dest->data[0] = skb->mark; + *dest = skb->mark; break; case NFT_META_IIF: if (in == NULL) goto err; - dest->data[0] = in->ifindex; + *dest = in->ifindex; break; case NFT_META_OIF: if (out == NULL) goto err; - dest->data[0] = out->ifindex; + *dest = out->ifindex; break; case NFT_META_IIFNAME: if (in == NULL) goto err; - strncpy((char *)dest->data, in->name, sizeof(dest->data)); + strncpy((char *)dest, in->name, IFNAMSIZ); break; case NFT_META_OIFNAME: if (out == NULL) goto err; - strncpy((char *)dest->data, out->name, sizeof(dest->data)); + strncpy((char *)dest, out->name, IFNAMSIZ); break; case NFT_META_IIFTYPE: if (in == NULL) goto err; - *(u16 *)dest->data = in->type; + *dest = 0; + *(u16 *)dest = in->type; break; case NFT_META_OIFTYPE: if (out == NULL) goto err; - *(u16 *)dest->data = out->type; + *dest = 0; + *(u16 *)dest = out->type; break; case NFT_META_SKUID: if (skb->sk == NULL || !sk_fullsock(skb->sk)) @@ -93,8 +96,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, goto err; } - dest->data[0] = - from_kuid_munged(&init_user_ns, + *dest = from_kuid_munged(&init_user_ns, skb->sk->sk_socket->file->f_cred->fsuid); read_unlock_bh(&skb->sk->sk_callback_lock); break; @@ -108,8 +110,7 @@ void nft_meta_get_eval(const struct nft_expr *expr, read_unlock_bh(&skb->sk->sk_callback_lock); goto err; } - dest->data[0] = - from_kgid_munged(&init_user_ns, + *dest = from_kgid_munged(&init_user_ns, skb->sk->sk_socket->file->f_cred->fsgid); read_unlock_bh(&skb->sk->sk_callback_lock); break; @@ -119,33 +120,33 @@ void nft_meta_get_eval(const struct nft_expr *expr, if (dst == NULL) goto err; - dest->data[0] = dst->tclassid; + *dest = dst->tclassid; break; } #endif #ifdef CONFIG_NETWORK_SECMARK case NFT_META_SECMARK: - dest->data[0] = skb->secmark; + *dest = skb->secmark; break; #endif case NFT_META_PKTTYPE: if (skb->pkt_type != PACKET_LOOPBACK) { - dest->data[0] = skb->pkt_type; + *dest = skb->pkt_type; break; } switch (pkt->ops->pf) { case NFPROTO_IPV4: if (ipv4_is_multicast(ip_hdr(skb)->daddr)) - dest->data[0] = PACKET_MULTICAST; + *dest = PACKET_MULTICAST; else - dest->data[0] = PACKET_BROADCAST; + *dest = PACKET_BROADCAST; break; case NFPROTO_IPV6: if (ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF) - dest->data[0] = PACKET_MULTICAST; + *dest = PACKET_MULTICAST; else - dest->data[0] = PACKET_BROADCAST; + *dest = PACKET_BROADCAST; break; default: WARN_ON(1); @@ -153,23 +154,22 @@ void nft_meta_get_eval(const struct nft_expr *expr, } break; case NFT_META_CPU: - dest->data[0] = raw_smp_processor_id(); + *dest = raw_smp_processor_id(); break; case NFT_META_IIFGROUP: if (in == NULL) goto err; - dest->data[0] = in->group; + *dest = in->group; break; case NFT_META_OIFGROUP: if (out == NULL) goto err; - dest->data[0] = out->group; + *dest = out->group; break; case NFT_META_CGROUP: - if (skb->sk == NULL) - break; - - dest->data[0] = skb->sk->sk_classid; + if (skb->sk == NULL || !sk_fullsock(skb->sk)) + goto err; + *dest = skb->sk->sk_classid; break; default: WARN_ON(1); @@ -178,17 +178,17 @@ void nft_meta_get_eval(const struct nft_expr *expr, return; err: - data[NFT_REG_VERDICT].verdict = NFT_BREAK; + regs->verdict.code = NFT_BREAK; } EXPORT_SYMBOL_GPL(nft_meta_get_eval); void nft_meta_set_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_meta *meta = nft_expr_priv(expr); struct sk_buff *skb = pkt->skb; - u32 value = data[meta->sreg].data[0]; + u32 value = regs->data[meta->sreg]; switch (meta->key) { case NFT_META_MARK: @@ -218,22 +218,22 @@ int nft_meta_get_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_meta *priv = nft_expr_priv(expr); - int err; + unsigned int len; priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); switch (priv->key) { - case NFT_META_LEN: case NFT_META_PROTOCOL: + case NFT_META_IIFTYPE: + case NFT_META_OIFTYPE: + len = sizeof(u16); + break; case NFT_META_NFPROTO: case NFT_META_L4PROTO: + case NFT_META_LEN: case NFT_META_PRIORITY: case NFT_META_MARK: case NFT_META_IIF: case NFT_META_OIF: - case NFT_META_IIFNAME: - case NFT_META_OIFNAME: - case NFT_META_IIFTYPE: - case NFT_META_OIFTYPE: case NFT_META_SKUID: case NFT_META_SKGID: #ifdef CONFIG_IP_ROUTE_CLASSID @@ -247,21 +247,19 @@ int nft_meta_get_init(const struct nft_ctx *ctx, case NFT_META_IIFGROUP: case NFT_META_OIFGROUP: case NFT_META_CGROUP: + len = sizeof(u32); + break; + case NFT_META_IIFNAME: + case NFT_META_OIFNAME: + len = IFNAMSIZ; break; default: return -EOPNOTSUPP; } - priv->dreg = ntohl(nla_get_be32(tb[NFTA_META_DREG])); - err = nft_validate_output_register(priv->dreg); - if (err < 0) - return err; - - err = nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); - if (err < 0) - return err; - - return 0; + priv->dreg = nft_parse_register(tb[NFTA_META_DREG]); + return nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, len); } EXPORT_SYMBOL_GPL(nft_meta_get_init); @@ -270,20 +268,24 @@ int nft_meta_set_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_meta *priv = nft_expr_priv(expr); + unsigned int len; int err; priv->key = ntohl(nla_get_be32(tb[NFTA_META_KEY])); switch (priv->key) { case NFT_META_MARK: case NFT_META_PRIORITY: + len = sizeof(u32); + break; case NFT_META_NFTRACE: + len = sizeof(u8); break; default: return -EOPNOTSUPP; } - priv->sreg = ntohl(nla_get_be32(tb[NFTA_META_SREG])); - err = nft_validate_input_register(priv->sreg); + priv->sreg = nft_parse_register(tb[NFTA_META_SREG]); + err = nft_validate_register_load(priv->sreg, len); if (err < 0) return err; @@ -298,7 +300,7 @@ int nft_meta_get_dump(struct sk_buff *skb, if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key))) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_META_DREG, htonl(priv->dreg))) + if (nft_dump_register(skb, NFTA_META_DREG, priv->dreg)) goto nla_put_failure; return 0; @@ -314,7 +316,7 @@ int nft_meta_set_dump(struct sk_buff *skb, if (nla_put_be32(skb, NFTA_META_KEY, htonl(priv->key))) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_META_SREG, htonl(priv->sreg))) + if (nft_dump_register(skb, NFTA_META_SREG, priv->sreg)) goto nla_put_failure; return 0; diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index a0837c6c9283..ee2d71753746 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -37,7 +37,7 @@ struct nft_nat { }; static void nft_nat_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_nat *priv = nft_expr_priv(expr); @@ -49,33 +49,32 @@ static void nft_nat_eval(const struct nft_expr *expr, if (priv->sreg_addr_min) { if (priv->family == AF_INET) { range.min_addr.ip = (__force __be32) - data[priv->sreg_addr_min].data[0]; + regs->data[priv->sreg_addr_min]; range.max_addr.ip = (__force __be32) - data[priv->sreg_addr_max].data[0]; + regs->data[priv->sreg_addr_max]; } else { memcpy(range.min_addr.ip6, - data[priv->sreg_addr_min].data, - sizeof(struct nft_data)); + ®s->data[priv->sreg_addr_min], + sizeof(range.min_addr.ip6)); memcpy(range.max_addr.ip6, - data[priv->sreg_addr_max].data, - sizeof(struct nft_data)); + ®s->data[priv->sreg_addr_max], + sizeof(range.max_addr.ip6)); } range.flags |= NF_NAT_RANGE_MAP_IPS; } if (priv->sreg_proto_min) { range.min_proto.all = - *(__be16 *)&data[priv->sreg_proto_min].data[0]; + *(__be16 *)®s->data[priv->sreg_proto_min]; range.max_proto.all = - *(__be16 *)&data[priv->sreg_proto_max].data[0]; + *(__be16 *)®s->data[priv->sreg_proto_max]; range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } range.flags |= priv->flags; - data[NFT_REG_VERDICT].verdict = - nf_nat_setup_info(ct, &range, priv->type); + regs->verdict.code = nf_nat_setup_info(ct, &range, priv->type); } static const struct nla_policy nft_nat_policy[NFTA_NAT_MAX + 1] = { @@ -119,6 +118,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_nat *priv = nft_expr_priv(expr); + unsigned int alen, plen; u32 family; int err; @@ -146,25 +146,34 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, return -EINVAL; family = ntohl(nla_get_be32(tb[NFTA_NAT_FAMILY])); - if (family != AF_INET && family != AF_INET6) - return -EAFNOSUPPORT; if (family != ctx->afi->family) return -EOPNOTSUPP; + + switch (family) { + case NFPROTO_IPV4: + alen = FIELD_SIZEOF(struct nf_nat_range, min_addr.ip); + break; + case NFPROTO_IPV6: + alen = FIELD_SIZEOF(struct nf_nat_range, min_addr.ip6); + break; + default: + return -EAFNOSUPPORT; + } priv->family = family; if (tb[NFTA_NAT_REG_ADDR_MIN]) { priv->sreg_addr_min = - ntohl(nla_get_be32(tb[NFTA_NAT_REG_ADDR_MIN])); - - err = nft_validate_input_register(priv->sreg_addr_min); + nft_parse_register(tb[NFTA_NAT_REG_ADDR_MIN]); + err = nft_validate_register_load(priv->sreg_addr_min, alen); if (err < 0) return err; if (tb[NFTA_NAT_REG_ADDR_MAX]) { priv->sreg_addr_max = - ntohl(nla_get_be32(tb[NFTA_NAT_REG_ADDR_MAX])); + nft_parse_register(tb[NFTA_NAT_REG_ADDR_MAX]); - err = nft_validate_input_register(priv->sreg_addr_max); + err = nft_validate_register_load(priv->sreg_addr_max, + alen); if (err < 0) return err; } else { @@ -172,19 +181,21 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr, } } + plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all); if (tb[NFTA_NAT_REG_PROTO_MIN]) { priv->sreg_proto_min = - ntohl(nla_get_be32(tb[NFTA_NAT_REG_PROTO_MIN])); + nft_parse_register(tb[NFTA_NAT_REG_PROTO_MIN]); - err = nft_validate_input_register(priv->sreg_proto_min); + err = nft_validate_register_load(priv->sreg_proto_min, plen); if (err < 0) return err; if (tb[NFTA_NAT_REG_PROTO_MAX]) { priv->sreg_proto_max = - ntohl(nla_get_be32(tb[NFTA_NAT_REG_PROTO_MAX])); + nft_parse_register(tb[NFTA_NAT_REG_PROTO_MAX]); - err = nft_validate_input_register(priv->sreg_proto_max); + err = nft_validate_register_load(priv->sreg_proto_max, + plen); if (err < 0) return err; } else { @@ -220,18 +231,18 @@ static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr) goto nla_put_failure; if (priv->sreg_addr_min) { - if (nla_put_be32(skb, NFTA_NAT_REG_ADDR_MIN, - htonl(priv->sreg_addr_min)) || - nla_put_be32(skb, NFTA_NAT_REG_ADDR_MAX, - htonl(priv->sreg_addr_max))) + if (nft_dump_register(skb, NFTA_NAT_REG_ADDR_MIN, + priv->sreg_addr_min) || + nft_dump_register(skb, NFTA_NAT_REG_ADDR_MAX, + priv->sreg_addr_max)) goto nla_put_failure; } if (priv->sreg_proto_min) { - if (nla_put_be32(skb, NFTA_NAT_REG_PROTO_MIN, - htonl(priv->sreg_proto_min)) || - nla_put_be32(skb, NFTA_NAT_REG_PROTO_MAX, - htonl(priv->sreg_proto_max))) + if (nft_dump_register(skb, NFTA_NAT_REG_PROTO_MIN, + priv->sreg_proto_min) || + nft_dump_register(skb, NFTA_NAT_REG_PROTO_MAX, + priv->sreg_proto_max)) goto nla_put_failure; } diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 85daa84bfdfe..94fb3b27a2c5 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -18,12 +18,12 @@ #include <net/netfilter/nf_tables.h> static void nft_payload_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_payload *priv = nft_expr_priv(expr); const struct sk_buff *skb = pkt->skb; - struct nft_data *dest = &data[priv->dreg]; + u32 *dest = ®s->data[priv->dreg]; int offset; switch (priv->base) { @@ -43,11 +43,12 @@ static void nft_payload_eval(const struct nft_expr *expr, } offset += priv->offset; - if (skb_copy_bits(skb, offset, dest->data, priv->len) < 0) + dest[priv->len / NFT_REG32_SIZE] = 0; + if (skb_copy_bits(skb, offset, dest, priv->len) < 0) goto err; return; err: - data[NFT_REG_VERDICT].verdict = NFT_BREAK; + regs->verdict.code = NFT_BREAK; } static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = { @@ -62,24 +63,21 @@ static int nft_payload_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_payload *priv = nft_expr_priv(expr); - int err; priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE])); priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); + priv->dreg = nft_parse_register(tb[NFTA_PAYLOAD_DREG]); - priv->dreg = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_DREG])); - err = nft_validate_output_register(priv->dreg); - if (err < 0) - return err; - return nft_validate_data_load(ctx, priv->dreg, NULL, NFT_DATA_VALUE); + return nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, priv->len); } static int nft_payload_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_payload *priv = nft_expr_priv(expr); - if (nla_put_be32(skb, NFTA_PAYLOAD_DREG, htonl(priv->dreg)) || + if (nft_dump_register(skb, NFTA_PAYLOAD_DREG, priv->dreg) || nla_put_be32(skb, NFTA_PAYLOAD_BASE, htonl(priv->base)) || nla_put_be32(skb, NFTA_PAYLOAD_OFFSET, htonl(priv->offset)) || nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len))) @@ -131,9 +129,7 @@ nft_payload_select_ops(const struct nft_ctx *ctx, } offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET])); - len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); - if (len == 0 || len > FIELD_SIZEOF(struct nft_data, data)) - return ERR_PTR(-EINVAL); + len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN])); if (len <= 4 && is_power_of_2(len) && IS_ALIGNED(offset, len) && base != NFT_PAYLOAD_LL_HEADER) diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index e8ae2f6bf232..96805d21d618 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -28,7 +28,7 @@ struct nft_queue { }; static void nft_queue_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_queue *priv = nft_expr_priv(expr); @@ -51,7 +51,7 @@ static void nft_queue_eval(const struct nft_expr *expr, if (priv->flags & NFT_QUEUE_FLAG_BYPASS) ret |= NF_VERDICT_FLAG_QUEUE_BYPASS; - data[NFT_REG_VERDICT].verdict = ret; + regs->verdict.code = ret; } static const struct nla_policy nft_queue_policy[NFTA_QUEUE_MAX + 1] = { diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index 42d0ca45fb9e..1c30f41cff5b 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -30,8 +30,7 @@ struct nft_rbtree_elem { }; -static bool nft_rbtree_lookup(const struct nft_set *set, - const struct nft_data *key, +static bool nft_rbtree_lookup(const struct nft_set *set, const u32 *key, const struct nft_set_ext **ext) { const struct nft_rbtree *priv = nft_set_priv(set); @@ -45,7 +44,7 @@ static bool nft_rbtree_lookup(const struct nft_set *set, while (parent != NULL) { rbe = rb_entry(parent, struct nft_rbtree_elem, node); - d = nft_data_cmp(nft_set_ext_key(&rbe->ext), key, set->klen); + d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen); if (d < 0) { parent = parent->rb_left; interval = rbe; @@ -91,9 +90,9 @@ static int __nft_rbtree_insert(const struct nft_set *set, while (*p != NULL) { parent = *p; rbe = rb_entry(parent, struct nft_rbtree_elem, node); - d = nft_data_cmp(nft_set_ext_key(&rbe->ext), - nft_set_ext_key(&new->ext), - set->klen); + d = memcmp(nft_set_ext_key(&rbe->ext), + nft_set_ext_key(&new->ext), + set->klen); if (d < 0) p = &parent->rb_left; else if (d > 0) @@ -153,8 +152,8 @@ static void *nft_rbtree_deactivate(const struct nft_set *set, while (parent != NULL) { rbe = rb_entry(parent, struct nft_rbtree_elem, node); - d = nft_data_cmp(nft_set_ext_key(&rbe->ext), &elem->key, - set->klen); + d = memcmp(nft_set_ext_key(&rbe->ext), &elem->key.val, + set->klen); if (d < 0) parent = parent->rb_left; else if (d > 0) diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index d7e9e93a4e90..03f7bf40ae75 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -44,25 +44,28 @@ int nft_redir_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_redir *priv = nft_expr_priv(expr); + unsigned int plen; int err; err = nft_redir_validate(ctx, expr, NULL); if (err < 0) return err; + plen = FIELD_SIZEOF(struct nf_nat_range, min_addr.all); if (tb[NFTA_REDIR_REG_PROTO_MIN]) { priv->sreg_proto_min = - ntohl(nla_get_be32(tb[NFTA_REDIR_REG_PROTO_MIN])); + nft_parse_register(tb[NFTA_REDIR_REG_PROTO_MIN]); - err = nft_validate_input_register(priv->sreg_proto_min); + err = nft_validate_register_load(priv->sreg_proto_min, plen); if (err < 0) return err; if (tb[NFTA_REDIR_REG_PROTO_MAX]) { priv->sreg_proto_max = - ntohl(nla_get_be32(tb[NFTA_REDIR_REG_PROTO_MAX])); + nft_parse_register(tb[NFTA_REDIR_REG_PROTO_MAX]); - err = nft_validate_input_register(priv->sreg_proto_max); + err = nft_validate_register_load(priv->sreg_proto_max, + plen); if (err < 0) return err; } else { @@ -85,11 +88,11 @@ int nft_redir_dump(struct sk_buff *skb, const struct nft_expr *expr) const struct nft_redir *priv = nft_expr_priv(expr); if (priv->sreg_proto_min) { - if (nla_put_be32(skb, NFTA_REDIR_REG_PROTO_MIN, - htonl(priv->sreg_proto_min))) + if (nft_dump_register(skb, NFTA_REDIR_REG_PROTO_MIN, + priv->sreg_proto_min)) goto nla_put_failure; - if (nla_put_be32(skb, NFTA_REDIR_REG_PROTO_MAX, - htonl(priv->sreg_proto_max))) + if (nft_dump_register(skb, NFTA_REDIR_REG_PROTO_MAX, + priv->sreg_proto_max)) goto nla_put_failure; } diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c index 92877114aff4..62cabee42fbe 100644 --- a/net/netfilter/nft_reject_inet.c +++ b/net/netfilter/nft_reject_inet.c @@ -18,7 +18,7 @@ #include <net/netfilter/ipv6/nf_reject.h> static void nft_reject_inet_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); @@ -58,7 +58,8 @@ static void nft_reject_inet_eval(const struct nft_expr *expr, } break; } - data[NFT_REG_VERDICT].verdict = NF_DROP; + + regs->verdict.code = NF_DROP; } static int nft_reject_inet_init(const struct nft_ctx *ctx, diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index c205b26a2bee..cca96cec1b68 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -272,7 +272,7 @@ tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport, hp->source, lport ? lport : hp->dest, skb->dev, NFT_LOOKUP_LISTENER); if (sk2) { - inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row); + inet_twsk_deschedule(inet_twsk(sk)); inet_twsk_put(inet_twsk(sk)); sk = sk2; } @@ -437,7 +437,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, tgi->lport ? tgi->lport : hp->dest, skb->dev, NFT_LOOKUP_LISTENER); if (sk2) { - inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row); + inet_twsk_deschedule(inet_twsk(sk)); inet_twsk_put(inet_twsk(sk)); sk = sk2; } diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index 7198d660b4de..a1d126f29463 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -39,7 +39,7 @@ cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_cgroup_info *info = par->matchinfo; - if (skb->sk == NULL) + if (skb->sk == NULL || !sk_fullsock(skb->sk)) return false; return (info->id == skb->sk->sk_classid) ^ info->invert; diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 50a52043650f..1caaccbc306c 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -25,16 +25,15 @@ MODULE_ALIAS("ip6t_physdev"); static bool physdev_mt(const struct sk_buff *skb, struct xt_action_param *par) { - static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); const struct xt_physdev_info *info = par->matchinfo; + const struct net_device *physdev; unsigned long ret; const char *indev, *outdev; - const struct nf_bridge_info *nf_bridge; /* Not a bridged IP packet or no info available yet: * LOCAL_OUT/mangle and LOCAL_OUT/nat don't know if * the destination device will be a bridge. */ - if (!(nf_bridge = skb->nf_bridge)) { + if (!skb->nf_bridge) { /* Return MATCH if the invert flags of the used options are on */ if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) && !(info->invert & XT_PHYSDEV_OP_BRIDGED)) @@ -54,30 +53,41 @@ physdev_mt(const struct sk_buff *skb, struct xt_action_param *par) return true; } + physdev = nf_bridge_get_physoutdev(skb); + outdev = physdev ? physdev->name : NULL; + /* This only makes sense in the FORWARD and POSTROUTING chains */ if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) && - (!!nf_bridge->physoutdev ^ !(info->invert & XT_PHYSDEV_OP_BRIDGED))) + (!!outdev ^ !(info->invert & XT_PHYSDEV_OP_BRIDGED))) return false; + physdev = nf_bridge_get_physindev(skb); + indev = physdev ? physdev->name : NULL; + if ((info->bitmask & XT_PHYSDEV_OP_ISIN && - (!nf_bridge->physindev ^ !!(info->invert & XT_PHYSDEV_OP_ISIN))) || + (!indev ^ !!(info->invert & XT_PHYSDEV_OP_ISIN))) || (info->bitmask & XT_PHYSDEV_OP_ISOUT && - (!nf_bridge->physoutdev ^ !!(info->invert & XT_PHYSDEV_OP_ISOUT)))) + (!outdev ^ !!(info->invert & XT_PHYSDEV_OP_ISOUT)))) return false; if (!(info->bitmask & XT_PHYSDEV_OP_IN)) goto match_outdev; - indev = nf_bridge->physindev ? nf_bridge->physindev->name : nulldevname; - ret = ifname_compare_aligned(indev, info->physindev, info->in_mask); - if (!ret ^ !(info->invert & XT_PHYSDEV_OP_IN)) - return false; + if (indev) { + ret = ifname_compare_aligned(indev, info->physindev, + info->in_mask); + + if (!ret ^ !(info->invert & XT_PHYSDEV_OP_IN)) + return false; + } match_outdev: if (!(info->bitmask & XT_PHYSDEV_OP_OUT)) return true; - outdev = nf_bridge->physoutdev ? - nf_bridge->physoutdev->name : nulldevname; + + if (!outdev) + return false; + ret = ifname_compare_aligned(outdev, info->physoutdev, info->out_mask); return (!!ret ^ !(info->invert & XT_PHYSDEV_OP_OUT)); diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 895534e87a47..e092cb046326 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -143,13 +143,10 @@ static bool xt_socket_sk_is_transparent(struct sock *sk) } } -static bool -socket_match(const struct sk_buff *skb, struct xt_action_param *par, - const struct xt_socket_mtinfo1 *info) +static struct sock *xt_socket_lookup_slow_v4(const struct sk_buff *skb, + const struct net_device *indev) { const struct iphdr *iph = ip_hdr(skb); - struct udphdr _hdr, *hp = NULL; - struct sock *sk = skb->sk; __be32 uninitialized_var(daddr), uninitialized_var(saddr); __be16 uninitialized_var(dport), uninitialized_var(sport); u8 uninitialized_var(protocol); @@ -159,10 +156,12 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, #endif if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) { + struct udphdr _hdr, *hp; + hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr); if (hp == NULL) - return false; + return NULL; protocol = iph->protocol; saddr = iph->saddr; @@ -172,16 +171,17 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, } else if (iph->protocol == IPPROTO_ICMP) { if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr, - &sport, &dport)) - return false; + &sport, &dport)) + return NULL; } else { - return false; + return NULL; } #ifdef XT_SOCKET_HAVE_CONNTRACK - /* Do the lookup with the original socket address in case this is a - * reply packet of an established SNAT-ted connection. */ - + /* Do the lookup with the original socket address in + * case this is a reply packet of an established + * SNAT-ted connection. + */ ct = nf_ct_get(skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct) && ((iph->protocol != IPPROTO_ICMP && @@ -197,10 +197,18 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, } #endif + return xt_socket_get_sock_v4(dev_net(skb->dev), protocol, saddr, daddr, + sport, dport, indev); +} + +static bool +socket_match(const struct sk_buff *skb, struct xt_action_param *par, + const struct xt_socket_mtinfo1 *info) +{ + struct sock *sk = skb->sk; + if (!sk) - sk = xt_socket_get_sock_v4(dev_net(skb->dev), protocol, - saddr, daddr, sport, dport, - par->in); + sk = xt_socket_lookup_slow_v4(skb, par->in); if (sk) { bool wildcard; bool transparent = true; @@ -225,12 +233,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, sk = NULL; } - pr_debug("proto %hhu %pI4:%hu -> %pI4:%hu (orig %pI4:%hu) sock %p\n", - protocol, &saddr, ntohs(sport), - &daddr, ntohs(dport), - &iph->daddr, hp ? ntohs(hp->dest) : 0, sk); - - return (sk != NULL); + return sk != NULL; } static bool @@ -327,28 +330,26 @@ xt_socket_get_sock_v6(struct net *net, const u8 protocol, return NULL; } -static bool -socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) +static struct sock *xt_socket_lookup_slow_v6(const struct sk_buff *skb, + const struct net_device *indev) { - struct ipv6hdr ipv6_var, *iph = ipv6_hdr(skb); - struct udphdr _hdr, *hp = NULL; - struct sock *sk = skb->sk; - const struct in6_addr *daddr = NULL, *saddr = NULL; __be16 uninitialized_var(dport), uninitialized_var(sport); - int thoff = 0, uninitialized_var(tproto); - const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; + const struct in6_addr *daddr = NULL, *saddr = NULL; + struct ipv6hdr *iph = ipv6_hdr(skb); + int thoff = 0, tproto; tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL); if (tproto < 0) { pr_debug("unable to find transport header in IPv6 packet, dropping\n"); - return NF_DROP; + return NULL; } if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) { - hp = skb_header_pointer(skb, thoff, - sizeof(_hdr), &_hdr); + struct udphdr _hdr, *hp; + + hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr); if (hp == NULL) - return false; + return NULL; saddr = &iph->saddr; sport = hp->source; @@ -356,17 +357,27 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) dport = hp->dest; } else if (tproto == IPPROTO_ICMPV6) { + struct ipv6hdr ipv6_var; + if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr, &sport, &dport, &ipv6_var)) - return false; + return NULL; } else { - return false; + return NULL; } + return xt_socket_get_sock_v6(dev_net(skb->dev), tproto, saddr, daddr, + sport, dport, indev); +} + +static bool +socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; + struct sock *sk = skb->sk; + if (!sk) - sk = xt_socket_get_sock_v6(dev_net(skb->dev), tproto, - saddr, daddr, sport, dport, - par->in); + sk = xt_socket_lookup_slow_v6(skb, par->in); if (sk) { bool wildcard; bool transparent = true; @@ -391,13 +402,7 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par) sk = NULL; } - pr_debug("proto %hhd %pI6:%hu -> %pI6:%hu " - "(orig %pI6:%hu) sock %p\n", - tproto, saddr, ntohs(sport), - daddr, ntohs(dport), - &iph->daddr, hp ? ntohs(hp->dest) : 0, sk); - - return (sk != NULL); + return sk != NULL; } #endif diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 9575a1892607..49ff32106080 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -907,6 +907,16 @@ static int nci_se_io(struct nfc_dev *nfc_dev, u32 se_idx, return 0; } +static int nci_fw_download(struct nfc_dev *nfc_dev, const char *firmware_name) +{ + struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); + + if (!ndev->ops->fw_download) + return -ENOTSUPP; + + return ndev->ops->fw_download(ndev, firmware_name); +} + static struct nfc_ops nci_nfc_ops = { .dev_up = nci_dev_up, .dev_down = nci_dev_down, @@ -922,6 +932,7 @@ static struct nfc_ops nci_nfc_ops = { .disable_se = nci_disable_se, .discover_se = nci_discover_se, .se_io = nci_se_io, + .fw_download = nci_fw_download, }; /* ---- Interface to NCI drivers ---- */ diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 14a2d11581da..3763036710ae 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1584,7 +1584,7 @@ static const struct genl_ops nfc_genl_ops[] = { struct urelease_work { struct work_struct w; - int portid; + u32 portid; }; static void nfc_urelease_event_work(struct work_struct *work) diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 50ec42f170a0..2dacc7b5af23 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -100,7 +100,9 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, new_stats = kmem_cache_alloc_node(flow_stats_cache, - GFP_THISNODE | + GFP_NOWAIT | + __GFP_THISNODE | + __GFP_NOWARN | __GFP_NOMEMALLOC, node); if (likely(new_stats)) { diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 3277a7520e31..6d39766e7828 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -222,7 +222,8 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) { struct net *net = ovs_dp_get_net(vport->dp); struct vxlan_port *vxlan_port = vxlan_vport(vport); - __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport; + struct sock *sk = vxlan_port->vs->sock->sk; + __be16 dst_port = inet_sk(sk)->inet_sport; const struct ovs_key_ipv4_tunnel *tun_key; struct vxlan_metadata md = {0}; struct rtable *rt; @@ -255,7 +256,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) vxflags = vxlan_port->exts | (tun_key->tun_flags & TUNNEL_CSUM ? VXLAN_F_UDP_CSUM : 0); - err = vxlan_xmit_skb(rt, skb, fl.saddr, tun_key->ipv4_dst, + err = vxlan_xmit_skb(rt, sk, skb, fl.saddr, tun_key->ipv4_dst, tun_key->ipv4_tos, tun_key->ipv4_ttl, df, src_port, dst_port, &md, false, vxflags); diff --git a/net/rds/connection.c b/net/rds/connection.c index 378c3a6acf84..14f041398ca1 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -130,7 +130,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, rcu_read_lock(); conn = rds_conn_lookup(head, laddr, faddr, trans); if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport && - !is_outgoing) { + laddr == faddr && !is_outgoing) { /* This is a looped back IB connection, and we're * called by the code handling the incoming connect. * We need a second connection object into which we @@ -193,6 +193,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr, } atomic_set(&conn->c_state, RDS_CONN_DOWN); + conn->c_send_gen = 0; conn->c_reconnect_jiffies = 0; INIT_DELAYED_WORK(&conn->c_send_w, rds_send_worker); INIT_DELAYED_WORK(&conn->c_recv_w, rds_recv_worker); diff --git a/net/rds/rds.h b/net/rds/rds.h index c3f2855c3d84..0d41155a2258 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -110,6 +110,7 @@ struct rds_connection { void *c_transport_data; atomic_t c_state; + unsigned long c_send_gen; unsigned long c_flags; unsigned long c_reconnect_jiffies; struct delayed_work c_send_w; diff --git a/net/rds/send.c b/net/rds/send.c index 44672befc0ee..e9430f537f9c 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -140,8 +140,11 @@ int rds_send_xmit(struct rds_connection *conn) struct scatterlist *sg; int ret = 0; LIST_HEAD(to_be_dropped); + int batch_count; + unsigned long send_gen = 0; restart: + batch_count = 0; /* * sendmsg calls here after having queued its message on the send @@ -157,6 +160,17 @@ restart: } /* + * we record the send generation after doing the xmit acquire. + * if someone else manages to jump in and do some work, we'll use + * this to avoid a goto restart farther down. + * + * The acquire_in_xmit() check above ensures that only one + * caller can increment c_send_gen at any time. + */ + conn->c_send_gen++; + send_gen = conn->c_send_gen; + + /* * rds_conn_shutdown() sets the conn state and then tests RDS_IN_XMIT, * we do the opposite to avoid races. */ @@ -202,6 +216,16 @@ restart: if (!rm) { unsigned int len; + batch_count++; + + /* we want to process as big a batch as we can, but + * we also want to avoid softlockups. If we've been + * through a lot of messages, lets back off and see + * if anyone else jumps in + */ + if (batch_count >= 1024) + goto over_batch; + spin_lock_irqsave(&conn->c_lock, flags); if (!list_empty(&conn->c_send_queue)) { @@ -357,9 +381,9 @@ restart: } } +over_batch: if (conn->c_trans->xmit_complete) conn->c_trans->xmit_complete(conn); - release_in_xmit(conn); /* Nuke any messages we decided not to retransmit. */ @@ -380,10 +404,15 @@ restart: * If the transport cannot continue (i.e ret != 0), then it must * call us when more room is available, such as from the tx * completion handler. + * + * We have an extra generation check here so that if someone manages + * to jump in after our release_in_xmit, we'll see that they have done + * some work and we will skip our goto */ if (ret == 0) { smp_mb(); - if (!list_empty(&conn->c_send_queue)) { + if (!list_empty(&conn->c_send_queue) && + send_gen == conn->c_send_gen) { rds_stats_inc(s_send_lock_queue_raced); goto restart; } diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index eb5b8445fef9..4cdbfb85686a 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -88,11 +88,19 @@ static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch) /* ------------------------------------------------------------- */ +static int ingress_init(struct Qdisc *sch, struct nlattr *opt) +{ + net_inc_ingress_queue(); + + return 0; +} + static void ingress_destroy(struct Qdisc *sch) { struct ingress_qdisc_data *p = qdisc_priv(sch); tcf_destroy_chain(&p->filter_list); + net_dec_ingress_queue(); } static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb) @@ -124,6 +132,7 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = { .id = "ingress", .priv_size = sizeof(struct ingress_qdisc_data), .enqueue = ingress_enqueue, + .init = ingress_init, .destroy = ingress_destroy, .dump = ingress_dump, .owner = THIS_MODULE, diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 179f1c8c0d8b..956ead2cab9a 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -560,8 +560,8 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) tfifo_dequeue: skb = __skb_dequeue(&sch->q); if (skb) { -deliver: qdisc_qstats_backlog_dec(sch, skb); +deliver: qdisc_unthrottled(sch); qdisc_bstats_update(sch, skb); return skb; @@ -578,6 +578,7 @@ deliver: rb_erase(p, &q->t_root); sch->q.qlen--; + qdisc_qstats_backlog_dec(sch, skb); skb->next = NULL; skb->prev = NULL; skb->tstamp = netem_skb_cb(skb)->tstamp_save; diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index fb78117b896c..9068e72aa73c 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -1,9 +1,11 @@ config SUNRPC tristate + depends on MULTIUSER config SUNRPC_GSS tristate select OID_REGISTRY + depends on MULTIUSER config SUNRPC_BACKCHANNEL bool diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 5199bb1a017e..2928afffbb81 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -1072,10 +1072,12 @@ void qword_add(char **bpp, int *lp, char *str) if (len < 0) return; - ret = string_escape_str(str, &bp, len, ESCAPE_OCTAL, "\\ \n\t"); - if (ret < 0 || ret == len) + ret = string_escape_str(str, bp, len, ESCAPE_OCTAL, "\\ \n\t"); + if (ret >= len) { + bp += len; len = -1; - else { + } else { + bp += ret; len -= ret; *bp++ = ' '; len--; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 124676c13780..e28909fddd30 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1136,7 +1136,7 @@ rpcrdma_init_fmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf) int i, rc; i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; - dprintk("RPC: %s: initalizing %d FMRs\n", __func__, i); + dprintk("RPC: %s: initializing %d FMRs\n", __func__, i); while (i--) { r = kzalloc(sizeof(*r), GFP_KERNEL); @@ -1169,7 +1169,7 @@ rpcrdma_init_frmrs(struct rpcrdma_ia *ia, struct rpcrdma_buffer *buf) int i, rc; i = (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS; - dprintk("RPC: %s: initalizing %d FRMRs\n", __func__, i); + dprintk("RPC: %s: initializing %d FRMRs\n", __func__, i); while (i--) { r = kzalloc(sizeof(*r), GFP_KERNEL); diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index ef3d7aa2854a..66deebc66aa1 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -176,7 +176,8 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, goto tx_error; } ttl = ip4_dst_hoplimit(&rt->dst); - err = udp_tunnel_xmit_skb(rt, clone, src->ipv4.s_addr, + err = udp_tunnel_xmit_skb(rt, ub->ubsock->sk, clone, + src->ipv4.s_addr, dst->ipv4.s_addr, 0, ttl, 0, src->udp_port, dst->udp_port, false, true); @@ -197,7 +198,8 @@ static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, if (err) goto tx_error; ttl = ip6_dst_hoplimit(ndst); - err = udp_tunnel6_xmit_skb(ndst, clone, ndst->dev, &src->ipv6, + err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, clone, + ndst->dev, &src->ipv6, &dst->ipv6, 0, ttl, src->udp_port, dst->udp_port, false); #endif diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index b13dfb4ff001..4f5543dd2524 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -175,7 +175,7 @@ config CFG80211_INTERNAL_REGDB Most distributions have a CRDA package. So if unsure, say N. config CFG80211_WEXT - bool "cfg80211 wireless extensions compatibility" + bool "cfg80211 wireless extensions compatibility" if !CFG80211_WEXT_EXPORT depends on CFG80211 select WEXT_CORE default y if CFG80211_WEXT_EXPORT diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 6dd1ab3b10ea..dd78445c7d50 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5664,7 +5664,7 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) } } - r = set_regdom(rd); + r = set_regdom(rd, REGD_SOURCE_CRDA); /* set_regdom took ownership */ rd = NULL; diff --git a/net/wireless/reg.c b/net/wireless/reg.c index be5f81caa488..0e347f888fe9 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -135,6 +135,11 @@ static spinlock_t reg_indoor_lock; /* Used to track the userspace process controlling the indoor setting */ static u32 reg_is_indoor_portid; +/* Max number of consecutive attempts to communicate with CRDA */ +#define REG_MAX_CRDA_TIMEOUTS 10 + +static u32 reg_crda_timeouts; + static const struct ieee80211_regdomain *get_cfg80211_regdom(void) { return rtnl_dereference(cfg80211_regdomain); @@ -485,7 +490,7 @@ static void reg_regdb_search(struct work_struct *work) mutex_unlock(®_regdb_search_mutex); if (!IS_ERR_OR_NULL(regdom)) - set_regdom(regdom); + set_regdom(regdom, REGD_SOURCE_INTERNAL_DB); rtnl_unlock(); } @@ -535,15 +540,20 @@ static int call_crda(const char *alpha2) snprintf(country, sizeof(country), "COUNTRY=%c%c", alpha2[0], alpha2[1]); + /* query internal regulatory database (if it exists) */ + reg_regdb_query(alpha2); + + if (reg_crda_timeouts > REG_MAX_CRDA_TIMEOUTS) { + pr_info("Exceeded CRDA call max attempts. Not calling CRDA\n"); + return -EINVAL; + } + if (!is_world_regdom((char *) alpha2)) pr_info("Calling CRDA for country: %c%c\n", alpha2[0], alpha2[1]); else pr_info("Calling CRDA to update world regulatory domain\n"); - /* query internal regulatory database (if it exists) */ - reg_regdb_query(alpha2); - return kobject_uevent_env(®_pdev->dev.kobj, KOBJ_CHANGE, env); } @@ -2293,6 +2303,9 @@ int regulatory_hint_user(const char *alpha2, request->initiator = NL80211_REGDOM_SET_BY_USER; request->user_reg_hint_type = user_reg_hint_type; + /* Allow calling CRDA again */ + reg_crda_timeouts = 0; + queue_regulatory_request(request); return 0; @@ -2362,6 +2375,9 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2) request->alpha2[1] = alpha2[1]; request->initiator = NL80211_REGDOM_SET_BY_DRIVER; + /* Allow calling CRDA again */ + reg_crda_timeouts = 0; + queue_regulatory_request(request); return 0; @@ -2415,6 +2431,9 @@ void regulatory_hint_country_ie(struct wiphy *wiphy, enum ieee80211_band band, request->initiator = NL80211_REGDOM_SET_BY_COUNTRY_IE; request->country_ie_env = env; + /* Allow calling CRDA again */ + reg_crda_timeouts = 0; + queue_regulatory_request(request); request = NULL; out: @@ -2893,7 +2912,8 @@ static int reg_set_rd_country_ie(const struct ieee80211_regdomain *rd, * multiple drivers can be ironed out later. Caller must've already * kmalloc'd the rd structure. */ -int set_regdom(const struct ieee80211_regdomain *rd) +int set_regdom(const struct ieee80211_regdomain *rd, + enum ieee80211_regd_source regd_src) { struct regulatory_request *lr; bool user_reset = false; @@ -2904,6 +2924,9 @@ int set_regdom(const struct ieee80211_regdomain *rd) return -EINVAL; } + if (regd_src == REGD_SOURCE_CRDA) + reg_crda_timeouts = 0; + lr = get_last_request(); /* Note that this doesn't update the wiphys, this is done below */ @@ -3063,6 +3086,7 @@ static void reg_timeout_work(struct work_struct *work) { REG_DBG_PRINT("Timeout while waiting for CRDA to reply, restoring regulatory settings\n"); rtnl_lock(); + reg_crda_timeouts++; restore_regulatory_settings(true); rtnl_unlock(); } diff --git a/net/wireless/reg.h b/net/wireless/reg.h index a2c4e16459da..9f495d76eca0 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -16,6 +16,11 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +enum ieee80211_regd_source { + REGD_SOURCE_INTERNAL_DB, + REGD_SOURCE_CRDA, +}; + extern const struct ieee80211_regdomain __rcu *cfg80211_regdomain; bool reg_is_valid_request(const char *alpha2); @@ -46,7 +51,9 @@ void wiphy_regulatory_deregister(struct wiphy *wiphy); int __init regulatory_init(void); void regulatory_exit(void); -int set_regdom(const struct ieee80211_regdomain *rd); +int set_regdom(const struct ieee80211_regdomain *rd, + enum ieee80211_regd_source regd_src); + unsigned int reg_get_max_bandwidth(const struct ieee80211_regdomain *rd, const struct ieee80211_reg_rule *rule); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index ea1da6621ff0..d11454f87bac 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -42,7 +42,7 @@ struct cfg80211_conn { CFG80211_CONN_CONNECTED, } state; u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; - u8 *ie; + const u8 *ie; size_t ie_len; bool auto_auth, prev_bssid_valid; }; @@ -423,6 +423,62 @@ void cfg80211_sme_assoc_timeout(struct wireless_dev *wdev) schedule_work(&rdev->conn_work); } +static int cfg80211_sme_get_conn_ies(struct wireless_dev *wdev, + const u8 *ies, size_t ies_len, + const u8 **out_ies, size_t *out_ies_len) +{ + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + u8 *buf; + size_t offs; + + if (!rdev->wiphy.extended_capabilities_len || + (ies && cfg80211_find_ie(WLAN_EID_EXT_CAPABILITY, ies, ies_len))) { + *out_ies = kmemdup(ies, ies_len, GFP_KERNEL); + if (!*out_ies) + return -ENOMEM; + *out_ies_len = ies_len; + return 0; + } + + buf = kmalloc(ies_len + rdev->wiphy.extended_capabilities_len + 2, + GFP_KERNEL); + if (!buf) + return -ENOMEM; + + if (ies_len) { + static const u8 before_extcapa[] = { + /* not listing IEs expected to be created by driver */ + WLAN_EID_RSN, + WLAN_EID_QOS_CAPA, + WLAN_EID_RRM_ENABLED_CAPABILITIES, + WLAN_EID_MOBILITY_DOMAIN, + WLAN_EID_SUPPORTED_REGULATORY_CLASSES, + WLAN_EID_BSS_COEX_2040, + }; + + offs = ieee80211_ie_split(ies, ies_len, before_extcapa, + ARRAY_SIZE(before_extcapa), 0); + memcpy(buf, ies, offs); + /* leave a whole for extended capabilities IE */ + memcpy(buf + offs + rdev->wiphy.extended_capabilities_len + 2, + ies + offs, ies_len - offs); + } else { + offs = 0; + } + + /* place extended capabilities IE (with only driver capabilities) */ + buf[offs] = WLAN_EID_EXT_CAPABILITY; + buf[offs + 1] = rdev->wiphy.extended_capabilities_len; + memcpy(buf + offs + 2, + rdev->wiphy.extended_capabilities, + rdev->wiphy.extended_capabilities_len); + + *out_ies = buf; + *out_ies_len = ies_len + rdev->wiphy.extended_capabilities_len + 2; + + return 0; +} + static int cfg80211_sme_connect(struct wireless_dev *wdev, struct cfg80211_connect_params *connect, const u8 *prev_bssid) @@ -453,16 +509,14 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev, memcpy(wdev->conn->bssid, connect->bssid, ETH_ALEN); } - if (connect->ie) { - wdev->conn->ie = kmemdup(connect->ie, connect->ie_len, - GFP_KERNEL); - wdev->conn->params.ie = wdev->conn->ie; - if (!wdev->conn->ie) { - kfree(wdev->conn); - wdev->conn = NULL; - return -ENOMEM; - } + if (cfg80211_sme_get_conn_ies(wdev, connect->ie, connect->ie_len, + &wdev->conn->ie, + &wdev->conn->params.ie_len)) { + kfree(wdev->conn); + wdev->conn = NULL; + return -ENOMEM; } + wdev->conn->params.ie = wdev->conn->ie; if (connect->auth_type == NL80211_AUTHTYPE_AUTOMATIC) { wdev->conn->auto_auth = true; diff --git a/net/wireless/util.c b/net/wireless/util.c index f218b151530a..70051ab52f4f 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1290,6 +1290,47 @@ int cfg80211_get_p2p_attr(const u8 *ies, unsigned int len, } EXPORT_SYMBOL(cfg80211_get_p2p_attr); +static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id) +{ + int i; + + for (i = 0; i < n_ids; i++) + if (ids[i] == id) + return true; + return false; +} + +size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, + const u8 *ids, int n_ids, + const u8 *after_ric, int n_after_ric, + size_t offset) +{ + size_t pos = offset; + + while (pos < ielen && ieee80211_id_in_list(ids, n_ids, ies[pos])) { + if (ies[pos] == WLAN_EID_RIC_DATA && n_after_ric) { + pos += 2 + ies[pos + 1]; + + while (pos < ielen && + !ieee80211_id_in_list(after_ric, n_after_ric, + ies[pos])) + pos += 2 + ies[pos + 1]; + } else { + pos += 2 + ies[pos + 1]; + } + } + + return pos; +} +EXPORT_SYMBOL(ieee80211_ie_split_ric); + +size_t ieee80211_ie_split(const u8 *ies, size_t ielen, + const u8 *ids, int n_ids, size_t offset) +{ + return ieee80211_ie_split_ric(ies, ielen, ids, n_ids, NULL, 0, offset); +} +EXPORT_SYMBOL(ieee80211_ie_split); + bool ieee80211_operating_class_to_band(u8 operating_class, enum ieee80211_band *band) { diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 85d1d4764612..526c4feb3b50 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -238,11 +238,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) skb->sp->xvec[skb->sp->len++] = x; - if (xfrm_tunnel_check(skb, x, family)) { - XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); - goto drop; - } - spin_lock(&x->lock); if (unlikely(x->km.state == XFRM_STATE_ACQ)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR); @@ -271,6 +266,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) spin_unlock(&x->lock); + if (xfrm_tunnel_check(skb, x, family)) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR); + goto drop; + } + seq_hi = htonl(xfrm_replay_seqhi(x, seq)); XFRM_SKB_CB(skb)->seq.input.low = seq; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 7c532856b398..fbcedbe33190 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -19,7 +19,7 @@ #include <net/dst.h> #include <net/xfrm.h> -static int xfrm_output2(struct sk_buff *skb); +static int xfrm_output2(struct sock *sk, struct sk_buff *skb); static int xfrm_skb_check_space(struct sk_buff *skb) { @@ -130,7 +130,7 @@ int xfrm_output_resume(struct sk_buff *skb, int err) return dst_output(skb); err = nf_hook(skb_dst(skb)->ops->family, - NF_INET_POST_ROUTING, skb, + NF_INET_POST_ROUTING, skb->sk, skb, NULL, skb_dst(skb)->dev, xfrm_output2); if (unlikely(err != 1)) goto out; @@ -144,12 +144,12 @@ out: } EXPORT_SYMBOL_GPL(xfrm_output_resume); -static int xfrm_output2(struct sk_buff *skb) +static int xfrm_output2(struct sock *sk, struct sk_buff *skb) { return xfrm_output_resume(skb, 1); } -static int xfrm_output_gso(struct sk_buff *skb) +static int xfrm_output_gso(struct sock *sk, struct sk_buff *skb) { struct sk_buff *segs; @@ -165,7 +165,7 @@ static int xfrm_output_gso(struct sk_buff *skb) int err; segs->next = NULL; - err = xfrm_output2(segs); + err = xfrm_output2(sk, segs); if (unlikely(err)) { kfree_skb_list(nskb); @@ -178,13 +178,13 @@ static int xfrm_output_gso(struct sk_buff *skb) return 0; } -int xfrm_output(struct sk_buff *skb) +int xfrm_output(struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(skb_dst(skb)->dev); int err; if (skb_is_gso(skb)) - return xfrm_output_gso(skb); + return xfrm_output_gso(sk, skb); if (skb->ip_summed == CHECKSUM_PARTIAL) { err = skb_checksum_help(skb); @@ -195,7 +195,7 @@ int xfrm_output(struct sk_buff *skb) } } - return xfrm_output2(skb); + return xfrm_output2(sk, skb); } EXPORT_SYMBOL_GPL(xfrm_output); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 7de2ed9ec46d..2091664295ba 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2423,6 +2423,11 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) const struct xfrm_link *link; int type, err; +#ifdef CONFIG_COMPAT + if (is_compat_task()) + return -ENOTSUPP; +#endif + type = nlh->nlmsg_type; if (type > XFRM_MSG_MAX) return -EINVAL; |