Diffstat (limited to 'net')
267 files changed, 14737 insertions, 3102 deletions
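One of the larger functional changes in this batch is the batman-adv OGMv2 aggregation queue added to net/batman-adv/bat_v_ogm.c below: outgoing OGMs are collected per hard interface in bat_v.aggr_list and flushed either when the next packet would no longer fit into min(MTU, BATADV_MAX_AGGREGATION_BYTES) or when a jittered aggregation timer fires. The following stand-alone C sketch only models that fit check and the "msecs * [0.9, 1.1]" timer jitter in userspace; the constant values mirror the kernel defaults as an assumption, and rand() stands in for prandom_u32() purely for illustration — it is not the kernel implementation.

```c
/* Userspace model of the OGMv2 aggregation decisions shown in the diff below.
 * BATADV_MAX_AGGREGATION_BYTES / _MS are assumed to match the kernel defaults;
 * rand() is an illustrative stand-in for prandom_u32().
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define BATADV_MAX_AGGREGATION_BYTES 512
#define BATADV_MAX_AGGREGATION_MS    100

struct aggr_queue {
	unsigned int len;	/* bytes currently queued (excluding ethernet header) */
	unsigned int mtu;	/* MTU of the outgoing interface */
};

/* models batadv_v_ogm_queue_left(): does one more OGM of ogm_len bytes fit? */
static bool ogm_queue_left(const struct aggr_queue *q, unsigned int ogm_len)
{
	unsigned int max = q->mtu < BATADV_MAX_AGGREGATION_BYTES ?
			   q->mtu : BATADV_MAX_AGGREGATION_BYTES;

	return q->len + ogm_len <= max;
}

/* models the jitter in batadv_v_ogm_start_queue_timer(): msecs * [0.9, 1.1] */
static unsigned int aggr_timer_msecs(void)
{
	unsigned int usecs = BATADV_MAX_AGGREGATION_MS * 1000;

	usecs += rand() % (usecs / 5) - (usecs / 10);
	return usecs / 1000;
}

int main(void)
{
	struct aggr_queue q = { .len = 0, .mtu = 1500 };
	unsigned int ogm_lens[] = { 120, 200, 180, 90 };

	for (unsigned int i = 0; i < sizeof(ogm_lens) / sizeof(ogm_lens[0]); i++) {
		if (!ogm_queue_left(&q, ogm_lens[i])) {
			printf("flush %u queued bytes before adding %u\n",
			       q.len, ogm_lens[i]);
			q.len = 0;	/* aggregate sent, queue emptied */
		}
		q.len += ogm_lens[i];
	}
	printf("aggregation timer would fire in ~%u ms\n", aggr_timer_msecs());
	return 0;
}
```

With the values above the first three OGMs (120 + 200 + 180 = 500 bytes) stay queued, and the fourth (90 bytes) triggers a flush because it would exceed the 512-byte aggregate limit; the timer covers the case where the queue never fills up.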
diff --git a/net/Kconfig b/net/Kconfig index 57f51a279ad6..3101bfcbdd7a 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -430,6 +430,7 @@ config NET_SOCK_MSG config NET_DEVLINK bool default n + imply NET_DROP_MONITOR config PAGE_POOL bool diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index a8cb6b2e20c1..4072e9d394d6 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -953,8 +953,8 @@ static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset, if (copy > len) copy = len; vaddr = kmap_atomic(skb_frag_page(frag)); - sum = atalk_sum_partial(vaddr + frag->page_offset + - offset - start, copy, sum); + sum = atalk_sum_partial(vaddr + skb_frag_off(frag) + + offset - start, copy, sum); kunmap_atomic(vaddr); if (!(len -= copy)) diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c index 4bb418313720..3286f9d527d3 100644 --- a/net/atm/mpoa_caches.c +++ b/net/atm/mpoa_caches.c @@ -180,8 +180,7 @@ static int cache_hit(in_cache_entry *entry, struct mpoa_client *mpc) static void in_cache_put(in_cache_entry *entry) { if (refcount_dec_and_test(&entry->use)) { - memset(entry, 0, sizeof(in_cache_entry)); - kfree(entry); + kzfree(entry); } } @@ -416,8 +415,7 @@ static eg_cache_entry *eg_cache_get_by_src_ip(__be32 ipaddr, static void eg_cache_put(eg_cache_entry *entry) { if (refcount_dec_and_test(&entry->use)) { - memset(entry, 0, sizeof(eg_cache_entry)); - kfree(entry); + kzfree(entry); } } diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c index bd3da9af5ef6..45d8e1d5d033 100644 --- a/net/atm/pppoatm.c +++ b/net/atm/pppoatm.c @@ -216,9 +216,7 @@ static void pppoatm_push(struct atm_vcc *atmvcc, struct sk_buff *skb) pvcc->chan.mtu += LLC_LEN; break; } - pr_debug("Couldn't autodetect yet (skb: %02X %02X %02X %02X %02X %02X)\n", - skb->data[0], skb->data[1], skb->data[2], - skb->data[3], skb->data[4], skb->data[5]); + pr_debug("Couldn't autodetect yet (skb: %6ph)\n", skb->data); goto error; case e_vc: break; diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 22672cb3e25d..64054edc2e3c 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -79,6 +79,7 @@ static int batadv_v_iface_enable(struct batadv_hard_iface *hard_iface) static void batadv_v_iface_disable(struct batadv_hard_iface *hard_iface) { + batadv_v_ogm_iface_disable(hard_iface); batadv_v_elp_iface_disable(hard_iface); } @@ -1081,6 +1082,12 @@ void batadv_v_hardif_init(struct batadv_hard_iface *hard_iface) */ atomic_set(&hard_iface->bat_v.throughput_override, 0); atomic_set(&hard_iface->bat_v.elp_interval, 500); + + hard_iface->bat_v.aggr_len = 0; + skb_queue_head_init(&hard_iface->bat_v.aggr_list); + spin_lock_init(&hard_iface->bat_v.aggr_list_lock); + INIT_DELAYED_WORK(&hard_iface->bat_v.aggr_wq, + batadv_v_ogm_aggr_work); } /** diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index bc06e3cdfa84..dc4f7430cb5a 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -17,12 +17,14 @@ #include <linux/kernel.h> #include <linux/kref.h> #include <linux/list.h> +#include <linux/lockdep.h> #include <linux/netdevice.h> #include <linux/random.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/skbuff.h> #include <linux/slab.h> +#include <linux/spinlock.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/types.h> @@ -77,6 +79,20 @@ struct batadv_orig_node *batadv_v_ogm_orig_get(struct batadv_priv *bat_priv, } /** + * batadv_v_ogm_start_queue_timer() - restart the OGM aggregation timer + * @hard_iface: the interface to use 
to send the OGM + */ +static void batadv_v_ogm_start_queue_timer(struct batadv_hard_iface *hard_iface) +{ + unsigned int msecs = BATADV_MAX_AGGREGATION_MS * 1000; + + /* msecs * [0.9, 1.1] */ + msecs += prandom_u32() % (msecs / 5) - (msecs / 10); + queue_delayed_work(batadv_event_workqueue, &hard_iface->bat_v.aggr_wq, + msecs_to_jiffies(msecs / 1000)); +} + +/** * batadv_v_ogm_start_timer() - restart the OGM sending timer * @bat_priv: the bat priv with all the soft interface information */ @@ -116,6 +132,130 @@ static void batadv_v_ogm_send_to_if(struct sk_buff *skb, } /** + * batadv_v_ogm_len() - OGMv2 packet length + * @skb: the OGM to check + * + * Return: Length of the given OGMv2 packet, including tvlv length, excluding + * ethernet header length. + */ +static unsigned int batadv_v_ogm_len(struct sk_buff *skb) +{ + struct batadv_ogm2_packet *ogm_packet; + + ogm_packet = (struct batadv_ogm2_packet *)skb->data; + return BATADV_OGM2_HLEN + ntohs(ogm_packet->tvlv_len); +} + +/** + * batadv_v_ogm_queue_left() - check if given OGM still fits aggregation queue + * @skb: the OGM to check + * @hard_iface: the interface to use to send the OGM + * + * Caller needs to hold the hard_iface->bat_v.aggr_list_lock. + * + * Return: True, if the given OGMv2 packet still fits, false otherwise. + */ +static bool batadv_v_ogm_queue_left(struct sk_buff *skb, + struct batadv_hard_iface *hard_iface) +{ + unsigned int max = min_t(unsigned int, hard_iface->net_dev->mtu, + BATADV_MAX_AGGREGATION_BYTES); + unsigned int ogm_len = batadv_v_ogm_len(skb); + + lockdep_assert_held(&hard_iface->bat_v.aggr_list_lock); + + return hard_iface->bat_v.aggr_len + ogm_len <= max; +} + +/** + * batadv_v_ogm_aggr_list_free - free all elements in an aggregation queue + * @hard_iface: the interface holding the aggregation queue + * + * Empties the OGMv2 aggregation queue and frees all the skbs it contained. + * + * Caller needs to hold the hard_iface->bat_v.aggr_list_lock. + */ +static void batadv_v_ogm_aggr_list_free(struct batadv_hard_iface *hard_iface) +{ + struct sk_buff *skb; + + lockdep_assert_held(&hard_iface->bat_v.aggr_list_lock); + + while ((skb = skb_dequeue(&hard_iface->bat_v.aggr_list))) + kfree_skb(skb); + + hard_iface->bat_v.aggr_len = 0; +} + +/** + * batadv_v_ogm_aggr_send() - flush & send aggregation queue + * @hard_iface: the interface with the aggregation queue to flush + * + * Aggregates all OGMv2 packets currently in the aggregation queue into a + * single OGMv2 packet and transmits this aggregate. + * + * The aggregation queue is empty after this call. + * + * Caller needs to hold the hard_iface->bat_v.aggr_list_lock. 
+ */ +static void batadv_v_ogm_aggr_send(struct batadv_hard_iface *hard_iface) +{ + unsigned int aggr_len = hard_iface->bat_v.aggr_len; + struct sk_buff *skb_aggr; + unsigned int ogm_len; + struct sk_buff *skb; + + lockdep_assert_held(&hard_iface->bat_v.aggr_list_lock); + + if (!aggr_len) + return; + + skb_aggr = dev_alloc_skb(aggr_len + ETH_HLEN + NET_IP_ALIGN); + if (!skb_aggr) { + batadv_v_ogm_aggr_list_free(hard_iface); + return; + } + + skb_reserve(skb_aggr, ETH_HLEN + NET_IP_ALIGN); + skb_reset_network_header(skb_aggr); + + while ((skb = skb_dequeue(&hard_iface->bat_v.aggr_list))) { + hard_iface->bat_v.aggr_len -= batadv_v_ogm_len(skb); + + ogm_len = batadv_v_ogm_len(skb); + skb_put_data(skb_aggr, skb->data, ogm_len); + + consume_skb(skb); + } + + batadv_v_ogm_send_to_if(skb_aggr, hard_iface); +} + +/** + * batadv_v_ogm_queue_on_if() - queue a batman ogm on a given interface + * @skb: the OGM to queue + * @hard_iface: the interface to queue the OGM on + */ +static void batadv_v_ogm_queue_on_if(struct sk_buff *skb, + struct batadv_hard_iface *hard_iface) +{ + struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + + if (!atomic_read(&bat_priv->aggregated_ogms)) { + batadv_v_ogm_send_to_if(skb, hard_iface); + return; + } + + spin_lock_bh(&hard_iface->bat_v.aggr_list_lock); + if (!batadv_v_ogm_queue_left(skb, hard_iface)) + batadv_v_ogm_aggr_send(hard_iface); + + hard_iface->bat_v.aggr_len += batadv_v_ogm_len(skb); + skb_queue_tail(&hard_iface->bat_v.aggr_list, skb); + spin_unlock_bh(&hard_iface->bat_v.aggr_list_lock); +} + +/** * batadv_v_ogm_send() - periodic worker broadcasting the own OGM * @work: work queue item */ @@ -210,7 +350,7 @@ static void batadv_v_ogm_send(struct work_struct *work) break; } - batadv_v_ogm_send_to_if(skb_tmp, hard_iface); + batadv_v_ogm_queue_on_if(skb_tmp, hard_iface); batadv_hardif_put(hard_iface); } rcu_read_unlock(); @@ -224,6 +364,27 @@ out: } /** + * batadv_v_ogm_aggr_work() - OGM queue periodic task per interface + * @work: work queue item + * + * Emits aggregated OGM message in regular intervals. + */ +void batadv_v_ogm_aggr_work(struct work_struct *work) +{ + struct batadv_hard_iface_bat_v *batv; + struct batadv_hard_iface *hard_iface; + + batv = container_of(work, struct batadv_hard_iface_bat_v, aggr_wq.work); + hard_iface = container_of(batv, struct batadv_hard_iface, bat_v); + + spin_lock_bh(&hard_iface->bat_v.aggr_list_lock); + batadv_v_ogm_aggr_send(hard_iface); + spin_unlock_bh(&hard_iface->bat_v.aggr_list_lock); + + batadv_v_ogm_start_queue_timer(hard_iface); +} + +/** * batadv_v_ogm_iface_enable() - prepare an interface for B.A.T.M.A.N. 
V * @hard_iface: the interface to prepare * @@ -235,12 +396,26 @@ int batadv_v_ogm_iface_enable(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); + batadv_v_ogm_start_queue_timer(hard_iface); batadv_v_ogm_start_timer(bat_priv); return 0; } /** + * batadv_v_ogm_iface_disable() - release OGM interface private resources + * @hard_iface: interface for which the resources have to be released + */ +void batadv_v_ogm_iface_disable(struct batadv_hard_iface *hard_iface) +{ + cancel_delayed_work_sync(&hard_iface->bat_v.aggr_wq); + + spin_lock_bh(&hard_iface->bat_v.aggr_list_lock); + batadv_v_ogm_aggr_list_free(hard_iface); + spin_unlock_bh(&hard_iface->bat_v.aggr_list_lock); +} + +/** * batadv_v_ogm_primary_iface_set() - set a new primary interface * @primary_iface: the new primary interface */ @@ -382,7 +557,7 @@ static void batadv_v_ogm_forward(struct batadv_priv *bat_priv, if_outgoing->net_dev->name, ntohl(ogm_forward->throughput), ogm_forward->ttl, if_incoming->net_dev->name); - batadv_v_ogm_send_to_if(skb, if_outgoing); + batadv_v_ogm_queue_on_if(skb, if_outgoing); out: if (orig_ifinfo) diff --git a/net/batman-adv/bat_v_ogm.h b/net/batman-adv/bat_v_ogm.h index 2a50df7fc2bf..bf16d040461d 100644 --- a/net/batman-adv/bat_v_ogm.h +++ b/net/batman-adv/bat_v_ogm.h @@ -11,10 +11,13 @@ #include <linux/skbuff.h> #include <linux/types.h> +#include <linux/workqueue.h> int batadv_v_ogm_init(struct batadv_priv *bat_priv); void batadv_v_ogm_free(struct batadv_priv *bat_priv); +void batadv_v_ogm_aggr_work(struct work_struct *work); int batadv_v_ogm_iface_enable(struct batadv_hard_iface *hard_iface); +void batadv_v_ogm_iface_disable(struct batadv_hard_iface *hard_iface); struct batadv_orig_node *batadv_v_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr); void batadv_v_ogm_primary_iface_set(struct batadv_hard_iface *primary_iface); diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 3d4c04d87ff3..6967f2e4c3f4 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -13,7 +13,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2019.3" +#define BATADV_SOURCE_VERSION "2019.4" #endif /* B.A.T.M.A.N. parameters */ diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index c7a2e77ca1da..a1146cb10919 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -943,10 +943,10 @@ static const struct net_device_ops batadv_netdev_ops = { static void batadv_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { - strlcpy(info->driver, "B.A.T.M.A.N. advanced", sizeof(info->driver)); - strlcpy(info->version, BATADV_SOURCE_VERSION, sizeof(info->version)); - strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); - strlcpy(info->bus_info, "batman", sizeof(info->bus_info)); + strscpy(info->driver, "B.A.T.M.A.N. 
advanced", sizeof(info->driver)); + strscpy(info->version, BATADV_SOURCE_VERSION, sizeof(info->version)); + strscpy(info->fw_version, "N/A", sizeof(info->fw_version)); + strscpy(info->bus_info, "batman", sizeof(info->bus_info)); } /* Inspired by drivers/net/ethernet/dlink/sundance.c:1702 diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 1efcb97039cd..e5bbc28ed12c 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -1070,7 +1070,7 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj, dev_hold(net_dev); INIT_WORK(&store_work->work, batadv_store_mesh_iface_work); store_work->net_dev = net_dev; - strlcpy(store_work->soft_iface_name, buff, + strscpy(store_work->soft_iface_name, buff, sizeof(store_work->soft_iface_name)); queue_work(batadv_event_workqueue, &store_work->work); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 6ae139d74e0f..be7c02aa91e2 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -117,6 +117,18 @@ struct batadv_hard_iface_bat_v { /** @elp_wq: workqueue used to schedule ELP transmissions */ struct delayed_work elp_wq; + /** @aggr_wq: workqueue used to transmit queued OGM packets */ + struct delayed_work aggr_wq; + + /** @aggr_list: queue for to be aggregated OGM packets */ + struct sk_buff_head aggr_list; + + /** @aggr_len: size of the OGM aggregate (excluding ethernet header) */ + unsigned int aggr_len; + + /** @aggr_list_lock: protects aggr_list */ + spinlock_t aggr_list_lock; + /** * @throughput_override: throughput override to disable link * auto-detection diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index 9d41de1ec90f..bb55d92691b0 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -583,7 +583,7 @@ static const struct net_device_ops netdev_ops = { .ndo_start_xmit = bt_xmit, }; -static struct header_ops header_ops = { +static const struct header_ops header_ops = { .create = header_create, }; diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 621f1a97d803..7f6a581b5b7e 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -1054,6 +1054,7 @@ void __hci_req_enable_advertising(struct hci_request *req) struct hci_cp_le_set_adv_param cp; u8 own_addr_type, enable = 0x01; bool connectable; + u16 adv_min_interval, adv_max_interval; u32 flags; flags = get_adv_instance_flags(hdev, hdev->cur_adv_instance); @@ -1087,16 +1088,30 @@ void __hci_req_enable_advertising(struct hci_request *req) return; memset(&cp, 0, sizeof(cp)); - cp.min_interval = cpu_to_le16(hdev->le_adv_min_interval); - cp.max_interval = cpu_to_le16(hdev->le_adv_max_interval); - if (connectable) + if (connectable) { cp.type = LE_ADV_IND; - else if (get_cur_adv_instance_scan_rsp_len(hdev)) - cp.type = LE_ADV_SCAN_IND; - else - cp.type = LE_ADV_NONCONN_IND; + adv_min_interval = hdev->le_adv_min_interval; + adv_max_interval = hdev->le_adv_max_interval; + } else { + if (get_cur_adv_instance_scan_rsp_len(hdev)) + cp.type = LE_ADV_SCAN_IND; + else + cp.type = LE_ADV_NONCONN_IND; + + if (!hci_dev_test_flag(hdev, HCI_DISCOVERABLE) || + hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) { + adv_min_interval = DISCOV_LE_FAST_ADV_INT_MIN; + adv_max_interval = DISCOV_LE_FAST_ADV_INT_MAX; + } else { + adv_min_interval = hdev->le_adv_min_interval; + adv_max_interval = hdev->le_adv_max_interval; + } + } + + cp.min_interval = cpu_to_le16(adv_min_interval); + cp.max_interval = cpu_to_le16(adv_max_interval); cp.own_address_type = own_addr_type; cp.channel_map = 
hdev->le_adv_channel_map; diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 8d889969ae7e..bef84b95e2c4 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -267,7 +267,7 @@ static int hidp_get_raw_report(struct hid_device *hid, set_bit(HIDP_WAITING_FOR_RETURN, &session->flags); data[0] = report_number; ret = hidp_send_ctrl_message(session, report_type, data, 1); - if (ret) + if (ret < 0) goto err; /* Wait for the return of the report. The returned report @@ -343,7 +343,7 @@ static int hidp_set_raw_report(struct hid_device *hid, unsigned char reportnum, data[0] = reportnum; set_bit(HIDP_WAITING_FOR_SEND_ACK, &session->flags); ret = hidp_send_ctrl_message(session, report_type, data, count); - if (ret) + if (ret < 0) goto err; /* Wait for the ACK from the device. */ diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 150114e33b20..acb7c6d5643f 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2588,7 +2588,6 @@ static int get_connections(struct sock *sk, struct hci_dev *hdev, void *data, { struct mgmt_rp_get_connections *rp; struct hci_conn *c; - size_t rp_len; int err; u16 i; @@ -2608,8 +2607,7 @@ static int get_connections(struct sock *sk, struct hci_dev *hdev, void *data, i++; } - rp_len = sizeof(*rp) + (i * sizeof(struct mgmt_addr_info)); - rp = kmalloc(rp_len, GFP_KERNEL); + rp = kmalloc(struct_size(rp, addr, i), GFP_KERNEL); if (!rp) { err = -ENOMEM; goto unlock; @@ -2629,10 +2627,8 @@ static int get_connections(struct sock *sk, struct hci_dev *hdev, void *data, rp->conn_count = cpu_to_le16(i); /* Recalculate length in case of filtered SCO connections, etc */ - rp_len = sizeof(*rp) + (i * sizeof(struct mgmt_addr_info)); - err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_GET_CONNECTIONS, 0, rp, - rp_len); + struct_size(rp, addr, i)); kfree(rp); diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 80e6f3a6864d..1153bbcdff72 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -377,6 +377,22 @@ out: return ret; } +static int verify_user_bpf_flow_keys(struct bpf_flow_keys *ctx) +{ + /* make sure the fields we don't use are zeroed */ + if (!range_is_zero(ctx, 0, offsetof(struct bpf_flow_keys, flags))) + return -EINVAL; + + /* flags is allowed */ + + if (!range_is_zero(ctx, offsetof(struct bpf_flow_keys, flags) + + FIELD_SIZEOF(struct bpf_flow_keys, flags), + sizeof(struct bpf_flow_keys))) + return -EINVAL; + + return 0; +} + int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr) @@ -384,9 +400,11 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, u32 size = kattr->test.data_size_in; struct bpf_flow_dissector ctx = {}; u32 repeat = kattr->test.repeat; + struct bpf_flow_keys *user_ctx; struct bpf_flow_keys flow_keys; u64 time_start, time_spent = 0; const struct ethhdr *eth; + unsigned int flags = 0; u32 retval, duration; void *data; int ret; @@ -395,9 +413,6 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR) return -EINVAL; - if (kattr->test.ctx_in || kattr->test.ctx_out) - return -EINVAL; - if (size < ETH_HLEN) return -EINVAL; @@ -410,6 +425,18 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, if (!repeat) repeat = 1; + user_ctx = bpf_ctx_init(kattr, sizeof(struct bpf_flow_keys)); + if (IS_ERR(user_ctx)) { + kfree(data); + return PTR_ERR(user_ctx); + } + if (user_ctx) { + ret = verify_user_bpf_flow_keys(user_ctx); + if (ret) + goto out; + flags = user_ctx->flags; + } + 
ctx.flow_keys = &flow_keys; ctx.data = data; ctx.data_end = (__u8 *)data + size; @@ -419,7 +446,7 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, time_start = ktime_get_ns(); for (i = 0; i < repeat; i++) { retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN, - size); + size, flags); if (signal_pending(current)) { preempt_enable(); @@ -450,8 +477,12 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys), retval, duration); + if (!ret) + ret = bpf_ctx_finish(kattr, uattr, user_ctx, + sizeof(struct bpf_flow_keys)); out: + kfree(user_ctx); kfree(data); return ret; } diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 63f9c08625f0..da5ed4cf9233 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -60,6 +60,8 @@ static void __mdb_entry_fill_flags(struct br_mdb_entry *e, unsigned char flags) e->flags = 0; if (flags & MDB_PG_FLAGS_OFFLOAD) e->flags |= MDB_FLAGS_OFFLOAD; + if (flags & MDB_PG_FLAGS_FAST_LEAVE) + e->flags |= MDB_FLAGS_FAST_LEAVE; } static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip) @@ -75,6 +77,53 @@ static void __mdb_entry_to_br_ip(struct br_mdb_entry *entry, struct br_ip *ip) #endif } +static int __mdb_fill_info(struct sk_buff *skb, + struct net_bridge_mdb_entry *mp, + struct net_bridge_port_group *p) +{ + struct timer_list *mtimer; + struct nlattr *nest_ent; + struct br_mdb_entry e; + u8 flags = 0; + int ifindex; + + memset(&e, 0, sizeof(e)); + if (p) { + ifindex = p->port->dev->ifindex; + mtimer = &p->timer; + flags = p->flags; + } else { + ifindex = mp->br->dev->ifindex; + mtimer = &mp->timer; + } + + __mdb_entry_fill_flags(&e, flags); + e.ifindex = ifindex; + e.vid = mp->addr.vid; + if (mp->addr.proto == htons(ETH_P_IP)) + e.addr.u.ip4 = mp->addr.u.ip4; +#if IS_ENABLED(CONFIG_IPV6) + if (mp->addr.proto == htons(ETH_P_IPV6)) + e.addr.u.ip6 = mp->addr.u.ip6; +#endif + e.addr.proto = mp->addr.proto; + nest_ent = nla_nest_start_noflag(skb, + MDBA_MDB_ENTRY_INFO); + if (!nest_ent) + return -EMSGSIZE; + + if (nla_put_nohdr(skb, sizeof(e), &e) || + nla_put_u32(skb, + MDBA_MDB_EATTR_TIMER, + br_timer_value(mtimer))) { + nla_nest_cancel(skb, nest_ent); + return -EMSGSIZE; + } + nla_nest_end(skb, nest_ent); + + return 0; +} + static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev) { @@ -93,7 +142,6 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, hlist_for_each_entry_rcu(mp, &br->mdb_list, mdb_node) { struct net_bridge_port_group *p; struct net_bridge_port_group __rcu **pp; - struct net_bridge_port *port; if (idx < s_idx) goto skip; @@ -104,43 +152,24 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, break; } + if (mp->host_joined) { + err = __mdb_fill_info(skb, mp, NULL); + if (err) { + nla_nest_cancel(skb, nest2); + break; + } + } + for (pp = &mp->ports; (p = rcu_dereference(*pp)) != NULL; pp = &p->next) { - struct nlattr *nest_ent; - struct br_mdb_entry e; - - port = p->port; - if (!port) + if (!p->port) continue; - memset(&e, 0, sizeof(e)); - e.ifindex = port->dev->ifindex; - e.vid = p->addr.vid; - __mdb_entry_fill_flags(&e, p->flags); - if (p->addr.proto == htons(ETH_P_IP)) - e.addr.u.ip4 = p->addr.u.ip4; -#if IS_ENABLED(CONFIG_IPV6) - if (p->addr.proto == htons(ETH_P_IPV6)) - e.addr.u.ip6 = p->addr.u.ip6; -#endif - e.addr.proto = p->addr.proto; - nest_ent = nla_nest_start_noflag(skb, - MDBA_MDB_ENTRY_INFO); - if (!nest_ent) { - 
nla_nest_cancel(skb, nest2); - err = -EMSGSIZE; - goto out; - } - if (nla_put_nohdr(skb, sizeof(e), &e) || - nla_put_u32(skb, - MDBA_MDB_EATTR_TIMER, - br_timer_value(&p->timer))) { - nla_nest_cancel(skb, nest_ent); + err = __mdb_fill_info(skb, mp, p); + if (err) { nla_nest_cancel(skb, nest2); - err = -EMSGSIZE; goto out; } - nla_nest_end(skb, nest_ent); } nla_nest_end(skb, nest2); skip: @@ -587,6 +616,19 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, return err; } + /* host join */ + if (!port) { + /* don't allow any flags for host-joined groups */ + if (state) + return -EINVAL; + if (mp->host_joined) + return -EEXIST; + + br_multicast_host_join(mp, false); + + return 0; + } + for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL; pp = &p->next) { @@ -611,19 +653,21 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, { struct br_ip ip; struct net_device *dev; - struct net_bridge_port *p; + struct net_bridge_port *p = NULL; int ret; if (!netif_running(br->dev) || !br_opt_get(br, BROPT_MULTICAST_ENABLED)) return -EINVAL; - dev = __dev_get_by_index(net, entry->ifindex); - if (!dev) - return -ENODEV; + if (entry->ifindex != br->dev->ifindex) { + dev = __dev_get_by_index(net, entry->ifindex); + if (!dev) + return -ENODEV; - p = br_port_get_rtnl(dev); - if (!p || p->br != br || p->state == BR_STATE_DISABLED) - return -EINVAL; + p = br_port_get_rtnl(dev); + if (!p || p->br != br || p->state == BR_STATE_DISABLED) + return -EINVAL; + } __mdb_entry_to_br_ip(entry, &ip); @@ -638,9 +682,9 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, { struct net *net = sock_net(skb->sk); struct net_bridge_vlan_group *vg; + struct net_bridge_port *p = NULL; struct net_device *dev, *pdev; struct br_mdb_entry *entry; - struct net_bridge_port *p; struct net_bridge_vlan *v; struct net_bridge *br; int err; @@ -651,18 +695,22 @@ static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, br = netdev_priv(dev); + if (entry->ifindex != br->dev->ifindex) { + pdev = __dev_get_by_index(net, entry->ifindex); + if (!pdev) + return -ENODEV; + + p = br_port_get_rtnl(pdev); + if (!p || p->br != br || p->state == BR_STATE_DISABLED) + return -EINVAL; + vg = nbp_vlan_group(p); + } else { + vg = br_vlan_group(br); + } + /* If vlan filtering is enabled and VLAN is not specified * install mdb entry on all vlans configured on the port. 
*/ - pdev = __dev_get_by_index(net, entry->ifindex); - if (!pdev) - return -ENODEV; - - p = br_port_get_rtnl(pdev); - if (!p || p->br != br || p->state == BR_STATE_DISABLED) - return -EINVAL; - - vg = nbp_vlan_group(p); if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) { list_for_each_entry(v, &vg->vlan_list, vlist) { entry->vid = v->vid; @@ -698,6 +746,15 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) if (!mp) goto unlock; + /* host leave */ + if (entry->ifindex == mp->br->dev->ifindex && mp->host_joined) { + br_multicast_host_leave(mp, false); + err = 0; + if (!mp->ports && netif_running(br->dev)) + mod_timer(&mp->timer, jiffies); + goto unlock; + } + for (pp = &mp->ports; (p = mlock_dereference(*pp, br)) != NULL; pp = &p->next) { @@ -730,9 +787,9 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, { struct net *net = sock_net(skb->sk); struct net_bridge_vlan_group *vg; + struct net_bridge_port *p = NULL; struct net_device *dev, *pdev; struct br_mdb_entry *entry; - struct net_bridge_port *p; struct net_bridge_vlan *v; struct net_bridge *br; int err; @@ -743,18 +800,22 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, br = netdev_priv(dev); + if (entry->ifindex != br->dev->ifindex) { + pdev = __dev_get_by_index(net, entry->ifindex); + if (!pdev) + return -ENODEV; + + p = br_port_get_rtnl(pdev); + if (!p || p->br != br || p->state == BR_STATE_DISABLED) + return -EINVAL; + vg = nbp_vlan_group(p); + } else { + vg = br_vlan_group(br); + } + /* If vlan filtering is enabled and VLAN is not specified * delete mdb entry on all vlans configured on the port. */ - pdev = __dev_get_by_index(net, entry->ifindex); - if (!pdev) - return -ENODEV; - - p = br_port_get_rtnl(pdev); - if (!p || p->br != br || p->state == BR_STATE_DISABLED) - return -EINVAL; - - vg = nbp_vlan_group(p); if (br_vlan_enabled(br->dev) && vg && entry->vid == 0) { list_for_each_entry(v, &vg->vlan_list, vlist) { entry->vid = v->vid; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index f8cac3702712..ad12fe3fca8c 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -148,8 +148,7 @@ static void br_multicast_group_expired(struct timer_list *t) if (!netif_running(br->dev) || timer_pending(&mp->timer)) goto out; - mp->host_joined = false; - br_mdb_notify(br->dev, NULL, &mp->addr, RTM_DELMDB, 0); + br_multicast_host_leave(mp, true); if (mp->ports) goto out; @@ -512,6 +511,27 @@ static bool br_port_group_equal(struct net_bridge_port_group *p, return ether_addr_equal(src, p->eth_addr); } +void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify) +{ + if (!mp->host_joined) { + mp->host_joined = true; + if (notify) + br_mdb_notify(mp->br->dev, NULL, &mp->addr, + RTM_NEWMDB, 0); + } + mod_timer(&mp->timer, jiffies + mp->br->multicast_membership_interval); +} + +void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify) +{ + if (!mp->host_joined) + return; + + mp->host_joined = false; + if (notify) + br_mdb_notify(mp->br->dev, NULL, &mp->addr, RTM_DELMDB, 0); +} + static int br_multicast_add_group(struct net_bridge *br, struct net_bridge_port *port, struct br_ip *group, @@ -534,11 +554,7 @@ static int br_multicast_add_group(struct net_bridge *br, goto err; if (!port) { - if (!mp->host_joined) { - mp->host_joined = true; - br_mdb_notify(br->dev, NULL, &mp->addr, RTM_NEWMDB, 0); - } - mod_timer(&mp->timer, now + br->multicast_membership_interval); + br_multicast_host_join(mp, true); goto out; } @@ -1396,7 
+1412,7 @@ br_multicast_leave_group(struct net_bridge *br, del_timer(&p->timer); kfree_rcu(p, rcu); br_mdb_notify(br->dev, port, group, RTM_DELMDB, - p->flags); + p->flags | MDB_PG_FLAGS_FAST_LEAVE); if (!mp->ports && !mp->host_joined && netif_running(br->dev)) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 646504db0220..ce2ab14ee605 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -199,6 +199,7 @@ struct net_bridge_fdb_entry { #define MDB_PG_FLAGS_PERMANENT BIT(0) #define MDB_PG_FLAGS_OFFLOAD BIT(1) +#define MDB_PG_FLAGS_FAST_LEAVE BIT(2) struct net_bridge_port_group { struct net_bridge_port *port; @@ -701,6 +702,8 @@ void br_multicast_get_stats(const struct net_bridge *br, struct br_mcast_stats *dest); void br_mdb_init(void); void br_mdb_uninit(void); +void br_multicast_host_join(struct net_bridge_mdb_entry *mp, bool notify); +void br_multicast_host_leave(struct net_bridge_mdb_entry *mp, bool notify); #define mlock_dereference(X, br) \ rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index f5b2aeebbfe9..bb98984cd27d 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -1281,6 +1281,8 @@ int br_vlan_get_info(const struct net_device *dev, u16 vid, p_vinfo->vid = vid; p_vinfo->flags = v->flags; + if (vid == br_get_pvid(vg)) + p_vinfo->flags |= BRIDGE_VLAN_INFO_PVID; return 0; } EXPORT_SYMBOL_GPL(br_vlan_get_info); diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c index 2c8fe24400e5..68c2519bdc52 100644 --- a/net/bridge/netfilter/ebt_802_3.c +++ b/net/bridge/netfilter/ebt_802_3.c @@ -11,7 +11,13 @@ #include <linux/module.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_802_3.h> +#include <linux/skbuff.h> +#include <uapi/linux/netfilter_bridge/ebt_802_3.h> + +static struct ebt_802_3_hdr *ebt_802_3_hdr(const struct sk_buff *skb) +{ + return (struct ebt_802_3_hdr *)skb_mac_header(skb); +} static bool ebt_802_3_mt(const struct sk_buff *skb, struct xt_action_param *par) diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c index 4f5444d2a526..8842798c29e6 100644 --- a/net/bridge/netfilter/nf_conntrack_bridge.c +++ b/net/bridge/netfilter/nf_conntrack_bridge.c @@ -17,7 +17,6 @@ #include <net/netfilter/nf_conntrack_bridge.h> #include <linux/netfilter/nf_tables.h> -#include <net/netfilter/ipv6/nf_defrag_ipv6.h> #include <net/netfilter/nf_tables.h> #include "../br_private.h" @@ -27,9 +26,9 @@ */ static int nf_br_ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, - struct nf_ct_bridge_frag_data *data, + struct nf_bridge_frag_data *data, int (*output)(struct net *, struct sock *sk, - const struct nf_ct_bridge_frag_data *data, + const struct nf_bridge_frag_data *data, struct sk_buff *)) { int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; @@ -279,7 +278,7 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb, } static void nf_ct_bridge_frag_save(struct sk_buff *skb, - struct nf_ct_bridge_frag_data *data) + struct nf_bridge_frag_data *data) { if (skb_vlan_tag_present(skb)) { data->vlan_present = true; @@ -294,10 +293,10 @@ static void nf_ct_bridge_frag_save(struct sk_buff *skb, static unsigned int nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state, int (*output)(struct net *, struct sock *sk, - const struct nf_ct_bridge_frag_data *data, + const struct 
nf_bridge_frag_data *data, struct sk_buff *)) { - struct nf_ct_bridge_frag_data data; + struct nf_bridge_frag_data data; if (!BR_INPUT_SKB_CB(skb)->frag_max_size) return NF_ACCEPT; @@ -320,7 +319,7 @@ nf_ct_bridge_refrag(struct sk_buff *skb, const struct nf_hook_state *state, /* Actually only slow path refragmentation needs this. */ static int nf_ct_bridge_frag_restore(struct sk_buff *skb, - const struct nf_ct_bridge_frag_data *data) + const struct nf_bridge_frag_data *data) { int err; @@ -341,7 +340,7 @@ static int nf_ct_bridge_frag_restore(struct sk_buff *skb, } static int nf_ct_bridge_refrag_post(struct net *net, struct sock *sk, - const struct nf_ct_bridge_frag_data *data, + const struct nf_bridge_frag_data *data, struct sk_buff *skb) { int err; diff --git a/net/can/Kconfig b/net/can/Kconfig index 0f9fe846ddef..d77042752457 100644 --- a/net/can/Kconfig +++ b/net/can/Kconfig @@ -8,11 +8,12 @@ menuconfig CAN tristate "CAN bus subsystem support" ---help--- Controller Area Network (CAN) is a slow (up to 1Mbit/s) serial - communications protocol which was developed by Bosch in - 1991, mainly for automotive, but now widely used in marine - (NMEA2000), industrial, and medical applications. - More information on the CAN network protocol family PF_CAN - is contained in <Documentation/networking/can.rst>. + communications protocol. Development of the CAN bus started in + 1983 at Robert Bosch GmbH, and the protocol was officially + released in 1986. The CAN bus was originally mainly for automotive, + but is now widely used in marine (NMEA2000), industrial, and medical + applications. More information on the CAN network protocol family + PF_CAN is contained in <Documentation/networking/can.rst>. If you want CAN support you should say Y here and also to the specific driver for your controller(s) below. @@ -52,6 +53,8 @@ config CAN_GW They can be modified with AND/OR/XOR/SET operations as configured by the netlink configuration interface known e.g. from iptables. 
+source "net/can/j1939/Kconfig" + source "drivers/net/can/Kconfig" endif diff --git a/net/can/Makefile b/net/can/Makefile index 1242bbbfe57f..08bd217fc051 100644 --- a/net/can/Makefile +++ b/net/can/Makefile @@ -15,3 +15,5 @@ can-bcm-y := bcm.o obj-$(CONFIG_CAN_GW) += can-gw.o can-gw-y := gw.o + +obj-$(CONFIG_CAN_J1939) += j1939/ diff --git a/net/can/af_can.c b/net/can/af_can.c index 80281ef2ccbd..5518a7d9eed9 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -1,5 +1,5 @@ -/* - * af_can.c - Protocol family CAN core module +// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) +/* af_can.c - Protocol family CAN core module * (used by different CAN protocol modules) * * Copyright (c) 2002-2017 Volkswagen Group Electronic Research @@ -58,6 +58,7 @@ #include <linux/can.h> #include <linux/can/core.h> #include <linux/can/skb.h> +#include <linux/can/can-ml.h> #include <linux/ratelimit.h> #include <net/net_namespace.h> #include <net/sock.h> @@ -83,18 +84,7 @@ static DEFINE_MUTEX(proto_tab_lock); static atomic_t skbcounter = ATOMIC_INIT(0); -/* - * af_can socket functions - */ - -int can_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) -{ - switch (cmd) { - default: - return -ENOIOCTLCMD; - } -} -EXPORT_SYMBOL(can_ioctl); +/* af_can socket functions */ static void can_sock_destruct(struct sock *sk) { @@ -140,14 +130,13 @@ static int can_create(struct net *net, struct socket *sock, int protocol, err = request_module("can-proto-%d", protocol); - /* - * In case of error we only print a message but don't + /* In case of error we only print a message but don't * return the error code immediately. Below we will * return -EPROTONOSUPPORT */ if (err) - printk_ratelimited(KERN_ERR "can: request_module " - "(can-proto-%d) failed.\n", protocol); + pr_err_ratelimited("can: request_module (can-proto-%d) failed.\n", + protocol); cp = can_get_proto(protocol); } @@ -188,9 +177,7 @@ static int can_create(struct net *net, struct socket *sock, int protocol, return err; } -/* - * af_can tx path - */ +/* af_can tx path */ /** * can_send - transmit a CAN frame (optional with local loopback) @@ -212,7 +199,7 @@ int can_send(struct sk_buff *skb, int loop) { struct sk_buff *newskb = NULL; struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - struct s_stats *can_stats = dev_net(skb->dev)->can.can_stats; + struct can_pkg_stats *pkg_stats = dev_net(skb->dev)->can.pkg_stats; int err = -EINVAL; if (skb->len == CAN_MTU) { @@ -223,11 +210,11 @@ int can_send(struct sk_buff *skb, int loop) skb->protocol = htons(ETH_P_CANFD); if (unlikely(cfd->len > CANFD_MAX_DLEN)) goto inval_skb; - } else + } else { goto inval_skb; + } - /* - * Make sure the CAN frame can pass the selected CAN netdevice. + /* Make sure the CAN frame can pass the selected CAN netdevice. * As structs can_frame and canfd_frame are similar, we can provide * CAN FD frames to legacy CAN drivers as long as the length is <= 8 */ @@ -258,8 +245,7 @@ int can_send(struct sk_buff *skb, int loop) /* indication for the CAN driver: do loopback */ skb->pkt_type = PACKET_LOOPBACK; - /* - * The reference to the originating sock may be required + /* The reference to the originating sock may be required * by the receiving socket to check whether the frame is * its own. 
Example: can_raw sockopt CAN_RAW_RECV_OWN_MSGS * Therefore we have to ensure that skb->sk remains the @@ -268,8 +254,7 @@ int can_send(struct sk_buff *skb, int loop) */ if (!(skb->dev->flags & IFF_ECHO)) { - /* - * If the interface is not capable to do loopback + /* If the interface is not capable to do loopback * itself, we do it here. */ newskb = skb_clone(skb, GFP_ATOMIC); @@ -301,8 +286,8 @@ int can_send(struct sk_buff *skb, int loop) netif_rx_ni(newskb); /* update statistics */ - can_stats->tx_frames++; - can_stats->tx_frames_delta++; + pkg_stats->tx_frames++; + pkg_stats->tx_frames_delta++; return 0; @@ -312,17 +297,17 @@ inval_skb: } EXPORT_SYMBOL(can_send); -/* - * af_can rx path - */ +/* af_can rx path */ -static struct can_dev_rcv_lists *find_dev_rcv_lists(struct net *net, - struct net_device *dev) +static struct can_dev_rcv_lists *can_dev_rcv_lists_find(struct net *net, + struct net_device *dev) { - if (!dev) - return net->can.can_rx_alldev_list; - else - return (struct can_dev_rcv_lists *)dev->ml_priv; + if (dev) { + struct can_ml_priv *ml_priv = dev->ml_priv; + return &ml_priv->dev_rcv_lists; + } else { + return net->can.rx_alldev_list; + } } /** @@ -349,7 +334,7 @@ static unsigned int effhash(canid_t can_id) } /** - * find_rcv_list - determine optimal filterlist inside device filter struct + * can_rcv_list_find - determine optimal filterlist inside device filter struct * @can_id: pointer to CAN identifier of a given can_filter * @mask: pointer to CAN mask of a given can_filter * @d: pointer to the device filter struct @@ -375,8 +360,8 @@ static unsigned int effhash(canid_t can_id) * Constistency checked mask. * Reduced can_id to have a preprocessed filter compare value. */ -static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, - struct can_dev_rcv_lists *d) +static struct hlist_head *can_rcv_list_find(canid_t *can_id, canid_t *mask, + struct can_dev_rcv_lists *dev_rcv_lists) { canid_t inv = *can_id & CAN_INV_FILTER; /* save flag before masking */ @@ -384,7 +369,7 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, if (*mask & CAN_ERR_FLAG) { /* clear CAN_ERR_FLAG in filter entry */ *mask &= CAN_ERR_MASK; - return &d->rx[RX_ERR]; + return &dev_rcv_lists->rx[RX_ERR]; } /* with cleared CAN_ERR_FLAG we have a simple mask/value filterpair */ @@ -400,27 +385,26 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask, /* inverse can_id/can_mask filter */ if (inv) - return &d->rx[RX_INV]; + return &dev_rcv_lists->rx[RX_INV]; /* mask == 0 => no condition testing at receive time */ if (!(*mask)) - return &d->rx[RX_ALL]; + return &dev_rcv_lists->rx[RX_ALL]; /* extra filterlists for the subscription of a single non-RTR can_id */ if (((*mask & CAN_EFF_RTR_FLAGS) == CAN_EFF_RTR_FLAGS) && !(*can_id & CAN_RTR_FLAG)) { - if (*can_id & CAN_EFF_FLAG) { if (*mask == (CAN_EFF_MASK | CAN_EFF_RTR_FLAGS)) - return &d->rx_eff[effhash(*can_id)]; + return &dev_rcv_lists->rx_eff[effhash(*can_id)]; } else { if (*mask == (CAN_SFF_MASK | CAN_EFF_RTR_FLAGS)) - return &d->rx_sff[*can_id]; + return &dev_rcv_lists->rx_sff[*can_id]; } } /* default: filter via can_id/can_mask */ - return &d->rx[RX_FIL]; + return &dev_rcv_lists->rx[RX_FIL]; } /** @@ -457,10 +441,10 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id, canid_t mask, void (*func)(struct sk_buff *, void *), void *data, char *ident, struct sock *sk) { - struct receiver *r; - struct hlist_head *rl; - struct can_dev_rcv_lists *d; - struct s_pstats *can_pstats = 
net->can.can_pstats; + struct receiver *rcv; + struct hlist_head *rcv_list; + struct can_dev_rcv_lists *dev_rcv_lists; + struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats; int err = 0; /* insert new receiver (dev,canid,mask) -> (func,data) */ @@ -471,50 +455,42 @@ int can_rx_register(struct net *net, struct net_device *dev, canid_t can_id, if (dev && !net_eq(net, dev_net(dev))) return -ENODEV; - r = kmem_cache_alloc(rcv_cache, GFP_KERNEL); - if (!r) + rcv = kmem_cache_alloc(rcv_cache, GFP_KERNEL); + if (!rcv) return -ENOMEM; - spin_lock(&net->can.can_rcvlists_lock); + spin_lock_bh(&net->can.rcvlists_lock); - d = find_dev_rcv_lists(net, dev); - if (d) { - rl = find_rcv_list(&can_id, &mask, d); + dev_rcv_lists = can_dev_rcv_lists_find(net, dev); + rcv_list = can_rcv_list_find(&can_id, &mask, dev_rcv_lists); - r->can_id = can_id; - r->mask = mask; - r->matches = 0; - r->func = func; - r->data = data; - r->ident = ident; - r->sk = sk; + rcv->can_id = can_id; + rcv->mask = mask; + rcv->matches = 0; + rcv->func = func; + rcv->data = data; + rcv->ident = ident; + rcv->sk = sk; - hlist_add_head_rcu(&r->list, rl); - d->entries++; + hlist_add_head_rcu(&rcv->list, rcv_list); + dev_rcv_lists->entries++; - can_pstats->rcv_entries++; - if (can_pstats->rcv_entries_max < can_pstats->rcv_entries) - can_pstats->rcv_entries_max = can_pstats->rcv_entries; - } else { - kmem_cache_free(rcv_cache, r); - err = -ENODEV; - } - - spin_unlock(&net->can.can_rcvlists_lock); + rcv_lists_stats->rcv_entries++; + rcv_lists_stats->rcv_entries_max = max(rcv_lists_stats->rcv_entries_max, + rcv_lists_stats->rcv_entries); + spin_unlock_bh(&net->can.rcvlists_lock); return err; } EXPORT_SYMBOL(can_rx_register); -/* - * can_rx_delete_receiver - rcu callback for single receiver entry removal - */ +/* can_rx_delete_receiver - rcu callback for single receiver entry removal */ static void can_rx_delete_receiver(struct rcu_head *rp) { - struct receiver *r = container_of(rp, struct receiver, rcu); - struct sock *sk = r->sk; + struct receiver *rcv = container_of(rp, struct receiver, rcu); + struct sock *sk = rcv->sk; - kmem_cache_free(rcv_cache, r); + kmem_cache_free(rcv_cache, rcv); if (sk) sock_put(sk); } @@ -534,10 +510,10 @@ void can_rx_unregister(struct net *net, struct net_device *dev, canid_t can_id, canid_t mask, void (*func)(struct sk_buff *, void *), void *data) { - struct receiver *r = NULL; - struct hlist_head *rl; - struct s_pstats *can_pstats = net->can.can_pstats; - struct can_dev_rcv_lists *d; + struct receiver *rcv = NULL; + struct hlist_head *rcv_list; + struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats; + struct can_dev_rcv_lists *dev_rcv_lists; if (dev && dev->type != ARPHRD_CAN) return; @@ -545,86 +521,69 @@ void can_rx_unregister(struct net *net, struct net_device *dev, canid_t can_id, if (dev && !net_eq(net, dev_net(dev))) return; - spin_lock(&net->can.can_rcvlists_lock); + spin_lock_bh(&net->can.rcvlists_lock); - d = find_dev_rcv_lists(net, dev); - if (!d) { - pr_err("BUG: receive list not found for " - "dev %s, id %03X, mask %03X\n", - DNAME(dev), can_id, mask); - goto out; - } + dev_rcv_lists = can_dev_rcv_lists_find(net, dev); + rcv_list = can_rcv_list_find(&can_id, &mask, dev_rcv_lists); - rl = find_rcv_list(&can_id, &mask, d); - - /* - * Search the receiver list for the item to delete. This should + /* Search the receiver list for the item to delete. This should * exist, since no receiver may be unregistered that hasn't * been registered before. 
*/ - - hlist_for_each_entry_rcu(r, rl, list) { - if (r->can_id == can_id && r->mask == mask && - r->func == func && r->data == data) + hlist_for_each_entry_rcu(rcv, rcv_list, list) { + if (rcv->can_id == can_id && rcv->mask == mask && + rcv->func == func && rcv->data == data) break; } - /* - * Check for bugs in CAN protocol implementations using af_can.c: - * 'r' will be NULL if no matching list item was found for removal. + /* Check for bugs in CAN protocol implementations using af_can.c: + * 'rcv' will be NULL if no matching list item was found for removal. */ - - if (!r) { - WARN(1, "BUG: receive list entry not found for dev %s, " - "id %03X, mask %03X\n", DNAME(dev), can_id, mask); + if (!rcv) { + WARN(1, "BUG: receive list entry not found for dev %s, id %03X, mask %03X\n", + DNAME(dev), can_id, mask); goto out; } - hlist_del_rcu(&r->list); - d->entries--; + hlist_del_rcu(&rcv->list); + dev_rcv_lists->entries--; - if (can_pstats->rcv_entries > 0) - can_pstats->rcv_entries--; - - /* remove device structure requested by NETDEV_UNREGISTER */ - if (d->remove_on_zero_entries && !d->entries) { - kfree(d); - dev->ml_priv = NULL; - } + if (rcv_lists_stats->rcv_entries > 0) + rcv_lists_stats->rcv_entries--; out: - spin_unlock(&net->can.can_rcvlists_lock); + spin_unlock_bh(&net->can.rcvlists_lock); /* schedule the receiver item for deletion */ - if (r) { - if (r->sk) - sock_hold(r->sk); - call_rcu(&r->rcu, can_rx_delete_receiver); + if (rcv) { + if (rcv->sk) + sock_hold(rcv->sk); + call_rcu(&rcv->rcu, can_rx_delete_receiver); } } EXPORT_SYMBOL(can_rx_unregister); -static inline void deliver(struct sk_buff *skb, struct receiver *r) +static inline void deliver(struct sk_buff *skb, struct receiver *rcv) { - r->func(skb, r->data); - r->matches++; + rcv->func(skb, rcv->data); + rcv->matches++; } -static int can_rcv_filter(struct can_dev_rcv_lists *d, struct sk_buff *skb) +static int can_rcv_filter(struct can_dev_rcv_lists *dev_rcv_lists, struct sk_buff *skb) { - struct receiver *r; + struct receiver *rcv; int matches = 0; struct can_frame *cf = (struct can_frame *)skb->data; canid_t can_id = cf->can_id; - if (d->entries == 0) + if (dev_rcv_lists->entries == 0) return 0; if (can_id & CAN_ERR_FLAG) { /* check for error message frame entries only */ - hlist_for_each_entry_rcu(r, &d->rx[RX_ERR], list) { - if (can_id & r->mask) { - deliver(skb, r); + hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx[RX_ERR], list) { + if (can_id & rcv->mask) { + deliver(skb, rcv); matches++; } } @@ -632,23 +591,23 @@ static int can_rcv_filter(struct can_dev_rcv_lists *d, struct sk_buff *skb) } /* check for unfiltered entries */ - hlist_for_each_entry_rcu(r, &d->rx[RX_ALL], list) { - deliver(skb, r); + hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx[RX_ALL], list) { + deliver(skb, rcv); matches++; } /* check for can_id/mask entries */ - hlist_for_each_entry_rcu(r, &d->rx[RX_FIL], list) { - if ((can_id & r->mask) == r->can_id) { - deliver(skb, r); + hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx[RX_FIL], list) { + if ((can_id & rcv->mask) == rcv->can_id) { + deliver(skb, rcv); matches++; } } /* check for inverted can_id/mask entries */ - hlist_for_each_entry_rcu(r, &d->rx[RX_INV], list) { - if ((can_id & r->mask) != r->can_id) { - deliver(skb, r); + hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx[RX_INV], list) { + if ((can_id & rcv->mask) != rcv->can_id) { + deliver(skb, rcv); matches++; } } @@ -658,16 +617,16 @@ static int can_rcv_filter(struct can_dev_rcv_lists *d, struct sk_buff *skb) return matches; if (can_id & 
CAN_EFF_FLAG) { - hlist_for_each_entry_rcu(r, &d->rx_eff[effhash(can_id)], list) { - if (r->can_id == can_id) { - deliver(skb, r); + hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx_eff[effhash(can_id)], list) { + if (rcv->can_id == can_id) { + deliver(skb, rcv); matches++; } } } else { can_id &= CAN_SFF_MASK; - hlist_for_each_entry_rcu(r, &d->rx_sff[can_id], list) { - deliver(skb, r); + hlist_for_each_entry_rcu(rcv, &dev_rcv_lists->rx_sff[can_id], list) { + deliver(skb, rcv); matches++; } } @@ -677,14 +636,14 @@ static int can_rcv_filter(struct can_dev_rcv_lists *d, struct sk_buff *skb) static void can_receive(struct sk_buff *skb, struct net_device *dev) { - struct can_dev_rcv_lists *d; + struct can_dev_rcv_lists *dev_rcv_lists; struct net *net = dev_net(dev); - struct s_stats *can_stats = net->can.can_stats; + struct can_pkg_stats *pkg_stats = net->can.pkg_stats; int matches; /* update statistics */ - can_stats->rx_frames++; - can_stats->rx_frames_delta++; + pkg_stats->rx_frames++; + pkg_stats->rx_frames_delta++; /* create non-zero unique skb identifier together with *skb */ while (!(can_skb_prv(skb)->skbcnt)) @@ -693,12 +652,11 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev) rcu_read_lock(); /* deliver the packet to sockets listening on all devices */ - matches = can_rcv_filter(net->can.can_rx_alldev_list, skb); + matches = can_rcv_filter(net->can.rx_alldev_list, skb); /* find receive list for this device */ - d = find_dev_rcv_lists(net, dev); - if (d) - matches += can_rcv_filter(d, skb); + dev_rcv_lists = can_dev_rcv_lists_find(net, dev); + matches += can_rcv_filter(dev_rcv_lists, skb); rcu_read_unlock(); @@ -706,8 +664,8 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev) consume_skb(skb); if (matches > 0) { - can_stats->matches++; - can_stats->matches_delta++; + pkg_stats->matches++; + pkg_stats->matches_delta++; } } @@ -729,7 +687,7 @@ static int can_rcv(struct sk_buff *skb, struct net_device *dev, } static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) + struct packet_type *pt, struct net_device *orig_dev) { struct canfd_frame *cfd = (struct canfd_frame *)skb->data; @@ -745,9 +703,7 @@ static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, return NET_RX_SUCCESS; } -/* - * af_can protocol functions - */ +/* af_can protocol functions */ /** * can_proto_register - register CAN transport protocol @@ -778,8 +734,9 @@ int can_proto_register(const struct can_proto *cp) if (rcu_access_pointer(proto_tab[proto])) { pr_err("can: protocol %d already registered\n", proto); err = -EBUSY; - } else + } else { RCU_INIT_POINTER(proto_tab[proto], cp); + } mutex_unlock(&proto_tab_lock); @@ -809,48 +766,19 @@ void can_proto_unregister(const struct can_proto *cp) } EXPORT_SYMBOL(can_proto_unregister); -/* - * af_can notifier to create/remove CAN netdevice specific structs - */ +/* af_can notifier to create/remove CAN netdevice specific structs */ static int can_notifier(struct notifier_block *nb, unsigned long msg, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct can_dev_rcv_lists *d; if (dev->type != ARPHRD_CAN) return NOTIFY_DONE; switch (msg) { - case NETDEV_REGISTER: - - /* create new dev_rcv_lists for this device */ - d = kzalloc(sizeof(*d), GFP_KERNEL); - if (!d) - return NOTIFY_DONE; - BUG_ON(dev->ml_priv); - dev->ml_priv = d; - - break; - - case NETDEV_UNREGISTER: - spin_lock(&dev_net(dev)->can.can_rcvlists_lock); - - d = dev->ml_priv; - if (d) 
{ - if (d->entries) - d->remove_on_zero_entries = 1; - else { - kfree(d); - dev->ml_priv = NULL; - } - } else - pr_err("can: notifier: receive list not found for dev " - "%s\n", dev->name); - - spin_unlock(&dev_net(dev)->can.can_rcvlists_lock); - + WARN(!dev->ml_priv, + "No CAN mid layer private allocated, please fix your driver and use alloc_candev()!\n"); break; } @@ -859,71 +787,54 @@ static int can_notifier(struct notifier_block *nb, unsigned long msg, static int can_pernet_init(struct net *net) { - spin_lock_init(&net->can.can_rcvlists_lock); - net->can.can_rx_alldev_list = - kzalloc(sizeof(struct can_dev_rcv_lists), GFP_KERNEL); - if (!net->can.can_rx_alldev_list) + spin_lock_init(&net->can.rcvlists_lock); + net->can.rx_alldev_list = + kzalloc(sizeof(*net->can.rx_alldev_list), GFP_KERNEL); + if (!net->can.rx_alldev_list) goto out; - net->can.can_stats = kzalloc(sizeof(struct s_stats), GFP_KERNEL); - if (!net->can.can_stats) - goto out_free_alldev_list; - net->can.can_pstats = kzalloc(sizeof(struct s_pstats), GFP_KERNEL); - if (!net->can.can_pstats) - goto out_free_can_stats; + net->can.pkg_stats = kzalloc(sizeof(*net->can.pkg_stats), GFP_KERNEL); + if (!net->can.pkg_stats) + goto out_free_rx_alldev_list; + net->can.rcv_lists_stats = kzalloc(sizeof(*net->can.rcv_lists_stats), GFP_KERNEL); + if (!net->can.rcv_lists_stats) + goto out_free_pkg_stats; if (IS_ENABLED(CONFIG_PROC_FS)) { /* the statistics are updated every second (timer triggered) */ if (stats_timer) { - timer_setup(&net->can.can_stattimer, can_stat_update, + timer_setup(&net->can.stattimer, can_stat_update, 0); - mod_timer(&net->can.can_stattimer, + mod_timer(&net->can.stattimer, round_jiffies(jiffies + HZ)); } - net->can.can_stats->jiffies_init = jiffies; + net->can.pkg_stats->jiffies_init = jiffies; can_init_proc(net); } return 0; - out_free_can_stats: - kfree(net->can.can_stats); - out_free_alldev_list: - kfree(net->can.can_rx_alldev_list); + out_free_pkg_stats: + kfree(net->can.pkg_stats); + out_free_rx_alldev_list: + kfree(net->can.rx_alldev_list); out: return -ENOMEM; } static void can_pernet_exit(struct net *net) { - struct net_device *dev; - if (IS_ENABLED(CONFIG_PROC_FS)) { can_remove_proc(net); if (stats_timer) - del_timer_sync(&net->can.can_stattimer); + del_timer_sync(&net->can.stattimer); } - /* remove created dev_rcv_lists from still registered CAN devices */ - rcu_read_lock(); - for_each_netdev_rcu(net, dev) { - if (dev->type == ARPHRD_CAN && dev->ml_priv) { - struct can_dev_rcv_lists *d = dev->ml_priv; - - BUG_ON(d->entries); - kfree(d); - dev->ml_priv = NULL; - } - } - rcu_read_unlock(); - - kfree(net->can.can_rx_alldev_list); - kfree(net->can.can_stats); - kfree(net->can.can_pstats); + kfree(net->can.rx_alldev_list); + kfree(net->can.pkg_stats); + kfree(net->can.rcv_lists_stats); } -/* - * af_can module init/exit functions - */ +/* af_can module init/exit functions */ static struct packet_type can_packet __read_mostly = { .type = cpu_to_be16(ETH_P_CAN), diff --git a/net/can/af_can.h b/net/can/af_can.h index 9cb3719632bd..7c2d9161e224 100644 --- a/net/can/af_can.h +++ b/net/can/af_can.h @@ -1,5 +1,5 @@ -/* - * Copyright (c) 2002-2007 Volkswagen Group Electronic Research +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */ +/* Copyright (c) 2002-2007 Volkswagen Group Electronic Research * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -53,32 +53,17 @@ struct receiver { canid_t can_id; canid_t mask; unsigned long matches; - void (*func)(struct sk_buff *, void *); + void (*func)(struct sk_buff *skb, void *data); void *data; char *ident; struct sock *sk; struct rcu_head rcu; }; -#define CAN_SFF_RCV_ARRAY_SZ (1 << CAN_SFF_ID_BITS) -#define CAN_EFF_RCV_HASH_BITS 10 -#define CAN_EFF_RCV_ARRAY_SZ (1 << CAN_EFF_RCV_HASH_BITS) - -enum { RX_ERR, RX_ALL, RX_FIL, RX_INV, RX_MAX }; - -/* per device receive filters linked at dev->ml_priv */ -struct can_dev_rcv_lists { - struct hlist_head rx[RX_MAX]; - struct hlist_head rx_sff[CAN_SFF_RCV_ARRAY_SZ]; - struct hlist_head rx_eff[CAN_EFF_RCV_ARRAY_SZ]; - int remove_on_zero_entries; - int entries; -}; - /* statistic structures */ /* can be reset e.g. by can_init_stats() */ -struct s_stats { +struct can_pkg_stats { unsigned long jiffies_init; unsigned long rx_frames; @@ -103,7 +88,7 @@ struct s_stats { }; /* persistent statistics */ -struct s_pstats { +struct can_rcv_lists_stats { unsigned long stats_reset; unsigned long user_reset; unsigned long rcv_entries; diff --git a/net/can/bcm.c b/net/can/bcm.c index a34ee52f19ea..c96fa0f33db3 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) /* * bcm.c - Broadcast Manager to filter/send (cyclic) CAN content * @@ -105,7 +106,6 @@ struct bcm_op { unsigned long frames_abs, frames_filtered; struct bcm_timeval ival1, ival2; struct hrtimer timer, thrtimer; - struct tasklet_struct tsklet, thrtsklet; ktime_t rx_stamp, kt_ival1, kt_ival2, kt_lastmsg; int rx_ifindex; int cfsiz; @@ -370,25 +370,34 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, } } -static void bcm_tx_start_timer(struct bcm_op *op) +static bool bcm_tx_set_expiry(struct bcm_op *op, struct hrtimer *hrt) { + ktime_t ival; + if (op->kt_ival1 && op->count) - hrtimer_start(&op->timer, - ktime_add(ktime_get(), op->kt_ival1), - HRTIMER_MODE_ABS); + ival = op->kt_ival1; else if (op->kt_ival2) - hrtimer_start(&op->timer, - ktime_add(ktime_get(), op->kt_ival2), - HRTIMER_MODE_ABS); + ival = op->kt_ival2; + else + return false; + + hrtimer_set_expires(hrt, ktime_add(ktime_get(), ival)); + return true; } -static void bcm_tx_timeout_tsklet(unsigned long data) +static void bcm_tx_start_timer(struct bcm_op *op) { - struct bcm_op *op = (struct bcm_op *)data; + if (bcm_tx_set_expiry(op, &op->timer)) + hrtimer_start_expires(&op->timer, HRTIMER_MODE_ABS_SOFT); +} + +/* bcm_tx_timeout_handler - performs cyclic CAN frame transmissions */ +static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) +{ + struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); struct bcm_msg_head msg_head; if (op->kt_ival1 && (op->count > 0)) { - op->count--; if (!op->count && (op->flags & TX_COUNTEVT)) { @@ -405,22 +414,12 @@ static void bcm_tx_timeout_tsklet(unsigned long data) } bcm_can_tx(op); - } else if (op->kt_ival2) + } else if (op->kt_ival2) { bcm_can_tx(op); + } - bcm_tx_start_timer(op); -} - -/* - * bcm_tx_timeout_handler - performs cyclic CAN frame transmissions - */ -static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) -{ - struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); - - tasklet_schedule(&op->tsklet); - - return HRTIMER_NORESTART; + return bcm_tx_set_expiry(op, &op->timer) ? 
+ HRTIMER_RESTART : HRTIMER_NORESTART; } /* @@ -486,7 +485,7 @@ static void bcm_rx_update_and_send(struct bcm_op *op, /* do not send the saved data - only start throttle timer */ hrtimer_start(&op->thrtimer, ktime_add(op->kt_lastmsg, op->kt_ival2), - HRTIMER_MODE_ABS); + HRTIMER_MODE_ABS_SOFT); return; } @@ -545,14 +544,21 @@ static void bcm_rx_starttimer(struct bcm_op *op) return; if (op->kt_ival1) - hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL); + hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL_SOFT); } -static void bcm_rx_timeout_tsklet(unsigned long data) +/* bcm_rx_timeout_handler - when the (cyclic) CAN frame reception timed out */ +static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer) { - struct bcm_op *op = (struct bcm_op *)data; + struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); struct bcm_msg_head msg_head; + /* if user wants to be informed, when cyclic CAN-Messages come back */ + if ((op->flags & RX_ANNOUNCE_RESUME) && op->last_frames) { + /* clear received CAN frames to indicate 'nothing received' */ + memset(op->last_frames, 0, op->nframes * op->cfsiz); + } + /* create notification to user */ msg_head.opcode = RX_TIMEOUT; msg_head.flags = op->flags; @@ -563,25 +569,6 @@ static void bcm_rx_timeout_tsklet(unsigned long data) msg_head.nframes = 0; bcm_send_to_user(op, &msg_head, NULL, 0); -} - -/* - * bcm_rx_timeout_handler - when the (cyclic) CAN frame reception timed out - */ -static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer) -{ - struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); - - /* schedule before NET_RX_SOFTIRQ */ - tasklet_hi_schedule(&op->tsklet); - - /* no restart of the timer is done here! */ - - /* if user wants to be informed, when cyclic CAN-Messages come back */ - if ((op->flags & RX_ANNOUNCE_RESUME) && op->last_frames) { - /* clear received CAN frames to indicate 'nothing received' */ - memset(op->last_frames, 0, op->nframes * op->cfsiz); - } return HRTIMER_NORESTART; } @@ -589,14 +576,12 @@ static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer) /* * bcm_rx_do_flush - helper for bcm_rx_thr_flush */ -static inline int bcm_rx_do_flush(struct bcm_op *op, int update, - unsigned int index) +static inline int bcm_rx_do_flush(struct bcm_op *op, unsigned int index) { struct canfd_frame *lcf = op->last_frames + op->cfsiz * index; if ((op->last_frames) && (lcf->flags & RX_THR)) { - if (update) - bcm_rx_changed(op, lcf); + bcm_rx_changed(op, lcf); return 1; } return 0; @@ -604,11 +589,8 @@ static inline int bcm_rx_do_flush(struct bcm_op *op, int update, /* * bcm_rx_thr_flush - Check for throttled data and send it to the userspace - * - * update == 0 : just check if throttled data is available (any irq context) - * update == 1 : check and send throttled data to userspace (soft_irq context) */ -static int bcm_rx_thr_flush(struct bcm_op *op, int update) +static int bcm_rx_thr_flush(struct bcm_op *op) { int updated = 0; @@ -617,24 +599,16 @@ static int bcm_rx_thr_flush(struct bcm_op *op, int update) /* for MUX filter we start at index 1 */ for (i = 1; i < op->nframes; i++) - updated += bcm_rx_do_flush(op, update, i); + updated += bcm_rx_do_flush(op, i); } else { /* for RX_FILTER_ID and simple filter */ - updated += bcm_rx_do_flush(op, update, 0); + updated += bcm_rx_do_flush(op, 0); } return updated; } -static void bcm_rx_thr_tsklet(unsigned long data) -{ - struct bcm_op *op = (struct bcm_op *)data; - - /* push the changed data to the userspace */ - 
bcm_rx_thr_flush(op, 1); -} - /* * bcm_rx_thr_handler - the time for blocked content updates is over now: * Check for throttled data and send it to the userspace @@ -643,9 +617,7 @@ static enum hrtimer_restart bcm_rx_thr_handler(struct hrtimer *hrtimer) { struct bcm_op *op = container_of(hrtimer, struct bcm_op, thrtimer); - tasklet_schedule(&op->thrtsklet); - - if (bcm_rx_thr_flush(op, 0)) { + if (bcm_rx_thr_flush(op)) { hrtimer_forward(hrtimer, ktime_get(), op->kt_ival2); return HRTIMER_RESTART; } else { @@ -741,23 +713,8 @@ static struct bcm_op *bcm_find_op(struct list_head *ops, static void bcm_remove_op(struct bcm_op *op) { - if (op->tsklet.func) { - while (test_bit(TASKLET_STATE_SCHED, &op->tsklet.state) || - test_bit(TASKLET_STATE_RUN, &op->tsklet.state) || - hrtimer_active(&op->timer)) { - hrtimer_cancel(&op->timer); - tasklet_kill(&op->tsklet); - } - } - - if (op->thrtsklet.func) { - while (test_bit(TASKLET_STATE_SCHED, &op->thrtsklet.state) || - test_bit(TASKLET_STATE_RUN, &op->thrtsklet.state) || - hrtimer_active(&op->thrtimer)) { - hrtimer_cancel(&op->thrtimer); - tasklet_kill(&op->thrtsklet); - } - } + hrtimer_cancel(&op->timer); + hrtimer_cancel(&op->thrtimer); if ((op->frames) && (op->frames != &op->sframe)) kfree(op->frames); @@ -990,15 +947,13 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, op->ifindex = ifindex; /* initialize uninitialized (kzalloc) structure */ - hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(&op->timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_SOFT); op->timer.function = bcm_tx_timeout_handler; - /* initialize tasklet for tx countevent notification */ - tasklet_init(&op->tsklet, bcm_tx_timeout_tsklet, - (unsigned long) op); - /* currently unused in tx_ops */ - hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_SOFT); /* add this bcm_op to the list of the tx_ops */ list_add(&op->list, &bo->tx_ops); @@ -1167,20 +1122,14 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, op->rx_ifindex = ifindex; /* initialize uninitialized (kzalloc) structure */ - hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(&op->timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_SOFT); op->timer.function = bcm_rx_timeout_handler; - /* initialize tasklet for rx timeout notification */ - tasklet_init(&op->tsklet, bcm_rx_timeout_tsklet, - (unsigned long) op); - - hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_SOFT); op->thrtimer.function = bcm_rx_thr_handler; - /* initialize tasklet for rx throttle handling */ - tasklet_init(&op->thrtsklet, bcm_rx_thr_tsklet, - (unsigned long) op); - /* add this bcm_op to the list of the rx_ops */ list_add(&op->list, &bo->rx_ops); @@ -1226,12 +1175,12 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, */ op->kt_lastmsg = 0; hrtimer_cancel(&op->thrtimer); - bcm_rx_thr_flush(op, 1); + bcm_rx_thr_flush(op); } if ((op->flags & STARTTIMER) && op->kt_ival1) hrtimer_start(&op->timer, op->kt_ival1, - HRTIMER_MODE_REL); + HRTIMER_MODE_REL_SOFT); } /* now we can register for can_ids, if we added a new bcm_op */ @@ -1345,7 +1294,7 @@ static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) /* no bound device as default => check msg_name */ DECLARE_SOCKADDR(struct sockaddr_can *, addr, msg->msg_name); - if (msg->msg_namelen < sizeof(*addr)) + if (msg->msg_namelen 
< CAN_REQUIRED_SIZE(*addr, can_ifindex)) return -EINVAL; if (addr->can_family != AF_CAN) @@ -1587,7 +1536,7 @@ static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, struct net *net = sock_net(sk); int ret = 0; - if (len < sizeof(*addr)) + if (len < CAN_REQUIRED_SIZE(*addr, can_ifindex)) return -EINVAL; lock_sock(sk); @@ -1679,6 +1628,13 @@ static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, return size; } +static int bcm_sock_no_ioctlcmd(struct socket *sock, unsigned int cmd, + unsigned long arg) +{ + /* no ioctls for socket layer -> hand it down to NIC layer */ + return -ENOIOCTLCMD; +} + static const struct proto_ops bcm_ops = { .family = PF_CAN, .release = bcm_release, @@ -1688,7 +1644,7 @@ static const struct proto_ops bcm_ops = { .accept = sock_no_accept, .getname = sock_no_getname, .poll = datagram_poll, - .ioctl = can_ioctl, /* use can_ioctl() from af_can.c */ + .ioctl = bcm_sock_no_ioctlcmd, .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = sock_no_shutdown, diff --git a/net/can/gw.c b/net/can/gw.c index 72711053ebe6..65d60c93af29 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -1,7 +1,7 @@ -/* - * gw.c - CAN frame Gateway/Router/Bridge with netlink interface +// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) +/* gw.c - CAN frame Gateway/Router/Bridge with netlink interface * - * Copyright (c) 2017 Volkswagen Group Electronic Research + * Copyright (c) 2019 Volkswagen Group Electronic Research * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -59,7 +59,7 @@ #include <net/net_namespace.h> #include <net/sock.h> -#define CAN_GW_VERSION "20170425" +#define CAN_GW_VERSION "20190810" #define CAN_GW_NAME "can-gw" MODULE_DESCRIPTION("PF_CAN netlink gateway"); @@ -85,10 +85,10 @@ static struct kmem_cache *cgw_cache __read_mostly; /* structure that contains the (on-the-fly) CAN frame modifications */ struct cf_mod { struct { - struct can_frame and; - struct can_frame or; - struct can_frame xor; - struct can_frame set; + struct canfd_frame and; + struct canfd_frame or; + struct canfd_frame xor; + struct canfd_frame set; } modframe; struct { u8 and; @@ -96,7 +96,7 @@ struct cf_mod { u8 xor; u8 set; } modtype; - void (*modfunc[MAX_MODFUNCTIONS])(struct can_frame *cf, + void (*modfunc[MAX_MODFUNCTIONS])(struct canfd_frame *cf, struct cf_mod *mod); /* CAN frame checksum calculation after CAN frame modifications */ @@ -105,15 +105,15 @@ struct cf_mod { struct cgw_csum_crc8 crc8; } csum; struct { - void (*xor)(struct can_frame *cf, struct cgw_csum_xor *xor); - void (*crc8)(struct can_frame *cf, struct cgw_csum_crc8 *crc8); + void (*xor)(struct canfd_frame *cf, + struct cgw_csum_xor *xor); + void (*crc8)(struct canfd_frame *cf, + struct cgw_csum_crc8 *crc8); } csumfunc; u32 uid; }; - -/* - * So far we just support CAN -> CAN routing and frame modifications. +/* So far we just support CAN -> CAN routing and frame modifications. * * The internal can_can_gw structure contains data and attributes for * a CAN -> CAN gateway job. 
@@ -151,39 +151,88 @@ struct cgw_job { /* modification functions that are invoked in the hot path in can_can_gw_rcv */ -#define MODFUNC(func, op) static void func(struct can_frame *cf, \ +#define MODFUNC(func, op) static void func(struct canfd_frame *cf, \ struct cf_mod *mod) { op ; } MODFUNC(mod_and_id, cf->can_id &= mod->modframe.and.can_id) -MODFUNC(mod_and_dlc, cf->can_dlc &= mod->modframe.and.can_dlc) +MODFUNC(mod_and_len, cf->len &= mod->modframe.and.len) +MODFUNC(mod_and_flags, cf->flags &= mod->modframe.and.flags) MODFUNC(mod_and_data, *(u64 *)cf->data &= *(u64 *)mod->modframe.and.data) MODFUNC(mod_or_id, cf->can_id |= mod->modframe.or.can_id) -MODFUNC(mod_or_dlc, cf->can_dlc |= mod->modframe.or.can_dlc) +MODFUNC(mod_or_len, cf->len |= mod->modframe.or.len) +MODFUNC(mod_or_flags, cf->flags |= mod->modframe.or.flags) MODFUNC(mod_or_data, *(u64 *)cf->data |= *(u64 *)mod->modframe.or.data) MODFUNC(mod_xor_id, cf->can_id ^= mod->modframe.xor.can_id) -MODFUNC(mod_xor_dlc, cf->can_dlc ^= mod->modframe.xor.can_dlc) +MODFUNC(mod_xor_len, cf->len ^= mod->modframe.xor.len) +MODFUNC(mod_xor_flags, cf->flags ^= mod->modframe.xor.flags) MODFUNC(mod_xor_data, *(u64 *)cf->data ^= *(u64 *)mod->modframe.xor.data) MODFUNC(mod_set_id, cf->can_id = mod->modframe.set.can_id) -MODFUNC(mod_set_dlc, cf->can_dlc = mod->modframe.set.can_dlc) +MODFUNC(mod_set_len, cf->len = mod->modframe.set.len) +MODFUNC(mod_set_flags, cf->flags = mod->modframe.set.flags) MODFUNC(mod_set_data, *(u64 *)cf->data = *(u64 *)mod->modframe.set.data) -static inline void canframecpy(struct can_frame *dst, struct can_frame *src) +static void mod_and_fddata(struct canfd_frame *cf, struct cf_mod *mod) +{ + int i; + + for (i = 0; i < CANFD_MAX_DLEN; i += 8) + *(u64 *)(cf->data + i) &= *(u64 *)(mod->modframe.and.data + i); +} + +static void mod_or_fddata(struct canfd_frame *cf, struct cf_mod *mod) +{ + int i; + + for (i = 0; i < CANFD_MAX_DLEN; i += 8) + *(u64 *)(cf->data + i) |= *(u64 *)(mod->modframe.or.data + i); +} + +static void mod_xor_fddata(struct canfd_frame *cf, struct cf_mod *mod) +{ + int i; + + for (i = 0; i < CANFD_MAX_DLEN; i += 8) + *(u64 *)(cf->data + i) ^= *(u64 *)(mod->modframe.xor.data + i); +} + +static void mod_set_fddata(struct canfd_frame *cf, struct cf_mod *mod) +{ + memcpy(cf->data, mod->modframe.set.data, CANFD_MAX_DLEN); +} + +static void canframecpy(struct canfd_frame *dst, struct can_frame *src) { - /* - * Copy the struct members separately to ensure that no uninitialized + /* Copy the struct members separately to ensure that no uninitialized * data are copied in the 3 bytes hole of the struct. This is needed * to make easy compares of the data in the struct cf_mod. */ dst->can_id = src->can_id; - dst->can_dlc = src->can_dlc; + dst->len = src->can_dlc; *(u64 *)dst->data = *(u64 *)src->data; } -static int cgw_chk_csum_parms(s8 fr, s8 to, s8 re) +static void canfdframecpy(struct canfd_frame *dst, struct canfd_frame *src) { - /* - * absolute dlc values 0 .. 7 => 0 .. 7, e.g. data [0] + /* Copy the struct members separately to ensure that no uninitialized + * data are copied in the 2 bytes hole of the struct. This is needed + * to make easy compares of the data in the struct cf_mod. + */ + + dst->can_id = src->can_id; + dst->flags = src->flags; + dst->len = src->len; + memcpy(dst->data, src->data, CANFD_MAX_DLEN); +} + +static int cgw_chk_csum_parms(s8 fr, s8 to, s8 re, struct rtcanmsg *r) +{ + s8 dlen = CAN_MAX_DLEN; + + if (r->flags & CGW_FLAGS_CAN_FD) + dlen = CANFD_MAX_DLEN; + + /* absolute dlc values 0 .. 
7 => 0 .. 7, e.g. data [0] * relative to received dlc -1 .. -8 : * e.g. for received dlc = 8 * -1 => index = 7 (data[7]) @@ -191,27 +240,27 @@ static int cgw_chk_csum_parms(s8 fr, s8 to, s8 re) * -8 => index = 0 (data[0]) */ - if (fr > -9 && fr < 8 && - to > -9 && to < 8 && - re > -9 && re < 8) + if (fr >= -dlen && fr < dlen && + to >= -dlen && to < dlen && + re >= -dlen && re < dlen) return 0; else return -EINVAL; } -static inline int calc_idx(int idx, int rx_dlc) +static inline int calc_idx(int idx, int rx_len) { if (idx < 0) - return rx_dlc + idx; + return rx_len + idx; else return idx; } -static void cgw_csum_xor_rel(struct can_frame *cf, struct cgw_csum_xor *xor) +static void cgw_csum_xor_rel(struct canfd_frame *cf, struct cgw_csum_xor *xor) { - int from = calc_idx(xor->from_idx, cf->can_dlc); - int to = calc_idx(xor->to_idx, cf->can_dlc); - int res = calc_idx(xor->result_idx, cf->can_dlc); + int from = calc_idx(xor->from_idx, cf->len); + int to = calc_idx(xor->to_idx, cf->len); + int res = calc_idx(xor->result_idx, cf->len); u8 val = xor->init_xor_val; int i; @@ -229,7 +278,7 @@ static void cgw_csum_xor_rel(struct can_frame *cf, struct cgw_csum_xor *xor) cf->data[res] = val; } -static void cgw_csum_xor_pos(struct can_frame *cf, struct cgw_csum_xor *xor) +static void cgw_csum_xor_pos(struct canfd_frame *cf, struct cgw_csum_xor *xor) { u8 val = xor->init_xor_val; int i; @@ -240,7 +289,7 @@ static void cgw_csum_xor_pos(struct can_frame *cf, struct cgw_csum_xor *xor) cf->data[xor->result_idx] = val; } -static void cgw_csum_xor_neg(struct can_frame *cf, struct cgw_csum_xor *xor) +static void cgw_csum_xor_neg(struct canfd_frame *cf, struct cgw_csum_xor *xor) { u8 val = xor->init_xor_val; int i; @@ -251,11 +300,12 @@ static void cgw_csum_xor_neg(struct can_frame *cf, struct cgw_csum_xor *xor) cf->data[xor->result_idx] = val; } -static void cgw_csum_crc8_rel(struct can_frame *cf, struct cgw_csum_crc8 *crc8) +static void cgw_csum_crc8_rel(struct canfd_frame *cf, + struct cgw_csum_crc8 *crc8) { - int from = calc_idx(crc8->from_idx, cf->can_dlc); - int to = calc_idx(crc8->to_idx, cf->can_dlc); - int res = calc_idx(crc8->result_idx, cf->can_dlc); + int from = calc_idx(crc8->from_idx, cf->len); + int to = calc_idx(crc8->to_idx, cf->len); + int res = calc_idx(crc8->result_idx, cf->len); u8 crc = crc8->init_crc_val; int i; @@ -264,96 +314,102 @@ static void cgw_csum_crc8_rel(struct can_frame *cf, struct cgw_csum_crc8 *crc8) if (from <= to) { for (i = crc8->from_idx; i <= crc8->to_idx; i++) - crc = crc8->crctab[crc^cf->data[i]]; + crc = crc8->crctab[crc ^ cf->data[i]]; } else { for (i = crc8->from_idx; i >= crc8->to_idx; i--) - crc = crc8->crctab[crc^cf->data[i]]; + crc = crc8->crctab[crc ^ cf->data[i]]; } switch (crc8->profile) { - case CGW_CRC8PRF_1U8: - crc = crc8->crctab[crc^crc8->profile_data[0]]; + crc = crc8->crctab[crc ^ crc8->profile_data[0]]; break; case CGW_CRC8PRF_16U8: - crc = crc8->crctab[crc^crc8->profile_data[cf->data[1] & 0xF]]; + crc = crc8->crctab[crc ^ crc8->profile_data[cf->data[1] & 0xF]]; break; case CGW_CRC8PRF_SFFID_XOR: - crc = crc8->crctab[crc^(cf->can_id & 0xFF)^ + crc = crc8->crctab[crc ^ (cf->can_id & 0xFF) ^ (cf->can_id >> 8 & 0xFF)]; break; - } - cf->data[crc8->result_idx] = crc^crc8->final_xor_val; + cf->data[crc8->result_idx] = crc ^ crc8->final_xor_val; } -static void cgw_csum_crc8_pos(struct can_frame *cf, struct cgw_csum_crc8 *crc8) +static void cgw_csum_crc8_pos(struct canfd_frame *cf, + struct cgw_csum_crc8 *crc8) { u8 crc = crc8->init_crc_val; int i; for (i = 
crc8->from_idx; i <= crc8->to_idx; i++) - crc = crc8->crctab[crc^cf->data[i]]; + crc = crc8->crctab[crc ^ cf->data[i]]; switch (crc8->profile) { - case CGW_CRC8PRF_1U8: - crc = crc8->crctab[crc^crc8->profile_data[0]]; + crc = crc8->crctab[crc ^ crc8->profile_data[0]]; break; case CGW_CRC8PRF_16U8: - crc = crc8->crctab[crc^crc8->profile_data[cf->data[1] & 0xF]]; + crc = crc8->crctab[crc ^ crc8->profile_data[cf->data[1] & 0xF]]; break; case CGW_CRC8PRF_SFFID_XOR: - crc = crc8->crctab[crc^(cf->can_id & 0xFF)^ + crc = crc8->crctab[crc ^ (cf->can_id & 0xFF) ^ (cf->can_id >> 8 & 0xFF)]; break; } - cf->data[crc8->result_idx] = crc^crc8->final_xor_val; + cf->data[crc8->result_idx] = crc ^ crc8->final_xor_val; } -static void cgw_csum_crc8_neg(struct can_frame *cf, struct cgw_csum_crc8 *crc8) +static void cgw_csum_crc8_neg(struct canfd_frame *cf, + struct cgw_csum_crc8 *crc8) { u8 crc = crc8->init_crc_val; int i; for (i = crc8->from_idx; i >= crc8->to_idx; i--) - crc = crc8->crctab[crc^cf->data[i]]; + crc = crc8->crctab[crc ^ cf->data[i]]; switch (crc8->profile) { - case CGW_CRC8PRF_1U8: - crc = crc8->crctab[crc^crc8->profile_data[0]]; + crc = crc8->crctab[crc ^ crc8->profile_data[0]]; break; case CGW_CRC8PRF_16U8: - crc = crc8->crctab[crc^crc8->profile_data[cf->data[1] & 0xF]]; + crc = crc8->crctab[crc ^ crc8->profile_data[cf->data[1] & 0xF]]; break; case CGW_CRC8PRF_SFFID_XOR: - crc = crc8->crctab[crc^(cf->can_id & 0xFF)^ + crc = crc8->crctab[crc ^ (cf->can_id & 0xFF) ^ (cf->can_id >> 8 & 0xFF)]; break; } - cf->data[crc8->result_idx] = crc^crc8->final_xor_val; + cf->data[crc8->result_idx] = crc ^ crc8->final_xor_val; } /* the receive & process & send function */ static void can_can_gw_rcv(struct sk_buff *skb, void *data) { struct cgw_job *gwj = (struct cgw_job *)data; - struct can_frame *cf; + struct canfd_frame *cf; struct sk_buff *nskb; int modidx = 0; - /* - * Do not handle CAN frames routed more than 'max_hops' times. + /* process strictly Classic CAN or CAN FD frames */ + if (gwj->flags & CGW_FLAGS_CAN_FD) { + if (skb->len != CANFD_MTU) + return; + } else { + if (skb->len != CAN_MTU) + return; + } + + /* Do not handle CAN frames routed more than 'max_hops' times. * In general we should never catch this delimiter which is intended * to cover a misconfiguration protection (e.g. circular CAN routes). * @@ -384,8 +440,7 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) can_skb_prv(skb)->ifindex == gwj->dst.dev->ifindex) return; - /* - * clone the given skb, which has not been done in can_rcv() + /* clone the given skb, which has not been done in can_rcv() * * When there is at least one modification function activated, * we need to copy the skb as we want to modify skb->data. @@ -410,7 +465,7 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) nskb->dev = gwj->dst.dev; /* pointer to modifiable CAN frame */ - cf = (struct can_frame *)nskb->data; + cf = (struct canfd_frame *)nskb->data; /* perform preprocessed modification functions if there are any */ while (modidx < MAX_MODFUNCTIONS && gwj->mod.modfunc[modidx]) @@ -419,26 +474,22 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) /* Has the CAN frame been modified? 
*/ if (modidx) { /* get available space for the processed CAN frame type */ - int max_len = nskb->len - offsetof(struct can_frame, data); + int max_len = nskb->len - offsetof(struct canfd_frame, data); /* dlc may have changed, make sure it fits to the CAN frame */ - if (cf->can_dlc > max_len) - goto out_delete; - - /* check for checksum updates in classic CAN length only */ - if (gwj->mod.csumfunc.crc8) { - if (cf->can_dlc > 8) - goto out_delete; - - (*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8); + if (cf->len > max_len) { + /* delete frame due to misconfiguration */ + gwj->deleted_frames++; + kfree_skb(nskb); + return; } - if (gwj->mod.csumfunc.xor) { - if (cf->can_dlc > 8) - goto out_delete; + /* check for checksum updates */ + if (gwj->mod.csumfunc.crc8) + (*gwj->mod.csumfunc.crc8)(cf, &gwj->mod.csum.crc8); + if (gwj->mod.csumfunc.xor) (*gwj->mod.csumfunc.xor)(cf, &gwj->mod.csum.xor); - } } /* clear the skb timestamp if not configured the other way */ @@ -450,14 +501,6 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) gwj->dropped_frames++; else gwj->handled_frames++; - - return; - - out_delete: - /* delete frame due to misconfiguration */ - gwj->deleted_frames++; - kfree_skb(nskb); - return; } static inline int cgw_register_filter(struct net *net, struct cgw_job *gwj) @@ -483,14 +526,12 @@ static int cgw_notifier(struct notifier_block *nb, return NOTIFY_DONE; if (msg == NETDEV_UNREGISTER) { - struct cgw_job *gwj = NULL; struct hlist_node *nx; ASSERT_RTNL(); hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) { - if (gwj->src.dev == dev || gwj->dst.dev == dev) { hlist_del(&gwj->list); cgw_unregister_filter(net, gwj); @@ -505,7 +546,6 @@ static int cgw_notifier(struct notifier_block *nb, static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, u32 pid, u32 seq, int flags) { - struct cgw_frame_mod mb; struct rtcanmsg *rtcan; struct nlmsghdr *nlh; @@ -542,32 +582,66 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, goto cancel; } - if (gwj->mod.modtype.and) { - memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.and; - if (nla_put(skb, CGW_MOD_AND, sizeof(mb), &mb) < 0) - goto cancel; - } + if (gwj->flags & CGW_FLAGS_CAN_FD) { + struct cgw_fdframe_mod mb; - if (gwj->mod.modtype.or) { - memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.or; - if (nla_put(skb, CGW_MOD_OR, sizeof(mb), &mb) < 0) - goto cancel; - } + if (gwj->mod.modtype.and) { + memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf)); + mb.modtype = gwj->mod.modtype.and; + if (nla_put(skb, CGW_FDMOD_AND, sizeof(mb), &mb) < 0) + goto cancel; + } - if (gwj->mod.modtype.xor) { - memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.xor; - if (nla_put(skb, CGW_MOD_XOR, sizeof(mb), &mb) < 0) - goto cancel; - } + if (gwj->mod.modtype.or) { + memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf)); + mb.modtype = gwj->mod.modtype.or; + if (nla_put(skb, CGW_FDMOD_OR, sizeof(mb), &mb) < 0) + goto cancel; + } - if (gwj->mod.modtype.set) { - memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf)); - mb.modtype = gwj->mod.modtype.set; - if (nla_put(skb, CGW_MOD_SET, sizeof(mb), &mb) < 0) - goto cancel; + if (gwj->mod.modtype.xor) { + memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf)); + mb.modtype = gwj->mod.modtype.xor; + if (nla_put(skb, CGW_FDMOD_XOR, sizeof(mb), &mb) < 0) + goto cancel; + } + + if (gwj->mod.modtype.set) { + memcpy(&mb.cf, &gwj->mod.modframe.set, 
sizeof(mb.cf)); + mb.modtype = gwj->mod.modtype.set; + if (nla_put(skb, CGW_FDMOD_SET, sizeof(mb), &mb) < 0) + goto cancel; + } + } else { + struct cgw_frame_mod mb; + + if (gwj->mod.modtype.and) { + memcpy(&mb.cf, &gwj->mod.modframe.and, sizeof(mb.cf)); + mb.modtype = gwj->mod.modtype.and; + if (nla_put(skb, CGW_MOD_AND, sizeof(mb), &mb) < 0) + goto cancel; + } + + if (gwj->mod.modtype.or) { + memcpy(&mb.cf, &gwj->mod.modframe.or, sizeof(mb.cf)); + mb.modtype = gwj->mod.modtype.or; + if (nla_put(skb, CGW_MOD_OR, sizeof(mb), &mb) < 0) + goto cancel; + } + + if (gwj->mod.modtype.xor) { + memcpy(&mb.cf, &gwj->mod.modframe.xor, sizeof(mb.cf)); + mb.modtype = gwj->mod.modtype.xor; + if (nla_put(skb, CGW_MOD_XOR, sizeof(mb), &mb) < 0) + goto cancel; + } + + if (gwj->mod.modtype.set) { + memcpy(&mb.cf, &gwj->mod.modframe.set, sizeof(mb.cf)); + mb.modtype = gwj->mod.modtype.set; + if (nla_put(skb, CGW_MOD_SET, sizeof(mb), &mb) < 0) + goto cancel; + } } if (gwj->mod.uid) { @@ -588,7 +662,6 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, } if (gwj->gwtype == CGW_TYPE_CAN_CAN) { - if (gwj->ccgw.filter.can_id || gwj->ccgw.filter.can_mask) { if (nla_put(skb, CGW_FILTER, sizeof(struct can_filter), &gwj->ccgw.filter) < 0) @@ -623,8 +696,9 @@ static int cgw_dump_jobs(struct sk_buff *skb, struct netlink_callback *cb) if (idx < s_idx) goto cont; - if (cgw_put_job(skb, gwj, RTM_NEWROUTE, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0) + if (cgw_put_job(skb, gwj, RTM_NEWROUTE, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0) break; cont: idx++; @@ -636,7 +710,7 @@ cont: return skb->len; } -static const struct nla_policy cgw_policy[CGW_MAX+1] = { +static const struct nla_policy cgw_policy[CGW_MAX + 1] = { [CGW_MOD_AND] = { .len = sizeof(struct cgw_frame_mod) }, [CGW_MOD_OR] = { .len = sizeof(struct cgw_frame_mod) }, [CGW_MOD_XOR] = { .len = sizeof(struct cgw_frame_mod) }, @@ -648,14 +722,18 @@ static const struct nla_policy cgw_policy[CGW_MAX+1] = { [CGW_FILTER] = { .len = sizeof(struct can_filter) }, [CGW_LIM_HOPS] = { .type = NLA_U8 }, [CGW_MOD_UID] = { .type = NLA_U32 }, + [CGW_FDMOD_AND] = { .len = sizeof(struct cgw_fdframe_mod) }, + [CGW_FDMOD_OR] = { .len = sizeof(struct cgw_fdframe_mod) }, + [CGW_FDMOD_XOR] = { .len = sizeof(struct cgw_fdframe_mod) }, + [CGW_FDMOD_SET] = { .len = sizeof(struct cgw_fdframe_mod) }, }; /* check for common and gwtype specific attributes */ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod, u8 gwtype, void *gwtypeattr, u8 *limhops) { - struct nlattr *tb[CGW_MAX+1]; - struct cgw_frame_mod mb; + struct nlattr *tb[CGW_MAX + 1]; + struct rtcanmsg *r = nlmsg_data(nlh); int modidx = 0; int err = 0; @@ -675,87 +753,166 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod, } /* check for AND/OR/XOR/SET modifications */ + if (r->flags & CGW_FLAGS_CAN_FD) { + struct cgw_fdframe_mod mb; - if (tb[CGW_MOD_AND]) { - nla_memcpy(&mb, tb[CGW_MOD_AND], CGW_MODATTR_LEN); + if (tb[CGW_FDMOD_AND]) { + nla_memcpy(&mb, tb[CGW_FDMOD_AND], CGW_FDMODATTR_LEN); - canframecpy(&mod->modframe.and, &mb.cf); - mod->modtype.and = mb.modtype; + canfdframecpy(&mod->modframe.and, &mb.cf); + mod->modtype.and = mb.modtype; - if (mb.modtype & CGW_MOD_ID) - mod->modfunc[modidx++] = mod_and_id; + if (mb.modtype & CGW_MOD_ID) + mod->modfunc[modidx++] = mod_and_id; - if (mb.modtype & CGW_MOD_DLC) - mod->modfunc[modidx++] = mod_and_dlc; + if (mb.modtype & CGW_MOD_LEN) + mod->modfunc[modidx++] = mod_and_len; - if 
(mb.modtype & CGW_MOD_DATA) - mod->modfunc[modidx++] = mod_and_data; - } + if (mb.modtype & CGW_MOD_FLAGS) + mod->modfunc[modidx++] = mod_and_flags; + + if (mb.modtype & CGW_MOD_DATA) + mod->modfunc[modidx++] = mod_and_fddata; + } - if (tb[CGW_MOD_OR]) { - nla_memcpy(&mb, tb[CGW_MOD_OR], CGW_MODATTR_LEN); + if (tb[CGW_FDMOD_OR]) { + nla_memcpy(&mb, tb[CGW_FDMOD_OR], CGW_FDMODATTR_LEN); - canframecpy(&mod->modframe.or, &mb.cf); - mod->modtype.or = mb.modtype; + canfdframecpy(&mod->modframe.or, &mb.cf); + mod->modtype.or = mb.modtype; - if (mb.modtype & CGW_MOD_ID) - mod->modfunc[modidx++] = mod_or_id; + if (mb.modtype & CGW_MOD_ID) + mod->modfunc[modidx++] = mod_or_id; - if (mb.modtype & CGW_MOD_DLC) - mod->modfunc[modidx++] = mod_or_dlc; + if (mb.modtype & CGW_MOD_LEN) + mod->modfunc[modidx++] = mod_or_len; - if (mb.modtype & CGW_MOD_DATA) - mod->modfunc[modidx++] = mod_or_data; - } + if (mb.modtype & CGW_MOD_FLAGS) + mod->modfunc[modidx++] = mod_or_flags; - if (tb[CGW_MOD_XOR]) { - nla_memcpy(&mb, tb[CGW_MOD_XOR], CGW_MODATTR_LEN); + if (mb.modtype & CGW_MOD_DATA) + mod->modfunc[modidx++] = mod_or_fddata; + } - canframecpy(&mod->modframe.xor, &mb.cf); - mod->modtype.xor = mb.modtype; + if (tb[CGW_FDMOD_XOR]) { + nla_memcpy(&mb, tb[CGW_FDMOD_XOR], CGW_FDMODATTR_LEN); - if (mb.modtype & CGW_MOD_ID) - mod->modfunc[modidx++] = mod_xor_id; + canfdframecpy(&mod->modframe.xor, &mb.cf); + mod->modtype.xor = mb.modtype; - if (mb.modtype & CGW_MOD_DLC) - mod->modfunc[modidx++] = mod_xor_dlc; + if (mb.modtype & CGW_MOD_ID) + mod->modfunc[modidx++] = mod_xor_id; - if (mb.modtype & CGW_MOD_DATA) - mod->modfunc[modidx++] = mod_xor_data; - } + if (mb.modtype & CGW_MOD_LEN) + mod->modfunc[modidx++] = mod_xor_len; - if (tb[CGW_MOD_SET]) { - nla_memcpy(&mb, tb[CGW_MOD_SET], CGW_MODATTR_LEN); + if (mb.modtype & CGW_MOD_FLAGS) + mod->modfunc[modidx++] = mod_xor_flags; + + if (mb.modtype & CGW_MOD_DATA) + mod->modfunc[modidx++] = mod_xor_fddata; + } - canframecpy(&mod->modframe.set, &mb.cf); - mod->modtype.set = mb.modtype; + if (tb[CGW_FDMOD_SET]) { + nla_memcpy(&mb, tb[CGW_FDMOD_SET], CGW_FDMODATTR_LEN); + + canfdframecpy(&mod->modframe.set, &mb.cf); + mod->modtype.set = mb.modtype; + + if (mb.modtype & CGW_MOD_ID) + mod->modfunc[modidx++] = mod_set_id; + + if (mb.modtype & CGW_MOD_LEN) + mod->modfunc[modidx++] = mod_set_len; + + if (mb.modtype & CGW_MOD_FLAGS) + mod->modfunc[modidx++] = mod_set_flags; + + if (mb.modtype & CGW_MOD_DATA) + mod->modfunc[modidx++] = mod_set_fddata; + } + } else { + struct cgw_frame_mod mb; - if (mb.modtype & CGW_MOD_ID) - mod->modfunc[modidx++] = mod_set_id; + if (tb[CGW_MOD_AND]) { + nla_memcpy(&mb, tb[CGW_MOD_AND], CGW_MODATTR_LEN); - if (mb.modtype & CGW_MOD_DLC) - mod->modfunc[modidx++] = mod_set_dlc; + canframecpy(&mod->modframe.and, &mb.cf); + mod->modtype.and = mb.modtype; - if (mb.modtype & CGW_MOD_DATA) - mod->modfunc[modidx++] = mod_set_data; + if (mb.modtype & CGW_MOD_ID) + mod->modfunc[modidx++] = mod_and_id; + + if (mb.modtype & CGW_MOD_LEN) + mod->modfunc[modidx++] = mod_and_len; + + if (mb.modtype & CGW_MOD_DATA) + mod->modfunc[modidx++] = mod_and_data; + } + + if (tb[CGW_MOD_OR]) { + nla_memcpy(&mb, tb[CGW_MOD_OR], CGW_MODATTR_LEN); + + canframecpy(&mod->modframe.or, &mb.cf); + mod->modtype.or = mb.modtype; + + if (mb.modtype & CGW_MOD_ID) + mod->modfunc[modidx++] = mod_or_id; + + if (mb.modtype & CGW_MOD_LEN) + mod->modfunc[modidx++] = mod_or_len; + + if (mb.modtype & CGW_MOD_DATA) + mod->modfunc[modidx++] = mod_or_data; + } + + if (tb[CGW_MOD_XOR]) { + 
nla_memcpy(&mb, tb[CGW_MOD_XOR], CGW_MODATTR_LEN); + + canframecpy(&mod->modframe.xor, &mb.cf); + mod->modtype.xor = mb.modtype; + + if (mb.modtype & CGW_MOD_ID) + mod->modfunc[modidx++] = mod_xor_id; + + if (mb.modtype & CGW_MOD_LEN) + mod->modfunc[modidx++] = mod_xor_len; + + if (mb.modtype & CGW_MOD_DATA) + mod->modfunc[modidx++] = mod_xor_data; + } + + if (tb[CGW_MOD_SET]) { + nla_memcpy(&mb, tb[CGW_MOD_SET], CGW_MODATTR_LEN); + + canframecpy(&mod->modframe.set, &mb.cf); + mod->modtype.set = mb.modtype; + + if (mb.modtype & CGW_MOD_ID) + mod->modfunc[modidx++] = mod_set_id; + + if (mb.modtype & CGW_MOD_LEN) + mod->modfunc[modidx++] = mod_set_len; + + if (mb.modtype & CGW_MOD_DATA) + mod->modfunc[modidx++] = mod_set_data; + } } /* check for checksum operations after CAN frame modifications */ if (modidx) { - if (tb[CGW_CS_CRC8]) { struct cgw_csum_crc8 *c = nla_data(tb[CGW_CS_CRC8]); err = cgw_chk_csum_parms(c->from_idx, c->to_idx, - c->result_idx); + c->result_idx, r); if (err) return err; nla_memcpy(&mod->csum.crc8, tb[CGW_CS_CRC8], CGW_CS_CRC8_LEN); - /* - * select dedicated processing function to reduce + /* select dedicated processing function to reduce * runtime operations in receive hot path. */ if (c->from_idx < 0 || c->to_idx < 0 || @@ -771,15 +928,14 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod, struct cgw_csum_xor *c = nla_data(tb[CGW_CS_XOR]); err = cgw_chk_csum_parms(c->from_idx, c->to_idx, - c->result_idx); + c->result_idx, r); if (err) return err; nla_memcpy(&mod->csum.xor, tb[CGW_CS_XOR], CGW_CS_XOR_LEN); - /* - * select dedicated processing function to reduce + /* select dedicated processing function to reduce * runtime operations in receive hot path. */ if (c->from_idx < 0 || c->to_idx < 0 || @@ -791,16 +947,14 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod, mod->csumfunc.xor = cgw_csum_xor_neg; } - if (tb[CGW_MOD_UID]) { + if (tb[CGW_MOD_UID]) nla_memcpy(&mod->uid, tb[CGW_MOD_UID], sizeof(u32)); - } } if (gwtype == CGW_TYPE_CAN_CAN) { - /* check CGW_TYPE_CAN_CAN specific attributes */ - struct can_can_gw *ccgw = (struct can_can_gw *)gwtypeattr; + memset(ccgw, 0, sizeof(*ccgw)); /* check for can_filter in attributes */ @@ -861,12 +1015,10 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh, return err; if (mod.uid) { - ASSERT_RTNL(); /* check for updating an existing job with identical uid */ hlist_for_each_entry(gwj, &net->can.cgw_list, list) { - if (gwj->mod.uid != mod.uid) continue; @@ -987,7 +1139,6 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh, /* remove only the first matching entry */ hlist_for_each_entry_safe(gwj, nx, &net->can.cgw_list, list) { - if (gwj->flags != r->flags) continue; diff --git a/net/can/j1939/Kconfig b/net/can/j1939/Kconfig new file mode 100644 index 000000000000..2998298b71ec --- /dev/null +++ b/net/can/j1939/Kconfig @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# SAE J1939 network layer core configuration +# + +config CAN_J1939 + tristate "SAE J1939" + depends on CAN + help + SAE J1939 + Say Y to have in-kernel support for j1939 socket type. This + allows communication according to SAE j1939. + The relevant parts in kernel are + SAE j1939-21 (datalink & transport protocol) + & SAE j1939-81 (network management). 
diff --git a/net/can/j1939/Makefile b/net/can/j1939/Makefile new file mode 100644 index 000000000000..19181bdae173 --- /dev/null +++ b/net/can/j1939/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_CAN_J1939) += can-j1939.o + +can-j1939-objs := \ + address-claim.o \ + bus.o \ + main.o \ + socket.o \ + transport.o diff --git a/net/can/j1939/address-claim.c b/net/can/j1939/address-claim.c new file mode 100644 index 000000000000..f33c47327927 --- /dev/null +++ b/net/can/j1939/address-claim.c @@ -0,0 +1,230 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2010-2011 EIA Electronics, +// Kurt Van Dijck <kurt.van.dijck@eia.be> +// Copyright (c) 2010-2011 EIA Electronics, +// Pieter Beyens <pieter.beyens@eia.be> +// Copyright (c) 2017-2019 Pengutronix, +// Marc Kleine-Budde <kernel@pengutronix.de> +// Copyright (c) 2017-2019 Pengutronix, +// Oleksij Rempel <kernel@pengutronix.de> + +/* J1939 Address Claiming. + * Address Claiming in the kernel + * - keeps track of the AC states of ECU's, + * - resolves NAME<=>SA taking into account the AC states of ECU's. + * + * All Address Claim msgs (including host-originated msg) are processed + * at the receive path (a sent msg is always received again via CAN echo). + * As such, the processing of AC msgs is done in the order on which msgs + * are sent on the bus. + * + * This module doesn't send msgs itself (e.g. replies on Address Claims), + * this is the responsibility of a user space application or daemon. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/netdevice.h> +#include <linux/skbuff.h> + +#include "j1939-priv.h" + +static inline name_t j1939_skb_to_name(const struct sk_buff *skb) +{ + return le64_to_cpup((__le64 *)skb->data); +} + +static inline bool j1939_ac_msg_is_request(struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + int req_pgn; + + if (skb->len < 3 || skcb->addr.pgn != J1939_PGN_REQUEST) + return false; + + req_pgn = skb->data[0] | (skb->data[1] << 8) | (skb->data[2] << 16); + + return req_pgn == J1939_PGN_ADDRESS_CLAIMED; +} + +static int j1939_ac_verify_outgoing(struct j1939_priv *priv, + struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + + if (skb->len != 8) { + netdev_notice(priv->ndev, "tx address claim with dlc %i\n", + skb->len); + return -EPROTO; + } + + if (skcb->addr.src_name != j1939_skb_to_name(skb)) { + netdev_notice(priv->ndev, "tx address claim with different name\n"); + return -EPROTO; + } + + if (skcb->addr.sa == J1939_NO_ADDR) { + netdev_notice(priv->ndev, "tx address claim with broadcast sa\n"); + return -EPROTO; + } + + /* ac must always be a broadcast */ + if (skcb->addr.dst_name || skcb->addr.da != J1939_NO_ADDR) { + netdev_notice(priv->ndev, "tx address claim with dest, not broadcast\n"); + return -EPROTO; + } + return 0; +} + +int j1939_ac_fixup(struct j1939_priv *priv, struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + int ret; + u8 addr; + + /* network mgmt: address claiming msgs */ + if (skcb->addr.pgn == J1939_PGN_ADDRESS_CLAIMED) { + struct j1939_ecu *ecu; + + ret = j1939_ac_verify_outgoing(priv, skb); + /* return both when failure & when successful */ + if (ret < 0) + return ret; + ecu = j1939_ecu_get_by_name(priv, skcb->addr.src_name); + if (!ecu) + return -ENODEV; + + if (ecu->addr != skcb->addr.sa) + /* hold further traffic for ecu, remove from parent */ + j1939_ecu_unmap(ecu); + j1939_ecu_put(ecu); + } else if (skcb->addr.src_name) { + /* assign source 
address */ + addr = j1939_name_to_addr(priv, skcb->addr.src_name); + if (!j1939_address_is_unicast(addr) && + !j1939_ac_msg_is_request(skb)) { + netdev_notice(priv->ndev, "tx drop: invalid sa for name 0x%016llx\n", + skcb->addr.src_name); + return -EADDRNOTAVAIL; + } + skcb->addr.sa = addr; + } + + /* assign destination address */ + if (skcb->addr.dst_name) { + addr = j1939_name_to_addr(priv, skcb->addr.dst_name); + if (!j1939_address_is_unicast(addr)) { + netdev_notice(priv->ndev, "tx drop: invalid da for name 0x%016llx\n", + skcb->addr.dst_name); + return -EADDRNOTAVAIL; + } + skcb->addr.da = addr; + } + return 0; +} + +static void j1939_ac_process(struct j1939_priv *priv, struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct j1939_ecu *ecu, *prev; + name_t name; + + if (skb->len != 8) { + netdev_notice(priv->ndev, "rx address claim with wrong dlc %i\n", + skb->len); + return; + } + + name = j1939_skb_to_name(skb); + skcb->addr.src_name = name; + if (!name) { + netdev_notice(priv->ndev, "rx address claim without name\n"); + return; + } + + if (!j1939_address_is_valid(skcb->addr.sa)) { + netdev_notice(priv->ndev, "rx address claim with broadcast sa\n"); + return; + } + + write_lock_bh(&priv->lock); + + /* Few words on the ECU ref counting: + * + * First we get an ECU handle, either with + * j1939_ecu_get_by_name_locked() (increments the ref counter) + * or j1939_ecu_create_locked() (initializes an ECU object + * with a ref counter of 1). + * + * j1939_ecu_unmap_locked() will decrement the ref counter, + * but only if the ECU was mapped before. So "ecu" still + * belongs to us. + * + * j1939_ecu_timer_start() will increment the ref counter + * before it starts the timer, so we can put the ecu when + * leaving this function. 
+ */ + ecu = j1939_ecu_get_by_name_locked(priv, name); + if (!ecu && j1939_address_is_unicast(skcb->addr.sa)) + ecu = j1939_ecu_create_locked(priv, name); + + if (IS_ERR_OR_NULL(ecu)) + goto out_unlock_bh; + + /* cancel pending (previous) address claim */ + j1939_ecu_timer_cancel(ecu); + + if (j1939_address_is_idle(skcb->addr.sa)) { + j1939_ecu_unmap_locked(ecu); + goto out_ecu_put; + } + + /* save new addr */ + if (ecu->addr != skcb->addr.sa) + j1939_ecu_unmap_locked(ecu); + ecu->addr = skcb->addr.sa; + + prev = j1939_ecu_get_by_addr_locked(priv, skcb->addr.sa); + if (prev) { + if (ecu->name > prev->name) { + j1939_ecu_unmap_locked(ecu); + j1939_ecu_put(prev); + goto out_ecu_put; + } else { + /* kick prev if less or equal */ + j1939_ecu_unmap_locked(prev); + j1939_ecu_put(prev); + } + } + + j1939_ecu_timer_start(ecu); + out_ecu_put: + j1939_ecu_put(ecu); + out_unlock_bh: + write_unlock_bh(&priv->lock); +} + +void j1939_ac_recv(struct j1939_priv *priv, struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct j1939_ecu *ecu; + + /* network mgmt */ + if (skcb->addr.pgn == J1939_PGN_ADDRESS_CLAIMED) { + j1939_ac_process(priv, skb); + } else if (j1939_address_is_unicast(skcb->addr.sa)) { + /* assign source name */ + ecu = j1939_ecu_get_by_addr(priv, skcb->addr.sa); + if (ecu) { + skcb->addr.src_name = ecu->name; + j1939_ecu_put(ecu); + } + } + + /* assign destination name */ + ecu = j1939_ecu_get_by_addr(priv, skcb->addr.da); + if (ecu) { + skcb->addr.dst_name = ecu->name; + j1939_ecu_put(ecu); + } +} diff --git a/net/can/j1939/bus.c b/net/can/j1939/bus.c new file mode 100644 index 000000000000..486687901602 --- /dev/null +++ b/net/can/j1939/bus.c @@ -0,0 +1,333 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2010-2011 EIA Electronics, +// Kurt Van Dijck <kurt.van.dijck@eia.be> +// Copyright (c) 2017-2019 Pengutronix, +// Marc Kleine-Budde <kernel@pengutronix.de> +// Copyright (c) 2017-2019 Pengutronix, +// Oleksij Rempel <kernel@pengutronix.de> + +/* bus for j1939 remote devices + * Since rtnetlink, no real bus is used. + */ + +#include <net/sock.h> + +#include "j1939-priv.h" + +static void __j1939_ecu_release(struct kref *kref) +{ + struct j1939_ecu *ecu = container_of(kref, struct j1939_ecu, kref); + struct j1939_priv *priv = ecu->priv; + + list_del(&ecu->list); + kfree(ecu); + j1939_priv_put(priv); +} + +void j1939_ecu_put(struct j1939_ecu *ecu) +{ + kref_put(&ecu->kref, __j1939_ecu_release); +} + +static void j1939_ecu_get(struct j1939_ecu *ecu) +{ + kref_get(&ecu->kref); +} + +static bool j1939_ecu_is_mapped_locked(struct j1939_ecu *ecu) +{ + struct j1939_priv *priv = ecu->priv; + + lockdep_assert_held(&priv->lock); + + return j1939_ecu_find_by_addr_locked(priv, ecu->addr) == ecu; +} + +/* ECU device interface */ +/* map ECU to a bus address space */ +static void j1939_ecu_map_locked(struct j1939_ecu *ecu) +{ + struct j1939_priv *priv = ecu->priv; + struct j1939_addr_ent *ent; + + lockdep_assert_held(&priv->lock); + + if (!j1939_address_is_unicast(ecu->addr)) + return; + + ent = &priv->ents[ecu->addr]; + + if (ent->ecu) { + netdev_warn(priv->ndev, "Trying to map already mapped ECU, addr: 0x%02x, name: 0x%016llx. 
Skip it.\n", + ecu->addr, ecu->name); + return; + } + + j1939_ecu_get(ecu); + ent->ecu = ecu; + ent->nusers += ecu->nusers; +} + +/* unmap ECU from a bus address space */ +void j1939_ecu_unmap_locked(struct j1939_ecu *ecu) +{ + struct j1939_priv *priv = ecu->priv; + struct j1939_addr_ent *ent; + + lockdep_assert_held(&priv->lock); + + if (!j1939_address_is_unicast(ecu->addr)) + return; + + if (!j1939_ecu_is_mapped_locked(ecu)) + return; + + ent = &priv->ents[ecu->addr]; + ent->ecu = NULL; + ent->nusers -= ecu->nusers; + j1939_ecu_put(ecu); +} + +void j1939_ecu_unmap(struct j1939_ecu *ecu) +{ + write_lock_bh(&ecu->priv->lock); + j1939_ecu_unmap_locked(ecu); + write_unlock_bh(&ecu->priv->lock); +} + +void j1939_ecu_unmap_all(struct j1939_priv *priv) +{ + int i; + + write_lock_bh(&priv->lock); + for (i = 0; i < ARRAY_SIZE(priv->ents); i++) + if (priv->ents[i].ecu) + j1939_ecu_unmap_locked(priv->ents[i].ecu); + write_unlock_bh(&priv->lock); +} + +void j1939_ecu_timer_start(struct j1939_ecu *ecu) +{ + /* The ECU is held here and released in the + * j1939_ecu_timer_handler() or j1939_ecu_timer_cancel(). + */ + j1939_ecu_get(ecu); + + /* Schedule timer in 250 msec to commit address change. */ + hrtimer_start(&ecu->ac_timer, ms_to_ktime(250), + HRTIMER_MODE_REL_SOFT); +} + +void j1939_ecu_timer_cancel(struct j1939_ecu *ecu) +{ + if (hrtimer_cancel(&ecu->ac_timer)) + j1939_ecu_put(ecu); +} + +static enum hrtimer_restart j1939_ecu_timer_handler(struct hrtimer *hrtimer) +{ + struct j1939_ecu *ecu = + container_of(hrtimer, struct j1939_ecu, ac_timer); + struct j1939_priv *priv = ecu->priv; + + write_lock_bh(&priv->lock); + /* TODO: can we test if ecu->addr is unicast before starting + * the timer? + */ + j1939_ecu_map_locked(ecu); + + /* The corresponding j1939_ecu_get() is in + * j1939_ecu_timer_start(). 
+ */ + j1939_ecu_put(ecu); + write_unlock_bh(&priv->lock); + + return HRTIMER_NORESTART; +} + +struct j1939_ecu *j1939_ecu_create_locked(struct j1939_priv *priv, name_t name) +{ + struct j1939_ecu *ecu; + + lockdep_assert_held(&priv->lock); + + ecu = kzalloc(sizeof(*ecu), gfp_any()); + if (!ecu) + return ERR_PTR(-ENOMEM); + kref_init(&ecu->kref); + ecu->addr = J1939_IDLE_ADDR; + ecu->name = name; + + hrtimer_init(&ecu->ac_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); + ecu->ac_timer.function = j1939_ecu_timer_handler; + INIT_LIST_HEAD(&ecu->list); + + j1939_priv_get(priv); + ecu->priv = priv; + list_add_tail(&ecu->list, &priv->ecus); + + return ecu; +} + +struct j1939_ecu *j1939_ecu_find_by_addr_locked(struct j1939_priv *priv, + u8 addr) +{ + lockdep_assert_held(&priv->lock); + + return priv->ents[addr].ecu; +} + +struct j1939_ecu *j1939_ecu_get_by_addr_locked(struct j1939_priv *priv, u8 addr) +{ + struct j1939_ecu *ecu; + + lockdep_assert_held(&priv->lock); + + if (!j1939_address_is_unicast(addr)) + return NULL; + + ecu = j1939_ecu_find_by_addr_locked(priv, addr); + if (ecu) + j1939_ecu_get(ecu); + + return ecu; +} + +struct j1939_ecu *j1939_ecu_get_by_addr(struct j1939_priv *priv, u8 addr) +{ + struct j1939_ecu *ecu; + + read_lock_bh(&priv->lock); + ecu = j1939_ecu_get_by_addr_locked(priv, addr); + read_unlock_bh(&priv->lock); + + return ecu; +} + +/* get pointer to ecu without increasing ref counter */ +static struct j1939_ecu *j1939_ecu_find_by_name_locked(struct j1939_priv *priv, + name_t name) +{ + struct j1939_ecu *ecu; + + lockdep_assert_held(&priv->lock); + + list_for_each_entry(ecu, &priv->ecus, list) { + if (ecu->name == name) + return ecu; + } + + return NULL; +} + +struct j1939_ecu *j1939_ecu_get_by_name_locked(struct j1939_priv *priv, + name_t name) +{ + struct j1939_ecu *ecu; + + lockdep_assert_held(&priv->lock); + + if (!name) + return NULL; + + ecu = j1939_ecu_find_by_name_locked(priv, name); + if (ecu) + j1939_ecu_get(ecu); + + return ecu; +} + +struct j1939_ecu *j1939_ecu_get_by_name(struct j1939_priv *priv, name_t name) +{ + struct j1939_ecu *ecu; + + read_lock_bh(&priv->lock); + ecu = j1939_ecu_get_by_name_locked(priv, name); + read_unlock_bh(&priv->lock); + + return ecu; +} + +u8 j1939_name_to_addr(struct j1939_priv *priv, name_t name) +{ + struct j1939_ecu *ecu; + int addr = J1939_IDLE_ADDR; + + if (!name) + return J1939_NO_ADDR; + + read_lock_bh(&priv->lock); + ecu = j1939_ecu_find_by_name_locked(priv, name); + if (ecu && j1939_ecu_is_mapped_locked(ecu)) + /* ecu's SA is registered */ + addr = ecu->addr; + + read_unlock_bh(&priv->lock); + + return addr; +} + +/* TX addr/name accounting + * Transport protocol needs to know if a SA is local or not + * These functions originate from userspace manipulating sockets, + * so locking is straigforward + */ + +int j1939_local_ecu_get(struct j1939_priv *priv, name_t name, u8 sa) +{ + struct j1939_ecu *ecu; + int err = 0; + + write_lock_bh(&priv->lock); + + if (j1939_address_is_unicast(sa)) + priv->ents[sa].nusers++; + + if (!name) + goto done; + + ecu = j1939_ecu_get_by_name_locked(priv, name); + if (!ecu) + ecu = j1939_ecu_create_locked(priv, name); + err = PTR_ERR_OR_ZERO(ecu); + if (err) + goto done; + + ecu->nusers++; + /* TODO: do we care if ecu->addr != sa? 
*/ + if (j1939_ecu_is_mapped_locked(ecu)) + /* ecu's sa is active already */ + priv->ents[ecu->addr].nusers++; + + done: + write_unlock_bh(&priv->lock); + + return err; +} + +void j1939_local_ecu_put(struct j1939_priv *priv, name_t name, u8 sa) +{ + struct j1939_ecu *ecu; + + write_lock_bh(&priv->lock); + + if (j1939_address_is_unicast(sa)) + priv->ents[sa].nusers--; + + if (!name) + goto done; + + ecu = j1939_ecu_find_by_name_locked(priv, name); + if (WARN_ON_ONCE(!ecu)) + goto done; + + ecu->nusers--; + /* TODO: do we care if ecu->addr != sa? */ + if (j1939_ecu_is_mapped_locked(ecu)) + /* ecu's sa is active already */ + priv->ents[ecu->addr].nusers--; + j1939_ecu_put(ecu); + + done: + write_unlock_bh(&priv->lock); +} diff --git a/net/can/j1939/j1939-priv.h b/net/can/j1939/j1939-priv.h new file mode 100644 index 000000000000..12369b604ce9 --- /dev/null +++ b/net/can/j1939/j1939-priv.h @@ -0,0 +1,338 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// Copyright (c) 2010-2011 EIA Electronics, +// Kurt Van Dijck <kurt.van.dijck@eia.be> +// Copyright (c) 2017-2019 Pengutronix, +// Marc Kleine-Budde <kernel@pengutronix.de> +// Copyright (c) 2017-2019 Pengutronix, +// Oleksij Rempel <kernel@pengutronix.de> + +#ifndef _J1939_PRIV_H_ +#define _J1939_PRIV_H_ + +#include <linux/can/j1939.h> +#include <net/sock.h> + +/* Timeout to receive the abort signal over loop back. In case CAN + * bus is open, the timeout should be triggered. + */ +#define J1939_XTP_ABORT_TIMEOUT_MS 500 +#define J1939_SIMPLE_ECHO_TIMEOUT_MS (10 * 1000) + +struct j1939_session; +enum j1939_sk_errqueue_type { + J1939_ERRQUEUE_ACK, + J1939_ERRQUEUE_SCHED, + J1939_ERRQUEUE_ABORT, +}; + +/* j1939 devices */ +struct j1939_ecu { + struct list_head list; + name_t name; + u8 addr; + + /* indicates that this ecu successfully claimed @sa as its address */ + struct hrtimer ac_timer; + struct kref kref; + struct j1939_priv *priv; + + /* count users, to help transport protocol decide for interaction */ + int nusers; +}; + +struct j1939_priv { + struct list_head ecus; + /* local list entry in priv + * These allow irq (& softirq) context lookups on j1939 devices + * This approach (separate lists) is done as the other 2 alternatives + * are not easier or even wrong + * 1) using the pure kobject methods involves mutexes, which are not + * allowed in irq context. + * 2) duplicating data structures would require a lot of synchronization + * code + * usage: + */ + + /* segments need a lock to protect the above list */ + rwlock_t lock; + + struct net_device *ndev; + + /* list of 256 ecu ptrs, that cache the claimed addresses. + * also protected by the above lock + */ + struct j1939_addr_ent { + struct j1939_ecu *ecu; + /* count users, to help transport protocol */ + int nusers; + } ents[256]; + + struct kref kref; + + /* List of active sessions to prevent start of conflicting + * one. + * + * Do not start two sessions of same type, addresses and + * direction. 
+ */ + struct list_head active_session_list; + + /* protects active_session_list */ + spinlock_t active_session_list_lock; + + unsigned int tp_max_packet_size; + + /* lock for j1939_socks list */ + spinlock_t j1939_socks_lock; + struct list_head j1939_socks; + + struct kref rx_kref; +}; + +void j1939_ecu_put(struct j1939_ecu *ecu); + +/* keep the cache of what is local */ +int j1939_local_ecu_get(struct j1939_priv *priv, name_t name, u8 sa); +void j1939_local_ecu_put(struct j1939_priv *priv, name_t name, u8 sa); + +static inline bool j1939_address_is_unicast(u8 addr) +{ + return addr <= J1939_MAX_UNICAST_ADDR; +} + +static inline bool j1939_address_is_idle(u8 addr) +{ + return addr == J1939_IDLE_ADDR; +} + +static inline bool j1939_address_is_valid(u8 addr) +{ + return addr != J1939_NO_ADDR; +} + +static inline bool j1939_pgn_is_pdu1(pgn_t pgn) +{ + /* ignore dp & res bits for this */ + return (pgn & 0xff00) < 0xf000; +} + +/* utility to correctly unmap an ECU */ +void j1939_ecu_unmap_locked(struct j1939_ecu *ecu); +void j1939_ecu_unmap(struct j1939_ecu *ecu); + +u8 j1939_name_to_addr(struct j1939_priv *priv, name_t name); +struct j1939_ecu *j1939_ecu_find_by_addr_locked(struct j1939_priv *priv, + u8 addr); +struct j1939_ecu *j1939_ecu_get_by_addr(struct j1939_priv *priv, u8 addr); +struct j1939_ecu *j1939_ecu_get_by_addr_locked(struct j1939_priv *priv, + u8 addr); +struct j1939_ecu *j1939_ecu_get_by_name(struct j1939_priv *priv, name_t name); +struct j1939_ecu *j1939_ecu_get_by_name_locked(struct j1939_priv *priv, + name_t name); + +enum j1939_transfer_type { + J1939_TP, + J1939_ETP, + J1939_SIMPLE, +}; + +struct j1939_addr { + name_t src_name; + name_t dst_name; + pgn_t pgn; + + u8 sa; + u8 da; + + u8 type; +}; + +/* control buffer of the sk_buff */ +struct j1939_sk_buff_cb { + /* Offset in bytes within one ETP session */ + u32 offset; + + /* for tx, MSG_SYN will be used to sync on sockets */ + u32 msg_flags; + u32 tskey; + + struct j1939_addr addr; + + /* Flags for quick lookups during skb processing. + * These are set in the receive path only. 
+ */ +#define J1939_ECU_LOCAL_SRC BIT(0) +#define J1939_ECU_LOCAL_DST BIT(1) + u8 flags; + + priority_t priority; +}; + +static inline +struct j1939_sk_buff_cb *j1939_skb_to_cb(const struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(struct j1939_sk_buff_cb) > sizeof(skb->cb)); + + return (struct j1939_sk_buff_cb *)skb->cb; +} + +int j1939_send_one(struct j1939_priv *priv, struct sk_buff *skb); +void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb); +bool j1939_sk_recv_match(struct j1939_priv *priv, + struct j1939_sk_buff_cb *skcb); +void j1939_sk_send_loop_abort(struct sock *sk, int err); +void j1939_sk_errqueue(struct j1939_session *session, + enum j1939_sk_errqueue_type type); +void j1939_sk_queue_activate_next(struct j1939_session *session); + +/* stack entries */ +struct j1939_session *j1939_tp_send(struct j1939_priv *priv, + struct sk_buff *skb, size_t size); +int j1939_tp_recv(struct j1939_priv *priv, struct sk_buff *skb); +int j1939_ac_fixup(struct j1939_priv *priv, struct sk_buff *skb); +void j1939_ac_recv(struct j1939_priv *priv, struct sk_buff *skb); +void j1939_simple_recv(struct j1939_priv *priv, struct sk_buff *skb); + +/* network management */ +struct j1939_ecu *j1939_ecu_create_locked(struct j1939_priv *priv, name_t name); + +void j1939_ecu_timer_start(struct j1939_ecu *ecu); +void j1939_ecu_timer_cancel(struct j1939_ecu *ecu); +void j1939_ecu_unmap_all(struct j1939_priv *priv); + +struct j1939_priv *j1939_netdev_start(struct net_device *ndev); +void j1939_netdev_stop(struct j1939_priv *priv); + +void j1939_priv_put(struct j1939_priv *priv); +void j1939_priv_get(struct j1939_priv *priv); + +/* notify/alert all j1939 sockets bound to ifindex */ +void j1939_sk_netdev_event_netdown(struct j1939_priv *priv); +int j1939_cancel_active_session(struct j1939_priv *priv, struct sock *sk); +void j1939_tp_init(struct j1939_priv *priv); + +/* decrement pending skb for a j1939 socket */ +void j1939_sock_pending_del(struct sock *sk); + +enum j1939_session_state { + J1939_SESSION_NEW, + J1939_SESSION_ACTIVE, + /* waiting for abort signal on the bus */ + J1939_SESSION_WAITING_ABORT, + J1939_SESSION_ACTIVE_MAX, + J1939_SESSION_DONE, +}; + +struct j1939_session { + struct j1939_priv *priv; + struct list_head active_session_list_entry; + struct list_head sk_session_queue_entry; + struct kref kref; + struct sock *sk; + + /* ifindex, src, dst, pgn define the session block + * the are _never_ modified after insertion in the list + * this decreases locking problems a _lot_ + */ + struct j1939_sk_buff_cb skcb; + struct sk_buff_head skb_queue; + + /* all tx related stuff (last_txcmd, pkt.tx) + * is protected (modified only) with the txtimer hrtimer + * 'total' & 'block' are never changed, + * last_cmd, last & block are protected by ->lock + * this means that the tx may run after cts is received that should + * have stopped tx, but this time discrepancy is never avoided anyhow + */ + u8 last_cmd, last_txcmd; + bool transmission; + bool extd; + /* Total message size, number of bytes */ + unsigned int total_message_size; + /* Total number of bytes queue from socket to the session */ + unsigned int total_queued_size; + unsigned int tx_retry; + + int err; + u32 tskey; + enum j1939_session_state state; + + /* Packets counters for a (extended) transfer session. The packet is + * maximal of 7 bytes. 
+ */ + struct { + /* total - total number of packets for this session */ + unsigned int total; + /* last - last packet of a transfer block after which + * responder should send ETP.CM_CTS and originator + * ETP.CM_DPO + */ + unsigned int last; + /* tx - number of packets send by originator node. + * this counter can be set back if responder node + * didn't received all packets send by originator. + */ + unsigned int tx; + unsigned int tx_acked; + /* rx - number of packets received */ + unsigned int rx; + /* block - amount of packets expected in one block */ + unsigned int block; + /* dpo - ETP.CM_DPO, Data Packet Offset */ + unsigned int dpo; + } pkt; + struct hrtimer txtimer, rxtimer; +}; + +struct j1939_sock { + struct sock sk; /* must be first to skip with memset */ + struct j1939_priv *priv; + struct list_head list; + +#define J1939_SOCK_BOUND BIT(0) +#define J1939_SOCK_CONNECTED BIT(1) +#define J1939_SOCK_PROMISC BIT(2) +#define J1939_SOCK_ERRQUEUE BIT(3) + int state; + + int ifindex; + struct j1939_addr addr; + struct j1939_filter *filters; + int nfilters; + pgn_t pgn_rx_filter; + + /* j1939 may emit equal PGN (!= equal CAN-id's) out of order + * when transport protocol comes in. + * To allow emitting in order, keep a 'pending' nr. of packets + */ + atomic_t skb_pending; + wait_queue_head_t waitq; + + /* lock for the sk_session_queue list */ + spinlock_t sk_session_queue_lock; + struct list_head sk_session_queue; +}; + +static inline struct j1939_sock *j1939_sk(const struct sock *sk) +{ + return container_of(sk, struct j1939_sock, sk); +} + +void j1939_session_get(struct j1939_session *session); +void j1939_session_put(struct j1939_session *session); +void j1939_session_skb_queue(struct j1939_session *session, + struct sk_buff *skb); +int j1939_session_activate(struct j1939_session *session); +void j1939_tp_schedule_txtimer(struct j1939_session *session, int msec); +void j1939_session_timers_cancel(struct j1939_session *session); + +#define J1939_MAX_TP_PACKET_SIZE (7 * 0xff) +#define J1939_MAX_ETP_PACKET_SIZE (7 * 0x00ffffff) + +#define J1939_REGULAR 0 +#define J1939_EXTENDED 1 + +/* CAN protocol */ +extern const struct can_proto j1939_can_proto; + +#endif /* _J1939_PRIV_H_ */ diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c new file mode 100644 index 000000000000..def2f813ffce --- /dev/null +++ b/net/can/j1939/main.c @@ -0,0 +1,403 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2010-2011 EIA Electronics, +// Pieter Beyens <pieter.beyens@eia.be> +// Copyright (c) 2010-2011 EIA Electronics, +// Kurt Van Dijck <kurt.van.dijck@eia.be> +// Copyright (c) 2018 Protonic, +// Robin van der Gracht <robin@protonic.nl> +// Copyright (c) 2017-2019 Pengutronix, +// Marc Kleine-Budde <kernel@pengutronix.de> +// Copyright (c) 2017-2019 Pengutronix, +// Oleksij Rempel <kernel@pengutronix.de> + +/* Core of can-j1939 that links j1939 to CAN. 
*/ + +#include <linux/can/can-ml.h> +#include <linux/can/core.h> +#include <linux/can/skb.h> +#include <linux/if_arp.h> +#include <linux/module.h> + +#include "j1939-priv.h" + +MODULE_DESCRIPTION("PF_CAN SAE J1939"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("EIA Electronics (Kurt Van Dijck & Pieter Beyens)"); +MODULE_ALIAS("can-proto-" __stringify(CAN_J1939)); + +/* LOWLEVEL CAN interface */ + +/* CAN_HDR: #bytes before can_frame data part */ +#define J1939_CAN_HDR (offsetof(struct can_frame, data)) + +/* CAN_FTR: #bytes beyond data part */ +#define J1939_CAN_FTR (sizeof(struct can_frame) - J1939_CAN_HDR - \ + sizeof(((struct can_frame *)0)->data)) + +/* lowest layer */ +static void j1939_can_recv(struct sk_buff *iskb, void *data) +{ + struct j1939_priv *priv = data; + struct sk_buff *skb; + struct j1939_sk_buff_cb *skcb, *iskcb; + struct can_frame *cf; + + /* create a copy of the skb + * j1939 only delivers the real data bytes, + * the header goes into sockaddr. + * j1939 may not touch the incoming skb in such way + */ + skb = skb_clone(iskb, GFP_ATOMIC); + if (!skb) + return; + + can_skb_set_owner(skb, iskb->sk); + + /* get a pointer to the header of the skb + * the skb payload (pointer) is moved, so that the next skb_data + * returns the actual payload + */ + cf = (void *)skb->data; + skb_pull(skb, J1939_CAN_HDR); + + /* fix length, set to dlc, with 8 maximum */ + skb_trim(skb, min_t(uint8_t, cf->can_dlc, 8)); + + /* set addr */ + skcb = j1939_skb_to_cb(skb); + memset(skcb, 0, sizeof(*skcb)); + + iskcb = j1939_skb_to_cb(iskb); + skcb->tskey = iskcb->tskey; + skcb->priority = (cf->can_id >> 26) & 0x7; + skcb->addr.sa = cf->can_id; + skcb->addr.pgn = (cf->can_id >> 8) & J1939_PGN_MAX; + /* set default message type */ + skcb->addr.type = J1939_TP; + if (j1939_pgn_is_pdu1(skcb->addr.pgn)) { + /* Type 1: with destination address */ + skcb->addr.da = skcb->addr.pgn; + /* normalize pgn: strip dst address */ + skcb->addr.pgn &= 0x3ff00; + } else { + /* set broadcast address */ + skcb->addr.da = J1939_NO_ADDR; + } + + /* update localflags */ + read_lock_bh(&priv->lock); + if (j1939_address_is_unicast(skcb->addr.sa) && + priv->ents[skcb->addr.sa].nusers) + skcb->flags |= J1939_ECU_LOCAL_SRC; + if (j1939_address_is_unicast(skcb->addr.da) && + priv->ents[skcb->addr.da].nusers) + skcb->flags |= J1939_ECU_LOCAL_DST; + read_unlock_bh(&priv->lock); + + /* deliver into the j1939 stack ... 
*/ + j1939_ac_recv(priv, skb); + + if (j1939_tp_recv(priv, skb)) + /* this means the transport layer processed the message */ + goto done; + + j1939_simple_recv(priv, skb); + j1939_sk_recv(priv, skb); + done: + kfree_skb(skb); +} + +/* NETDEV MANAGEMENT */ + +/* values for can_rx_(un)register */ +#define J1939_CAN_ID CAN_EFF_FLAG +#define J1939_CAN_MASK (CAN_EFF_FLAG | CAN_RTR_FLAG) + +static DEFINE_SPINLOCK(j1939_netdev_lock); + +static struct j1939_priv *j1939_priv_create(struct net_device *ndev) +{ + struct j1939_priv *priv; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return NULL; + + rwlock_init(&priv->lock); + INIT_LIST_HEAD(&priv->ecus); + priv->ndev = ndev; + kref_init(&priv->kref); + kref_init(&priv->rx_kref); + dev_hold(ndev); + + netdev_dbg(priv->ndev, "%s : 0x%p\n", __func__, priv); + + return priv; +} + +static inline void j1939_priv_set(struct net_device *ndev, + struct j1939_priv *priv) +{ + struct can_ml_priv *can_ml_priv = ndev->ml_priv; + + can_ml_priv->j1939_priv = priv; +} + +static void __j1939_priv_release(struct kref *kref) +{ + struct j1939_priv *priv = container_of(kref, struct j1939_priv, kref); + struct net_device *ndev = priv->ndev; + + netdev_dbg(priv->ndev, "%s: 0x%p\n", __func__, priv); + + dev_put(ndev); + kfree(priv); +} + +void j1939_priv_put(struct j1939_priv *priv) +{ + kref_put(&priv->kref, __j1939_priv_release); +} + +void j1939_priv_get(struct j1939_priv *priv) +{ + kref_get(&priv->kref); +} + +static int j1939_can_rx_register(struct j1939_priv *priv) +{ + struct net_device *ndev = priv->ndev; + int ret; + + j1939_priv_get(priv); + ret = can_rx_register(dev_net(ndev), ndev, J1939_CAN_ID, J1939_CAN_MASK, + j1939_can_recv, priv, "j1939", NULL); + if (ret < 0) { + j1939_priv_put(priv); + return ret; + } + + return 0; +} + +static void j1939_can_rx_unregister(struct j1939_priv *priv) +{ + struct net_device *ndev = priv->ndev; + + can_rx_unregister(dev_net(ndev), ndev, J1939_CAN_ID, J1939_CAN_MASK, + j1939_can_recv, priv); + + j1939_priv_put(priv); +} + +static void __j1939_rx_release(struct kref *kref) + __releases(&j1939_netdev_lock) +{ + struct j1939_priv *priv = container_of(kref, struct j1939_priv, + rx_kref); + + j1939_can_rx_unregister(priv); + j1939_ecu_unmap_all(priv); + j1939_priv_set(priv->ndev, NULL); + spin_unlock(&j1939_netdev_lock); +} + +/* get pointer to priv without increasing ref counter */ +static inline struct j1939_priv *j1939_ndev_to_priv(struct net_device *ndev) +{ + struct can_ml_priv *can_ml_priv = ndev->ml_priv; + + return can_ml_priv->j1939_priv; +} + +static struct j1939_priv *j1939_priv_get_by_ndev_locked(struct net_device *ndev) +{ + struct j1939_priv *priv; + + lockdep_assert_held(&j1939_netdev_lock); + + if (ndev->type != ARPHRD_CAN) + return NULL; + + priv = j1939_ndev_to_priv(ndev); + if (priv) + j1939_priv_get(priv); + + return priv; +} + +static struct j1939_priv *j1939_priv_get_by_ndev(struct net_device *ndev) +{ + struct j1939_priv *priv; + + spin_lock(&j1939_netdev_lock); + priv = j1939_priv_get_by_ndev_locked(ndev); + spin_unlock(&j1939_netdev_lock); + + return priv; +} + +struct j1939_priv *j1939_netdev_start(struct net_device *ndev) +{ + struct j1939_priv *priv, *priv_new; + int ret; + + priv = j1939_priv_get_by_ndev(ndev); + if (priv) { + kref_get(&priv->rx_kref); + return priv; + } + + priv = j1939_priv_create(ndev); + if (!priv) + return ERR_PTR(-ENOMEM); + + j1939_tp_init(priv); + spin_lock_init(&priv->j1939_socks_lock); + INIT_LIST_HEAD(&priv->j1939_socks); + + spin_lock(&j1939_netdev_lock); 
+ priv_new = j1939_priv_get_by_ndev_locked(ndev); + if (priv_new) { + /* Someone was faster than us, use their priv and roll + * back our's. + */ + spin_unlock(&j1939_netdev_lock); + dev_put(ndev); + kfree(priv); + kref_get(&priv_new->rx_kref); + return priv_new; + } + j1939_priv_set(ndev, priv); + spin_unlock(&j1939_netdev_lock); + + ret = j1939_can_rx_register(priv); + if (ret < 0) + goto out_priv_put; + + return priv; + + out_priv_put: + j1939_priv_set(ndev, NULL); + dev_put(ndev); + kfree(priv); + + return ERR_PTR(ret); +} + +void j1939_netdev_stop(struct j1939_priv *priv) +{ + kref_put_lock(&priv->rx_kref, __j1939_rx_release, &j1939_netdev_lock); + j1939_priv_put(priv); +} + +int j1939_send_one(struct j1939_priv *priv, struct sk_buff *skb) +{ + int ret, dlc; + canid_t canid; + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct can_frame *cf; + + /* apply sanity checks */ + if (j1939_pgn_is_pdu1(skcb->addr.pgn)) + skcb->addr.pgn &= J1939_PGN_PDU1_MAX; + else + skcb->addr.pgn &= J1939_PGN_MAX; + + if (skcb->priority > 7) + skcb->priority = 6; + + ret = j1939_ac_fixup(priv, skb); + if (unlikely(ret)) + goto failed; + dlc = skb->len; + + /* re-claim the CAN_HDR from the SKB */ + cf = skb_push(skb, J1939_CAN_HDR); + + /* make it a full can frame again */ + skb_put(skb, J1939_CAN_FTR + (8 - dlc)); + + canid = CAN_EFF_FLAG | + (skcb->priority << 26) | + (skcb->addr.pgn << 8) | + skcb->addr.sa; + if (j1939_pgn_is_pdu1(skcb->addr.pgn)) + canid |= skcb->addr.da << 8; + + cf->can_id = canid; + cf->can_dlc = dlc; + + return can_send(skb, 1); + + failed: + kfree_skb(skb); + return ret; +} + +static int j1939_netdev_notify(struct notifier_block *nb, + unsigned long msg, void *data) +{ + struct net_device *ndev = netdev_notifier_info_to_dev(data); + struct j1939_priv *priv; + + priv = j1939_priv_get_by_ndev(ndev); + if (!priv) + goto notify_done; + + if (ndev->type != ARPHRD_CAN) + goto notify_put; + + switch (msg) { + case NETDEV_DOWN: + j1939_cancel_active_session(priv, NULL); + j1939_sk_netdev_event_netdown(priv); + j1939_ecu_unmap_all(priv); + break; + } + +notify_put: + j1939_priv_put(priv); + +notify_done: + return NOTIFY_DONE; +} + +static struct notifier_block j1939_netdev_notifier = { + .notifier_call = j1939_netdev_notify, +}; + +/* MODULE interface */ +static __init int j1939_module_init(void) +{ + int ret; + + pr_info("can: SAE J1939\n"); + + ret = register_netdevice_notifier(&j1939_netdev_notifier); + if (ret) + goto fail_notifier; + + ret = can_proto_register(&j1939_can_proto); + if (ret < 0) { + pr_err("can: registration of j1939 protocol failed\n"); + goto fail_sk; + } + + return 0; + + fail_sk: + unregister_netdevice_notifier(&j1939_netdev_notifier); + fail_notifier: + return ret; +} + +static __exit void j1939_module_exit(void) +{ + can_proto_unregister(&j1939_can_proto); + + unregister_netdevice_notifier(&j1939_netdev_notifier); +} + +module_init(j1939_module_init); +module_exit(j1939_module_exit); diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c new file mode 100644 index 000000000000..37c1040bcb9c --- /dev/null +++ b/net/can/j1939/socket.c @@ -0,0 +1,1160 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2010-2011 EIA Electronics, +// Pieter Beyens <pieter.beyens@eia.be> +// Copyright (c) 2010-2011 EIA Electronics, +// Kurt Van Dijck <kurt.van.dijck@eia.be> +// Copyright (c) 2018 Protonic, +// Robin van der Gracht <robin@protonic.nl> +// Copyright (c) 2017-2019 Pengutronix, +// Marc Kleine-Budde <kernel@pengutronix.de> +// Copyright (c) 2017-2019 
Pengutronix, +// Oleksij Rempel <kernel@pengutronix.de> + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/can/core.h> +#include <linux/can/skb.h> +#include <linux/errqueue.h> +#include <linux/if_arp.h> + +#include "j1939-priv.h" + +#define J1939_MIN_NAMELEN CAN_REQUIRED_SIZE(struct sockaddr_can, can_addr.j1939) + +/* conversion function between struct sock::sk_priority from linux and + * j1939 priority field + */ +static inline priority_t j1939_prio(u32 sk_priority) +{ + sk_priority = min(sk_priority, 7U); + + return 7 - sk_priority; +} + +static inline u32 j1939_to_sk_priority(priority_t prio) +{ + return 7 - prio; +} + +/* function to see if pgn is to be evaluated */ +static inline bool j1939_pgn_is_valid(pgn_t pgn) +{ + return pgn <= J1939_PGN_MAX; +} + +/* test function to avoid non-zero DA placeholder for pdu1 pgn's */ +static inline bool j1939_pgn_is_clean_pdu(pgn_t pgn) +{ + if (j1939_pgn_is_pdu1(pgn)) + return !(pgn & 0xff); + else + return true; +} + +static inline void j1939_sock_pending_add(struct sock *sk) +{ + struct j1939_sock *jsk = j1939_sk(sk); + + atomic_inc(&jsk->skb_pending); +} + +static int j1939_sock_pending_get(struct sock *sk) +{ + struct j1939_sock *jsk = j1939_sk(sk); + + return atomic_read(&jsk->skb_pending); +} + +void j1939_sock_pending_del(struct sock *sk) +{ + struct j1939_sock *jsk = j1939_sk(sk); + + /* atomic_dec_return returns the new value */ + if (!atomic_dec_return(&jsk->skb_pending)) + wake_up(&jsk->waitq); /* no pending SKB's */ +} + +static void j1939_jsk_add(struct j1939_priv *priv, struct j1939_sock *jsk) +{ + jsk->state |= J1939_SOCK_BOUND; + j1939_priv_get(priv); + jsk->priv = priv; + + spin_lock_bh(&priv->j1939_socks_lock); + list_add_tail(&jsk->list, &priv->j1939_socks); + spin_unlock_bh(&priv->j1939_socks_lock); +} + +static void j1939_jsk_del(struct j1939_priv *priv, struct j1939_sock *jsk) +{ + spin_lock_bh(&priv->j1939_socks_lock); + list_del_init(&jsk->list); + spin_unlock_bh(&priv->j1939_socks_lock); + + jsk->priv = NULL; + j1939_priv_put(priv); + jsk->state &= ~J1939_SOCK_BOUND; +} + +static bool j1939_sk_queue_session(struct j1939_session *session) +{ + struct j1939_sock *jsk = j1939_sk(session->sk); + bool empty; + + spin_lock_bh(&jsk->sk_session_queue_lock); + empty = list_empty(&jsk->sk_session_queue); + j1939_session_get(session); + list_add_tail(&session->sk_session_queue_entry, &jsk->sk_session_queue); + spin_unlock_bh(&jsk->sk_session_queue_lock); + j1939_sock_pending_add(&jsk->sk); + + return empty; +} + +static struct +j1939_session *j1939_sk_get_incomplete_session(struct j1939_sock *jsk) +{ + struct j1939_session *session = NULL; + + spin_lock_bh(&jsk->sk_session_queue_lock); + if (!list_empty(&jsk->sk_session_queue)) { + session = list_last_entry(&jsk->sk_session_queue, + struct j1939_session, + sk_session_queue_entry); + if (session->total_queued_size == session->total_message_size) + session = NULL; + else + j1939_session_get(session); + } + spin_unlock_bh(&jsk->sk_session_queue_lock); + + return session; +} + +static void j1939_sk_queue_drop_all(struct j1939_priv *priv, + struct j1939_sock *jsk, int err) +{ + struct j1939_session *session, *tmp; + + netdev_dbg(priv->ndev, "%s: err: %i\n", __func__, err); + spin_lock_bh(&jsk->sk_session_queue_lock); + list_for_each_entry_safe(session, tmp, &jsk->sk_session_queue, + sk_session_queue_entry) { + list_del_init(&session->sk_session_queue_entry); + session->err = err; + j1939_session_put(session); + } + spin_unlock_bh(&jsk->sk_session_queue_lock); +} + +static 
void j1939_sk_queue_activate_next_locked(struct j1939_session *session) +{ + struct j1939_sock *jsk; + struct j1939_session *first; + int err; + + /* RX sessions don't have a socket (yet) */ + if (!session->sk) + return; + + jsk = j1939_sk(session->sk); + lockdep_assert_held(&jsk->sk_session_queue_lock); + + err = session->err; + + first = list_first_entry_or_null(&jsk->sk_session_queue, + struct j1939_session, + sk_session_queue_entry); + + /* Someone else has already activated the next session */ + if (first != session) + return; + +activate_next: + list_del_init(&first->sk_session_queue_entry); + j1939_session_put(first); + first = list_first_entry_or_null(&jsk->sk_session_queue, + struct j1939_session, + sk_session_queue_entry); + if (!first) + return; + + if (WARN_ON_ONCE(j1939_session_activate(first))) { + first->err = -EBUSY; + goto activate_next; + } else { + /* Give receiver some time (arbitrarily chosen) to recover */ + int time_ms = 0; + + if (err) + time_ms = 10 + prandom_u32_max(16); + + j1939_tp_schedule_txtimer(first, time_ms); + } +} + +void j1939_sk_queue_activate_next(struct j1939_session *session) +{ + struct j1939_sock *jsk; + + if (!session->sk) + return; + + jsk = j1939_sk(session->sk); + + spin_lock_bh(&jsk->sk_session_queue_lock); + j1939_sk_queue_activate_next_locked(session); + spin_unlock_bh(&jsk->sk_session_queue_lock); +} + +static bool j1939_sk_match_dst(struct j1939_sock *jsk, + const struct j1939_sk_buff_cb *skcb) +{ + if ((jsk->state & J1939_SOCK_PROMISC)) + return true; + + /* Destination address filter */ + if (jsk->addr.src_name && skcb->addr.dst_name) { + if (jsk->addr.src_name != skcb->addr.dst_name) + return false; + } else { + /* receive (all sockets) if + * - all packets that match our bind() address + * - all broadcast on a socket if SO_BROADCAST + * is set + */ + if (j1939_address_is_unicast(skcb->addr.da)) { + if (jsk->addr.sa != skcb->addr.da) + return false; + } else if (!sock_flag(&jsk->sk, SOCK_BROADCAST)) { + /* receiving broadcast without SO_BROADCAST + * flag is not allowed + */ + return false; + } + } + + /* Source address filter */ + if (jsk->state & J1939_SOCK_CONNECTED) { + /* receive (all sockets) if + * - all packets that match our connect() name or address + */ + if (jsk->addr.dst_name && skcb->addr.src_name) { + if (jsk->addr.dst_name != skcb->addr.src_name) + return false; + } else { + if (jsk->addr.da != skcb->addr.sa) + return false; + } + } + + /* PGN filter */ + if (j1939_pgn_is_valid(jsk->pgn_rx_filter) && + jsk->pgn_rx_filter != skcb->addr.pgn) + return false; + + return true; +} + +/* matches skb control buffer (addr) with a j1939 filter */ +static bool j1939_sk_match_filter(struct j1939_sock *jsk, + const struct j1939_sk_buff_cb *skcb) +{ + const struct j1939_filter *f = jsk->filters; + int nfilter = jsk->nfilters; + + if (!nfilter) + /* receive all when no filters are assigned */ + return true; + + for (; nfilter; ++f, --nfilter) { + if ((skcb->addr.pgn & f->pgn_mask) != f->pgn) + continue; + if ((skcb->addr.sa & f->addr_mask) != f->addr) + continue; + if ((skcb->addr.src_name & f->name_mask) != f->name) + continue; + return true; + } + return false; +} + +static bool j1939_sk_recv_match_one(struct j1939_sock *jsk, + const struct j1939_sk_buff_cb *skcb) +{ + if (!(jsk->state & J1939_SOCK_BOUND)) + return false; + + if (!j1939_sk_match_dst(jsk, skcb)) + return false; + + if (!j1939_sk_match_filter(jsk, skcb)) + return false; + + return true; +} + +static void j1939_sk_recv_one(struct j1939_sock *jsk, struct sk_buff *oskb) +{ + 
const struct j1939_sk_buff_cb *oskcb = j1939_skb_to_cb(oskb); + struct j1939_sk_buff_cb *skcb; + struct sk_buff *skb; + + if (oskb->sk == &jsk->sk) + return; + + if (!j1939_sk_recv_match_one(jsk, oskcb)) + return; + + skb = skb_clone(oskb, GFP_ATOMIC); + if (!skb) { + pr_warn("skb clone failed\n"); + return; + } + can_skb_set_owner(skb, oskb->sk); + + skcb = j1939_skb_to_cb(skb); + skcb->msg_flags &= ~(MSG_DONTROUTE); + if (skb->sk) + skcb->msg_flags |= MSG_DONTROUTE; + + if (sock_queue_rcv_skb(&jsk->sk, skb) < 0) + kfree_skb(skb); +} + +bool j1939_sk_recv_match(struct j1939_priv *priv, struct j1939_sk_buff_cb *skcb) +{ + struct j1939_sock *jsk; + bool match = false; + + spin_lock_bh(&priv->j1939_socks_lock); + list_for_each_entry(jsk, &priv->j1939_socks, list) { + match = j1939_sk_recv_match_one(jsk, skcb); + if (match) + break; + } + spin_unlock_bh(&priv->j1939_socks_lock); + + return match; +} + +void j1939_sk_recv(struct j1939_priv *priv, struct sk_buff *skb) +{ + struct j1939_sock *jsk; + + spin_lock_bh(&priv->j1939_socks_lock); + list_for_each_entry(jsk, &priv->j1939_socks, list) { + j1939_sk_recv_one(jsk, skb); + } + spin_unlock_bh(&priv->j1939_socks_lock); +} + +static int j1939_sk_init(struct sock *sk) +{ + struct j1939_sock *jsk = j1939_sk(sk); + + /* Ensure that "sk" is first member in "struct j1939_sock", so that we + * can skip it during memset(). + */ + BUILD_BUG_ON(offsetof(struct j1939_sock, sk) != 0); + memset((void *)jsk + sizeof(jsk->sk), 0x0, + sizeof(*jsk) - sizeof(jsk->sk)); + + INIT_LIST_HEAD(&jsk->list); + init_waitqueue_head(&jsk->waitq); + jsk->sk.sk_priority = j1939_to_sk_priority(6); + jsk->sk.sk_reuse = 1; /* per default */ + jsk->addr.sa = J1939_NO_ADDR; + jsk->addr.da = J1939_NO_ADDR; + jsk->addr.pgn = J1939_NO_PGN; + jsk->pgn_rx_filter = J1939_NO_PGN; + atomic_set(&jsk->skb_pending, 0); + spin_lock_init(&jsk->sk_session_queue_lock); + INIT_LIST_HEAD(&jsk->sk_session_queue); + + return 0; +} + +static int j1939_sk_sanity_check(struct sockaddr_can *addr, int len) +{ + if (!addr) + return -EDESTADDRREQ; + if (len < J1939_MIN_NAMELEN) + return -EINVAL; + if (addr->can_family != AF_CAN) + return -EINVAL; + if (!addr->can_ifindex) + return -ENODEV; + if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn) && + !j1939_pgn_is_clean_pdu(addr->can_addr.j1939.pgn)) + return -EINVAL; + + return 0; +} + +static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len) +{ + struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; + struct j1939_sock *jsk = j1939_sk(sock->sk); + struct j1939_priv *priv = jsk->priv; + struct sock *sk = sock->sk; + struct net *net = sock_net(sk); + int ret = 0; + + ret = j1939_sk_sanity_check(addr, len); + if (ret) + return ret; + + lock_sock(sock->sk); + + /* Already bound to an interface? */ + if (jsk->state & J1939_SOCK_BOUND) { + /* A re-bind() to a different interface is not + * supported. 
+ */ + if (jsk->ifindex != addr->can_ifindex) { + ret = -EINVAL; + goto out_release_sock; + } + + /* drop old references */ + j1939_jsk_del(priv, jsk); + j1939_local_ecu_put(priv, jsk->addr.src_name, jsk->addr.sa); + } else { + struct net_device *ndev; + + ndev = dev_get_by_index(net, addr->can_ifindex); + if (!ndev) { + ret = -ENODEV; + goto out_release_sock; + } + + if (ndev->type != ARPHRD_CAN) { + dev_put(ndev); + ret = -ENODEV; + goto out_release_sock; + } + + priv = j1939_netdev_start(ndev); + dev_put(ndev); + if (IS_ERR(priv)) { + ret = PTR_ERR(priv); + goto out_release_sock; + } + + jsk->ifindex = addr->can_ifindex; + } + + /* set default transmit pgn */ + if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn)) + jsk->pgn_rx_filter = addr->can_addr.j1939.pgn; + jsk->addr.src_name = addr->can_addr.j1939.name; + jsk->addr.sa = addr->can_addr.j1939.addr; + + /* get new references */ + ret = j1939_local_ecu_get(priv, jsk->addr.src_name, jsk->addr.sa); + if (ret) { + j1939_netdev_stop(priv); + goto out_release_sock; + } + + j1939_jsk_add(priv, jsk); + + out_release_sock: /* fall through */ + release_sock(sock->sk); + + return ret; +} + +static int j1939_sk_connect(struct socket *sock, struct sockaddr *uaddr, + int len, int flags) +{ + struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; + struct j1939_sock *jsk = j1939_sk(sock->sk); + int ret = 0; + + ret = j1939_sk_sanity_check(addr, len); + if (ret) + return ret; + + lock_sock(sock->sk); + + /* bind() before connect() is mandatory */ + if (!(jsk->state & J1939_SOCK_BOUND)) { + ret = -EINVAL; + goto out_release_sock; + } + + /* A connect() to a different interface is not supported. */ + if (jsk->ifindex != addr->can_ifindex) { + ret = -EINVAL; + goto out_release_sock; + } + + if (!addr->can_addr.j1939.name && + addr->can_addr.j1939.addr == J1939_NO_ADDR && + !sock_flag(&jsk->sk, SOCK_BROADCAST)) { + /* broadcast, but SO_BROADCAST not set */ + ret = -EACCES; + goto out_release_sock; + } + + jsk->addr.dst_name = addr->can_addr.j1939.name; + jsk->addr.da = addr->can_addr.j1939.addr; + + if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn)) + jsk->addr.pgn = addr->can_addr.j1939.pgn; + + jsk->state |= J1939_SOCK_CONNECTED; + + out_release_sock: /* fall through */ + release_sock(sock->sk); + + return ret; +} + +static void j1939_sk_sock2sockaddr_can(struct sockaddr_can *addr, + const struct j1939_sock *jsk, int peer) +{ + addr->can_family = AF_CAN; + addr->can_ifindex = jsk->ifindex; + addr->can_addr.j1939.pgn = jsk->addr.pgn; + if (peer) { + addr->can_addr.j1939.name = jsk->addr.dst_name; + addr->can_addr.j1939.addr = jsk->addr.da; + } else { + addr->can_addr.j1939.name = jsk->addr.src_name; + addr->can_addr.j1939.addr = jsk->addr.sa; + } +} + +static int j1939_sk_getname(struct socket *sock, struct sockaddr *uaddr, + int peer) +{ + struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; + struct sock *sk = sock->sk; + struct j1939_sock *jsk = j1939_sk(sk); + int ret = 0; + + lock_sock(sk); + + if (peer && !(jsk->state & J1939_SOCK_CONNECTED)) { + ret = -EADDRNOTAVAIL; + goto failure; + } + + j1939_sk_sock2sockaddr_can(addr, jsk, peer); + ret = J1939_MIN_NAMELEN; + + failure: + release_sock(sk); + + return ret; +} + +static int j1939_sk_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + struct j1939_sock *jsk; + + if (!sk) + return 0; + + jsk = j1939_sk(sk); + lock_sock(sk); + + if (jsk->state & J1939_SOCK_BOUND) { + struct j1939_priv *priv = jsk->priv; + + if (wait_event_interruptible(jsk->waitq, + 
!j1939_sock_pending_get(&jsk->sk))) { + j1939_cancel_active_session(priv, sk); + j1939_sk_queue_drop_all(priv, jsk, ESHUTDOWN); + } + + j1939_jsk_del(priv, jsk); + + j1939_local_ecu_put(priv, jsk->addr.src_name, + jsk->addr.sa); + + j1939_netdev_stop(priv); + } + + sock_orphan(sk); + sock->sk = NULL; + + release_sock(sk); + sock_put(sk); + + return 0; +} + +static int j1939_sk_setsockopt_flag(struct j1939_sock *jsk, char __user *optval, + unsigned int optlen, int flag) +{ + int tmp; + + if (optlen != sizeof(tmp)) + return -EINVAL; + if (copy_from_user(&tmp, optval, optlen)) + return -EFAULT; + lock_sock(&jsk->sk); + if (tmp) + jsk->state |= flag; + else + jsk->state &= ~flag; + release_sock(&jsk->sk); + return tmp; +} + +static int j1939_sk_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, unsigned int optlen) +{ + struct sock *sk = sock->sk; + struct j1939_sock *jsk = j1939_sk(sk); + int tmp, count = 0, ret = 0; + struct j1939_filter *filters = NULL, *ofilters; + + if (level != SOL_CAN_J1939) + return -EINVAL; + + switch (optname) { + case SO_J1939_FILTER: + if (optval) { + struct j1939_filter *f; + int c; + + if (optlen % sizeof(*filters) != 0) + return -EINVAL; + + if (optlen > J1939_FILTER_MAX * + sizeof(struct j1939_filter)) + return -EINVAL; + + count = optlen / sizeof(*filters); + filters = memdup_user(optval, optlen); + if (IS_ERR(filters)) + return PTR_ERR(filters); + + for (f = filters, c = count; c; f++, c--) { + f->name &= f->name_mask; + f->pgn &= f->pgn_mask; + f->addr &= f->addr_mask; + } + } + + lock_sock(&jsk->sk); + ofilters = jsk->filters; + jsk->filters = filters; + jsk->nfilters = count; + release_sock(&jsk->sk); + kfree(ofilters); + return 0; + case SO_J1939_PROMISC: + return j1939_sk_setsockopt_flag(jsk, optval, optlen, + J1939_SOCK_PROMISC); + case SO_J1939_ERRQUEUE: + ret = j1939_sk_setsockopt_flag(jsk, optval, optlen, + J1939_SOCK_ERRQUEUE); + if (ret < 0) + return ret; + + if (!(jsk->state & J1939_SOCK_ERRQUEUE)) + skb_queue_purge(&sk->sk_error_queue); + return ret; + case SO_J1939_SEND_PRIO: + if (optlen != sizeof(tmp)) + return -EINVAL; + if (copy_from_user(&tmp, optval, optlen)) + return -EFAULT; + if (tmp < 0 || tmp > 7) + return -EDOM; + if (tmp < 2 && !capable(CAP_NET_ADMIN)) + return -EPERM; + lock_sock(&jsk->sk); + jsk->sk.sk_priority = j1939_to_sk_priority(tmp); + release_sock(&jsk->sk); + return 0; + default: + return -ENOPROTOOPT; + } +} + +static int j1939_sk_getsockopt(struct socket *sock, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct sock *sk = sock->sk; + struct j1939_sock *jsk = j1939_sk(sk); + int ret, ulen; + /* set defaults for using 'int' properties */ + int tmp = 0; + int len = sizeof(tmp); + void *val = &tmp; + + if (level != SOL_CAN_J1939) + return -EINVAL; + if (get_user(ulen, optlen)) + return -EFAULT; + if (ulen < 0) + return -EINVAL; + + lock_sock(&jsk->sk); + switch (optname) { + case SO_J1939_PROMISC: + tmp = (jsk->state & J1939_SOCK_PROMISC) ? 1 : 0; + break; + case SO_J1939_ERRQUEUE: + tmp = (jsk->state & J1939_SOCK_ERRQUEUE) ? 
1 : 0; + break; + case SO_J1939_SEND_PRIO: + tmp = j1939_prio(jsk->sk.sk_priority); + break; + default: + ret = -ENOPROTOOPT; + goto no_copy; + } + + /* copy to user, based on 'len' & 'val' + * but most sockopt's are 'int' properties, and have 'len' & 'val' + * left unchanged, but instead modified 'tmp' + */ + if (len > ulen) + ret = -EFAULT; + else if (put_user(len, optlen)) + ret = -EFAULT; + else if (copy_to_user(optval, val, len)) + ret = -EFAULT; + else + ret = 0; + no_copy: + release_sock(&jsk->sk); + return ret; +} + +static int j1939_sk_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) +{ + struct sock *sk = sock->sk; + struct sk_buff *skb; + struct j1939_sk_buff_cb *skcb; + int ret = 0; + + if (flags & ~(MSG_DONTWAIT | MSG_ERRQUEUE)) + return -EINVAL; + + if (flags & MSG_ERRQUEUE) + return sock_recv_errqueue(sock->sk, msg, size, SOL_CAN_J1939, + SCM_J1939_ERRQUEUE); + + skb = skb_recv_datagram(sk, flags, 0, &ret); + if (!skb) + return ret; + + if (size < skb->len) + msg->msg_flags |= MSG_TRUNC; + else + size = skb->len; + + ret = memcpy_to_msg(msg, skb->data, size); + if (ret < 0) { + skb_free_datagram(sk, skb); + return ret; + } + + skcb = j1939_skb_to_cb(skb); + if (j1939_address_is_valid(skcb->addr.da)) + put_cmsg(msg, SOL_CAN_J1939, SCM_J1939_DEST_ADDR, + sizeof(skcb->addr.da), &skcb->addr.da); + + if (skcb->addr.dst_name) + put_cmsg(msg, SOL_CAN_J1939, SCM_J1939_DEST_NAME, + sizeof(skcb->addr.dst_name), &skcb->addr.dst_name); + + put_cmsg(msg, SOL_CAN_J1939, SCM_J1939_PRIO, + sizeof(skcb->priority), &skcb->priority); + + if (msg->msg_name) { + struct sockaddr_can *paddr = msg->msg_name; + + msg->msg_namelen = J1939_MIN_NAMELEN; + memset(msg->msg_name, 0, msg->msg_namelen); + paddr->can_family = AF_CAN; + paddr->can_ifindex = skb->skb_iif; + paddr->can_addr.j1939.name = skcb->addr.src_name; + paddr->can_addr.j1939.addr = skcb->addr.sa; + paddr->can_addr.j1939.pgn = skcb->addr.pgn; + } + + sock_recv_ts_and_drops(msg, sk, skb); + msg->msg_flags |= skcb->msg_flags; + skb_free_datagram(sk, skb); + + return size; +} + +static struct sk_buff *j1939_sk_alloc_skb(struct net_device *ndev, + struct sock *sk, + struct msghdr *msg, size_t size, + int *errcode) +{ + struct j1939_sock *jsk = j1939_sk(sk); + struct j1939_sk_buff_cb *skcb; + struct sk_buff *skb; + int ret; + + skb = sock_alloc_send_skb(sk, + size + + sizeof(struct can_frame) - + sizeof(((struct can_frame *)NULL)->data) + + sizeof(struct can_skb_priv), + msg->msg_flags & MSG_DONTWAIT, &ret); + if (!skb) + goto failure; + + can_skb_reserve(skb); + can_skb_prv(skb)->ifindex = ndev->ifindex; + can_skb_prv(skb)->skbcnt = 0; + skb_reserve(skb, offsetof(struct can_frame, data)); + + ret = memcpy_from_msg(skb_put(skb, size), msg, size); + if (ret < 0) + goto free_skb; + + skb->dev = ndev; + + skcb = j1939_skb_to_cb(skb); + memset(skcb, 0, sizeof(*skcb)); + skcb->addr = jsk->addr; + skcb->priority = j1939_prio(sk->sk_priority); + + if (msg->msg_name) { + struct sockaddr_can *addr = msg->msg_name; + + if (addr->can_addr.j1939.name || + addr->can_addr.j1939.addr != J1939_NO_ADDR) { + skcb->addr.dst_name = addr->can_addr.j1939.name; + skcb->addr.da = addr->can_addr.j1939.addr; + } + if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn)) + skcb->addr.pgn = addr->can_addr.j1939.pgn; + } + + *errcode = ret; + return skb; + +free_skb: + kfree_skb(skb); +failure: + *errcode = ret; + return NULL; +} + +static size_t j1939_sk_opt_stats_get_size(void) +{ + return + nla_total_size(sizeof(u32)) + /* J1939_NLA_BYTES_ACKED */ + 
0; +} + +static struct sk_buff * +j1939_sk_get_timestamping_opt_stats(struct j1939_session *session) +{ + struct sk_buff *stats; + u32 size; + + stats = alloc_skb(j1939_sk_opt_stats_get_size(), GFP_ATOMIC); + if (!stats) + return NULL; + + if (session->skcb.addr.type == J1939_SIMPLE) + size = session->total_message_size; + else + size = min(session->pkt.tx_acked * 7, + session->total_message_size); + + nla_put_u32(stats, J1939_NLA_BYTES_ACKED, size); + + return stats; +} + +void j1939_sk_errqueue(struct j1939_session *session, + enum j1939_sk_errqueue_type type) +{ + struct j1939_priv *priv = session->priv; + struct sock *sk = session->sk; + struct j1939_sock *jsk; + struct sock_exterr_skb *serr; + struct sk_buff *skb; + char *state = "UNK"; + int err; + + /* currently we have no sk for the RX session */ + if (!sk) + return; + + jsk = j1939_sk(sk); + + if (!(jsk->state & J1939_SOCK_ERRQUEUE)) + return; + + skb = j1939_sk_get_timestamping_opt_stats(session); + if (!skb) + return; + + skb->tstamp = ktime_get_real(); + + BUILD_BUG_ON(sizeof(struct sock_exterr_skb) > sizeof(skb->cb)); + + serr = SKB_EXT_ERR(skb); + memset(serr, 0, sizeof(*serr)); + switch (type) { + case J1939_ERRQUEUE_ACK: + if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)) + return; + + serr->ee.ee_errno = ENOMSG; + serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; + serr->ee.ee_info = SCM_TSTAMP_ACK; + state = "ACK"; + break; + case J1939_ERRQUEUE_SCHED: + if (!(sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)) + return; + + serr->ee.ee_errno = ENOMSG; + serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; + serr->ee.ee_info = SCM_TSTAMP_SCHED; + state = "SCH"; + break; + case J1939_ERRQUEUE_ABORT: + serr->ee.ee_errno = session->err; + serr->ee.ee_origin = SO_EE_ORIGIN_LOCAL; + serr->ee.ee_info = J1939_EE_INFO_TX_ABORT; + state = "ABT"; + break; + default: + netdev_err(priv->ndev, "Unknown errqueue type %i\n", type); + } + + serr->opt_stats = true; + if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) + serr->ee.ee_data = session->tskey; + + netdev_dbg(session->priv->ndev, "%s: 0x%p tskey: %i, state: %s\n", + __func__, session, session->tskey, state); + err = sock_queue_err_skb(sk, skb); + + if (err) + kfree_skb(skb); +}; + +void j1939_sk_send_loop_abort(struct sock *sk, int err) +{ + sk->sk_err = err; + + sk->sk_error_report(sk); +} + +static int j1939_sk_send_loop(struct j1939_priv *priv, struct sock *sk, + struct msghdr *msg, size_t size) + +{ + struct j1939_sock *jsk = j1939_sk(sk); + struct j1939_session *session = j1939_sk_get_incomplete_session(jsk); + struct sk_buff *skb; + size_t segment_size, todo_size; + int ret = 0; + + if (session && + session->total_message_size != session->total_queued_size + size) { + j1939_session_put(session); + return -EIO; + } + + todo_size = size; + + while (todo_size) { + struct j1939_sk_buff_cb *skcb; + + segment_size = min_t(size_t, J1939_MAX_TP_PACKET_SIZE, + todo_size); + + /* Allocate skb for one segment */ + skb = j1939_sk_alloc_skb(priv->ndev, sk, msg, segment_size, + &ret); + if (ret) + break; + + skcb = j1939_skb_to_cb(skb); + + if (!session) { + /* at this point the size should be the full size + * of the session + */ + skcb->offset = 0; + session = j1939_tp_send(priv, skb, size); + if (IS_ERR(session)) { + ret = PTR_ERR(session); + goto kfree_skb; + } + if (j1939_sk_queue_session(session)) { + /* try to activate session if we are + * first in the queue + */ + if (!j1939_session_activate(session)) { + j1939_tp_schedule_txtimer(session, 0); + } else { + ret = -EBUSY; + session->err = ret; + 
j1939_sk_queue_drop_all(priv, jsk, + EBUSY); + break; + } + } + } else { + skcb->offset = session->total_queued_size; + j1939_session_skb_queue(session, skb); + } + + todo_size -= segment_size; + session->total_queued_size += segment_size; + } + + switch (ret) { + case 0: /* OK */ + if (todo_size) + netdev_warn(priv->ndev, + "no error found and not completely queued?! %zu\n", + todo_size); + ret = size; + break; + case -ERESTARTSYS: + ret = -EINTR; + /* fall through */ + case -EAGAIN: /* OK */ + if (todo_size != size) + ret = size - todo_size; + break; + default: /* ERROR */ + break; + } + + if (session) + j1939_session_put(session); + + return ret; + + kfree_skb: + kfree_skb(skb); + return ret; +} + +static int j1939_sk_sendmsg(struct socket *sock, struct msghdr *msg, + size_t size) +{ + struct sock *sk = sock->sk; + struct j1939_sock *jsk = j1939_sk(sk); + struct j1939_priv *priv = jsk->priv; + int ifindex; + int ret; + + /* various socket state tests */ + if (!(jsk->state & J1939_SOCK_BOUND)) + return -EBADFD; + + ifindex = jsk->ifindex; + + if (!jsk->addr.src_name && jsk->addr.sa == J1939_NO_ADDR) + /* no source address assigned yet */ + return -EBADFD; + + /* deal with provided destination address info */ + if (msg->msg_name) { + struct sockaddr_can *addr = msg->msg_name; + + if (msg->msg_namelen < J1939_MIN_NAMELEN) + return -EINVAL; + + if (addr->can_family != AF_CAN) + return -EINVAL; + + if (addr->can_ifindex && addr->can_ifindex != ifindex) + return -EBADFD; + + if (j1939_pgn_is_valid(addr->can_addr.j1939.pgn) && + !j1939_pgn_is_clean_pdu(addr->can_addr.j1939.pgn)) + return -EINVAL; + + if (!addr->can_addr.j1939.name && + addr->can_addr.j1939.addr == J1939_NO_ADDR && + !sock_flag(sk, SOCK_BROADCAST)) + /* broadcast, but SO_BROADCAST not set */ + return -EACCES; + } else { + if (!jsk->addr.dst_name && jsk->addr.da == J1939_NO_ADDR && + !sock_flag(sk, SOCK_BROADCAST)) + /* broadcast, but SO_BROADCAST not set */ + return -EACCES; + } + + ret = j1939_sk_send_loop(priv, sk, msg, size); + + return ret; +} + +void j1939_sk_netdev_event_netdown(struct j1939_priv *priv) +{ + struct j1939_sock *jsk; + int error_code = ENETDOWN; + + spin_lock_bh(&priv->j1939_socks_lock); + list_for_each_entry(jsk, &priv->j1939_socks, list) { + jsk->sk.sk_err = error_code; + if (!sock_flag(&jsk->sk, SOCK_DEAD)) + jsk->sk.sk_error_report(&jsk->sk); + + j1939_sk_queue_drop_all(priv, jsk, error_code); + } + spin_unlock_bh(&priv->j1939_socks_lock); +} + +static int j1939_sk_no_ioctlcmd(struct socket *sock, unsigned int cmd, + unsigned long arg) +{ + /* no ioctls for socket layer -> hand it down to NIC layer */ + return -ENOIOCTLCMD; +} + +static const struct proto_ops j1939_ops = { + .family = PF_CAN, + .release = j1939_sk_release, + .bind = j1939_sk_bind, + .connect = j1939_sk_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = j1939_sk_getname, + .poll = datagram_poll, + .ioctl = j1939_sk_no_ioctlcmd, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = j1939_sk_setsockopt, + .getsockopt = j1939_sk_getsockopt, + .sendmsg = j1939_sk_sendmsg, + .recvmsg = j1939_sk_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +static struct proto j1939_proto __read_mostly = { + .name = "CAN_J1939", + .owner = THIS_MODULE, + .obj_size = sizeof(struct j1939_sock), + .init = j1939_sk_init, +}; + +const struct can_proto j1939_can_proto = { + .type = SOCK_DGRAM, + .protocol = CAN_J1939, + .ops = &j1939_ops, + .prot = &j1939_proto, +}; diff --git 
a/net/can/j1939/transport.c b/net/can/j1939/transport.c new file mode 100644 index 000000000000..fe000ea757ea --- /dev/null +++ b/net/can/j1939/transport.c @@ -0,0 +1,2027 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2010-2011 EIA Electronics, +// Kurt Van Dijck <kurt.van.dijck@eia.be> +// Copyright (c) 2018 Protonic, +// Robin van der Gracht <robin@protonic.nl> +// Copyright (c) 2017-2019 Pengutronix, +// Marc Kleine-Budde <kernel@pengutronix.de> +// Copyright (c) 2017-2019 Pengutronix, +// Oleksij Rempel <kernel@pengutronix.de> + +#include <linux/can/skb.h> + +#include "j1939-priv.h" + +#define J1939_XTP_TX_RETRY_LIMIT 100 + +#define J1939_ETP_PGN_CTL 0xc800 +#define J1939_ETP_PGN_DAT 0xc700 +#define J1939_TP_PGN_CTL 0xec00 +#define J1939_TP_PGN_DAT 0xeb00 + +#define J1939_TP_CMD_RTS 0x10 +#define J1939_TP_CMD_CTS 0x11 +#define J1939_TP_CMD_EOMA 0x13 +#define J1939_TP_CMD_BAM 0x20 +#define J1939_TP_CMD_ABORT 0xff + +#define J1939_ETP_CMD_RTS 0x14 +#define J1939_ETP_CMD_CTS 0x15 +#define J1939_ETP_CMD_DPO 0x16 +#define J1939_ETP_CMD_EOMA 0x17 +#define J1939_ETP_CMD_ABORT 0xff + +enum j1939_xtp_abort { + J1939_XTP_NO_ABORT = 0, + J1939_XTP_ABORT_BUSY = 1, + /* Already in one or more connection managed sessions and + * cannot support another. + * + * EALREADY: + * Operation already in progress + */ + + J1939_XTP_ABORT_RESOURCE = 2, + /* System resources were needed for another task so this + * connection managed session was terminated. + * + * EMSGSIZE: + * The socket type requires that message be sent atomically, + * and the size of the message to be sent made this + * impossible. + */ + + J1939_XTP_ABORT_TIMEOUT = 3, + /* A timeout occurred and this is the connection abort to + * close the session. + * + * EHOSTUNREACH: + * The destination host cannot be reached (probably because + * the host is down or a remote router cannot reach it). + */ + + J1939_XTP_ABORT_GENERIC = 4, + /* CTS messages received when data transfer is in progress + * + * EBADMSG: + * Not a data message + */ + + J1939_XTP_ABORT_FAULT = 5, + /* Maximal retransmit request limit reached + * + * ENOTRECOVERABLE: + * State not recoverable + */ + + J1939_XTP_ABORT_UNEXPECTED_DATA = 6, + /* Unexpected data transfer packet + * + * ENOTCONN: + * Transport endpoint is not connected + */ + + J1939_XTP_ABORT_BAD_SEQ = 7, + /* Bad sequence number (and software is not able to recover) + * + * EILSEQ: + * Illegal byte sequence + */ + + J1939_XTP_ABORT_DUP_SEQ = 8, + /* Duplicate sequence number (and software is not able to + * recover) + */ + + J1939_XTP_ABORT_EDPO_UNEXPECTED = 9, + /* Unexpected EDPO packet (ETP) or Message size > 1785 bytes + * (TP) + */ + + J1939_XTP_ABORT_BAD_EDPO_PGN = 10, + /* Unexpected EDPO PGN (PGN in EDPO is bad) */ + + J1939_XTP_ABORT_EDPO_OUTOF_CTS = 11, + /* EDPO number of packets is greater than CTS */ + + J1939_XTP_ABORT_BAD_EDPO_OFFSET = 12, + /* Bad EDPO offset */ + + J1939_XTP_ABORT_OTHER_DEPRECATED = 13, + /* Deprecated. 
Use 250 instead (Any other reason) */ + + J1939_XTP_ABORT_ECTS_UNXPECTED_PGN = 14, + /* Unexpected ECTS PGN (PGN in ECTS is bad) */ + + J1939_XTP_ABORT_ECTS_TOO_BIG = 15, + /* ECTS requested packets exceeds message size */ + + J1939_XTP_ABORT_OTHER = 250, + /* Any other reason (if a Connection Abort reason is + * identified that is not listed in the table use code 250) + */ +}; + +static unsigned int j1939_tp_block = 255; +static unsigned int j1939_tp_packet_delay; +static unsigned int j1939_tp_padding = 1; + +/* helpers */ +static const char *j1939_xtp_abort_to_str(enum j1939_xtp_abort abort) +{ + switch (abort) { + case J1939_XTP_ABORT_BUSY: + return "Already in one or more connection managed sessions and cannot support another."; + case J1939_XTP_ABORT_RESOURCE: + return "System resources were needed for another task so this connection managed session was terminated."; + case J1939_XTP_ABORT_TIMEOUT: + return "A timeout occurred and this is the connection abort to close the session."; + case J1939_XTP_ABORT_GENERIC: + return "CTS messages received when data transfer is in progress"; + case J1939_XTP_ABORT_FAULT: + return "Maximal retransmit request limit reached"; + case J1939_XTP_ABORT_UNEXPECTED_DATA: + return "Unexpected data transfer packet"; + case J1939_XTP_ABORT_BAD_SEQ: + return "Bad sequence number (and software is not able to recover)"; + case J1939_XTP_ABORT_DUP_SEQ: + return "Duplicate sequence number (and software is not able to recover)"; + case J1939_XTP_ABORT_EDPO_UNEXPECTED: + return "Unexpected EDPO packet (ETP) or Message size > 1785 bytes (TP)"; + case J1939_XTP_ABORT_BAD_EDPO_PGN: + return "Unexpected EDPO PGN (PGN in EDPO is bad)"; + case J1939_XTP_ABORT_EDPO_OUTOF_CTS: + return "EDPO number of packets is greater than CTS"; + case J1939_XTP_ABORT_BAD_EDPO_OFFSET: + return "Bad EDPO offset"; + case J1939_XTP_ABORT_OTHER_DEPRECATED: + return "Deprecated. 
Use 250 instead (Any other reason)"; + case J1939_XTP_ABORT_ECTS_UNXPECTED_PGN: + return "Unexpected ECTS PGN (PGN in ECTS is bad)"; + case J1939_XTP_ABORT_ECTS_TOO_BIG: + return "ECTS requested packets exceeds message size"; + case J1939_XTP_ABORT_OTHER: + return "Any other reason (if a Connection Abort reason is identified that is not listed in the table use code 250)"; + default: + return "<unknown>"; + } +} + +static int j1939_xtp_abort_to_errno(struct j1939_priv *priv, + enum j1939_xtp_abort abort) +{ + int err; + + switch (abort) { + case J1939_XTP_NO_ABORT: + WARN_ON_ONCE(abort == J1939_XTP_NO_ABORT); + err = 0; + break; + case J1939_XTP_ABORT_BUSY: + err = EALREADY; + break; + case J1939_XTP_ABORT_RESOURCE: + err = EMSGSIZE; + break; + case J1939_XTP_ABORT_TIMEOUT: + err = EHOSTUNREACH; + break; + case J1939_XTP_ABORT_GENERIC: + err = EBADMSG; + break; + case J1939_XTP_ABORT_FAULT: + err = ENOTRECOVERABLE; + break; + case J1939_XTP_ABORT_UNEXPECTED_DATA: + err = ENOTCONN; + break; + case J1939_XTP_ABORT_BAD_SEQ: + err = EILSEQ; + break; + case J1939_XTP_ABORT_DUP_SEQ: + err = EPROTO; + break; + case J1939_XTP_ABORT_EDPO_UNEXPECTED: + err = EPROTO; + break; + case J1939_XTP_ABORT_BAD_EDPO_PGN: + err = EPROTO; + break; + case J1939_XTP_ABORT_EDPO_OUTOF_CTS: + err = EPROTO; + break; + case J1939_XTP_ABORT_BAD_EDPO_OFFSET: + err = EPROTO; + break; + case J1939_XTP_ABORT_OTHER_DEPRECATED: + err = EPROTO; + break; + case J1939_XTP_ABORT_ECTS_UNXPECTED_PGN: + err = EPROTO; + break; + case J1939_XTP_ABORT_ECTS_TOO_BIG: + err = EPROTO; + break; + case J1939_XTP_ABORT_OTHER: + err = EPROTO; + break; + default: + netdev_warn(priv->ndev, "Unknown abort code %i", abort); + err = EPROTO; + } + + return err; +} + +static inline void j1939_session_list_lock(struct j1939_priv *priv) +{ + spin_lock_bh(&priv->active_session_list_lock); +} + +static inline void j1939_session_list_unlock(struct j1939_priv *priv) +{ + spin_unlock_bh(&priv->active_session_list_lock); +} + +void j1939_session_get(struct j1939_session *session) +{ + kref_get(&session->kref); +} + +/* session completion functions */ +static void __j1939_session_drop(struct j1939_session *session) +{ + if (!session->transmission) + return; + + j1939_sock_pending_del(session->sk); +} + +static void j1939_session_destroy(struct j1939_session *session) +{ + if (session->err) + j1939_sk_errqueue(session, J1939_ERRQUEUE_ABORT); + else + j1939_sk_errqueue(session, J1939_ERRQUEUE_ACK); + + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); + + skb_queue_purge(&session->skb_queue); + __j1939_session_drop(session); + j1939_priv_put(session->priv); + kfree(session); +} + +static void __j1939_session_release(struct kref *kref) +{ + struct j1939_session *session = container_of(kref, struct j1939_session, + kref); + + j1939_session_destroy(session); +} + +void j1939_session_put(struct j1939_session *session) +{ + kref_put(&session->kref, __j1939_session_release); +} + +static void j1939_session_txtimer_cancel(struct j1939_session *session) +{ + if (hrtimer_cancel(&session->txtimer)) + j1939_session_put(session); +} + +static void j1939_session_rxtimer_cancel(struct j1939_session *session) +{ + if (hrtimer_cancel(&session->rxtimer)) + j1939_session_put(session); +} + +void j1939_session_timers_cancel(struct j1939_session *session) +{ + j1939_session_txtimer_cancel(session); + j1939_session_rxtimer_cancel(session); +} + +static inline bool j1939_cb_is_broadcast(const struct j1939_sk_buff_cb *skcb) +{ + return (!skcb->addr.dst_name && 
(skcb->addr.da == 0xff)); +} + +static void j1939_session_skb_drop_old(struct j1939_session *session) +{ + struct sk_buff *do_skb; + struct j1939_sk_buff_cb *do_skcb; + unsigned int offset_start; + unsigned long flags; + + if (skb_queue_len(&session->skb_queue) < 2) + return; + + offset_start = session->pkt.tx_acked * 7; + + spin_lock_irqsave(&session->skb_queue.lock, flags); + do_skb = skb_peek(&session->skb_queue); + do_skcb = j1939_skb_to_cb(do_skb); + + if ((do_skcb->offset + do_skb->len) < offset_start) { + __skb_unlink(do_skb, &session->skb_queue); + kfree_skb(do_skb); + } + spin_unlock_irqrestore(&session->skb_queue.lock, flags); +} + +void j1939_session_skb_queue(struct j1939_session *session, + struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct j1939_priv *priv = session->priv; + + j1939_ac_fixup(priv, skb); + + if (j1939_address_is_unicast(skcb->addr.da) && + priv->ents[skcb->addr.da].nusers) + skcb->flags |= J1939_ECU_LOCAL_DST; + + skcb->flags |= J1939_ECU_LOCAL_SRC; + + skb_queue_tail(&session->skb_queue, skb); +} + +static struct sk_buff *j1939_session_skb_find(struct j1939_session *session) +{ + struct j1939_priv *priv = session->priv; + struct sk_buff *skb = NULL; + struct sk_buff *do_skb; + struct j1939_sk_buff_cb *do_skcb; + unsigned int offset_start; + unsigned long flags; + + offset_start = session->pkt.dpo * 7; + + spin_lock_irqsave(&session->skb_queue.lock, flags); + skb_queue_walk(&session->skb_queue, do_skb) { + do_skcb = j1939_skb_to_cb(do_skb); + + if (offset_start >= do_skcb->offset && + offset_start < (do_skcb->offset + do_skb->len)) { + skb = do_skb; + } + } + spin_unlock_irqrestore(&session->skb_queue.lock, flags); + + if (!skb) + netdev_dbg(priv->ndev, "%s: 0x%p: no skb found for start: %i, queue size: %i\n", + __func__, session, offset_start, + skb_queue_len(&session->skb_queue)); + + return skb; +} + +/* see if we are receiver + * returns 0 for broadcasts, although we will receive them + */ +static inline int j1939_tp_im_receiver(const struct j1939_sk_buff_cb *skcb) +{ + return skcb->flags & J1939_ECU_LOCAL_DST; +} + +/* see if we are sender */ +static inline int j1939_tp_im_transmitter(const struct j1939_sk_buff_cb *skcb) +{ + return skcb->flags & J1939_ECU_LOCAL_SRC; +} + +/* see if we are involved as either receiver or transmitter */ +static int j1939_tp_im_involved(const struct j1939_sk_buff_cb *skcb, bool swap) +{ + if (swap) + return j1939_tp_im_receiver(skcb); + else + return j1939_tp_im_transmitter(skcb); +} + +static int j1939_tp_im_involved_anydir(struct j1939_sk_buff_cb *skcb) +{ + return skcb->flags & (J1939_ECU_LOCAL_SRC | J1939_ECU_LOCAL_DST); +} + +/* extract pgn from flow-ctl message */ +static inline pgn_t j1939_xtp_ctl_to_pgn(const u8 *dat) +{ + pgn_t pgn; + + pgn = (dat[7] << 16) | (dat[6] << 8) | (dat[5] << 0); + if (j1939_pgn_is_pdu1(pgn)) + pgn &= 0xffff00; + return pgn; +} + +static inline unsigned int j1939_tp_ctl_to_size(const u8 *dat) +{ + return (dat[2] << 8) + (dat[1] << 0); +} + +static inline unsigned int j1939_etp_ctl_to_packet(const u8 *dat) +{ + return (dat[4] << 16) | (dat[3] << 8) | (dat[2] << 0); +} + +static inline unsigned int j1939_etp_ctl_to_size(const u8 *dat) +{ + return (dat[4] << 24) | (dat[3] << 16) | + (dat[2] << 8) | (dat[1] << 0); +} + +/* find existing session: + * reverse: swap cb's src & dst + * there is no problem with matching broadcasts, since + * broadcasts (no dst, no da) would never call this + * with reverse == true + */ +static bool j1939_session_match(struct 
j1939_addr *se_addr, + struct j1939_addr *sk_addr, bool reverse) +{ + if (se_addr->type != sk_addr->type) + return false; + + if (reverse) { + if (se_addr->src_name) { + if (se_addr->src_name != sk_addr->dst_name) + return false; + } else if (se_addr->sa != sk_addr->da) { + return false; + } + + if (se_addr->dst_name) { + if (se_addr->dst_name != sk_addr->src_name) + return false; + } else if (se_addr->da != sk_addr->sa) { + return false; + } + } else { + if (se_addr->src_name) { + if (se_addr->src_name != sk_addr->src_name) + return false; + } else if (se_addr->sa != sk_addr->sa) { + return false; + } + + if (se_addr->dst_name) { + if (se_addr->dst_name != sk_addr->dst_name) + return false; + } else if (se_addr->da != sk_addr->da) { + return false; + } + } + + return true; +} + +static struct +j1939_session *j1939_session_get_by_addr_locked(struct j1939_priv *priv, + struct list_head *root, + struct j1939_addr *addr, + bool reverse, bool transmitter) +{ + struct j1939_session *session; + + lockdep_assert_held(&priv->active_session_list_lock); + + list_for_each_entry(session, root, active_session_list_entry) { + j1939_session_get(session); + if (j1939_session_match(&session->skcb.addr, addr, reverse) && + session->transmission == transmitter) + return session; + j1939_session_put(session); + } + + return NULL; +} + +static struct +j1939_session *j1939_session_get_simple(struct j1939_priv *priv, + struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct j1939_session *session; + + lockdep_assert_held(&priv->active_session_list_lock); + + list_for_each_entry(session, &priv->active_session_list, + active_session_list_entry) { + j1939_session_get(session); + if (session->skcb.addr.type == J1939_SIMPLE && + session->tskey == skcb->tskey && session->sk == skb->sk) + return session; + j1939_session_put(session); + } + + return NULL; +} + +static struct +j1939_session *j1939_session_get_by_addr(struct j1939_priv *priv, + struct j1939_addr *addr, + bool reverse, bool transmitter) +{ + struct j1939_session *session; + + j1939_session_list_lock(priv); + session = j1939_session_get_by_addr_locked(priv, + &priv->active_session_list, + addr, reverse, transmitter); + j1939_session_list_unlock(priv); + + return session; +} + +static void j1939_skbcb_swap(struct j1939_sk_buff_cb *skcb) +{ + u8 tmp = 0; + + swap(skcb->addr.dst_name, skcb->addr.src_name); + swap(skcb->addr.da, skcb->addr.sa); + + /* swap SRC and DST flags, leave other untouched */ + if (skcb->flags & J1939_ECU_LOCAL_SRC) + tmp |= J1939_ECU_LOCAL_DST; + if (skcb->flags & J1939_ECU_LOCAL_DST) + tmp |= J1939_ECU_LOCAL_SRC; + skcb->flags &= ~(J1939_ECU_LOCAL_SRC | J1939_ECU_LOCAL_DST); + skcb->flags |= tmp; +} + +static struct +sk_buff *j1939_tp_tx_dat_new(struct j1939_priv *priv, + const struct j1939_sk_buff_cb *re_skcb, + bool ctl, + bool swap_src_dst) +{ + struct sk_buff *skb; + struct j1939_sk_buff_cb *skcb; + + skb = alloc_skb(sizeof(struct can_frame) + sizeof(struct can_skb_priv), + GFP_ATOMIC); + if (unlikely(!skb)) + return ERR_PTR(-ENOMEM); + + skb->dev = priv->ndev; + can_skb_reserve(skb); + can_skb_prv(skb)->ifindex = priv->ndev->ifindex; + /* reserve CAN header */ + skb_reserve(skb, offsetof(struct can_frame, data)); + + memcpy(skb->cb, re_skcb, sizeof(skb->cb)); + skcb = j1939_skb_to_cb(skb); + if (swap_src_dst) + j1939_skbcb_swap(skcb); + + if (ctl) { + if (skcb->addr.type == J1939_ETP) + skcb->addr.pgn = J1939_ETP_PGN_CTL; + else + skcb->addr.pgn = J1939_TP_PGN_CTL; + } else { + if (skcb->addr.type 
== J1939_ETP) + skcb->addr.pgn = J1939_ETP_PGN_DAT; + else + skcb->addr.pgn = J1939_TP_PGN_DAT; + } + + return skb; +} + +/* TP transmit packet functions */ +static int j1939_tp_tx_dat(struct j1939_session *session, + const u8 *dat, int len) +{ + struct j1939_priv *priv = session->priv; + struct sk_buff *skb; + + skb = j1939_tp_tx_dat_new(priv, &session->skcb, + false, false); + if (IS_ERR(skb)) + return PTR_ERR(skb); + + skb_put_data(skb, dat, len); + if (j1939_tp_padding && len < 8) + memset(skb_put(skb, 8 - len), 0xff, 8 - len); + + return j1939_send_one(priv, skb); +} + +static int j1939_xtp_do_tx_ctl(struct j1939_priv *priv, + const struct j1939_sk_buff_cb *re_skcb, + bool swap_src_dst, pgn_t pgn, const u8 *dat) +{ + struct sk_buff *skb; + u8 *skdat; + + if (!j1939_tp_im_involved(re_skcb, swap_src_dst)) + return 0; + + skb = j1939_tp_tx_dat_new(priv, re_skcb, true, swap_src_dst); + if (IS_ERR(skb)) + return PTR_ERR(skb); + + skdat = skb_put(skb, 8); + memcpy(skdat, dat, 5); + skdat[5] = (pgn >> 0); + skdat[6] = (pgn >> 8); + skdat[7] = (pgn >> 16); + + return j1939_send_one(priv, skb); +} + +static inline int j1939_tp_tx_ctl(struct j1939_session *session, + bool swap_src_dst, const u8 *dat) +{ + struct j1939_priv *priv = session->priv; + + return j1939_xtp_do_tx_ctl(priv, &session->skcb, + swap_src_dst, + session->skcb.addr.pgn, dat); +} + +static int j1939_xtp_tx_abort(struct j1939_priv *priv, + const struct j1939_sk_buff_cb *re_skcb, + bool swap_src_dst, + enum j1939_xtp_abort err, + pgn_t pgn) +{ + u8 dat[5]; + + if (!j1939_tp_im_involved(re_skcb, swap_src_dst)) + return 0; + + memset(dat, 0xff, sizeof(dat)); + dat[0] = J1939_TP_CMD_ABORT; + dat[1] = err; + return j1939_xtp_do_tx_ctl(priv, re_skcb, swap_src_dst, pgn, dat); +} + +void j1939_tp_schedule_txtimer(struct j1939_session *session, int msec) +{ + j1939_session_get(session); + hrtimer_start(&session->txtimer, ms_to_ktime(msec), + HRTIMER_MODE_REL_SOFT); +} + +static inline void j1939_tp_set_rxtimeout(struct j1939_session *session, + int msec) +{ + j1939_session_rxtimer_cancel(session); + j1939_session_get(session); + hrtimer_start(&session->rxtimer, ms_to_ktime(msec), + HRTIMER_MODE_REL_SOFT); +} + +static int j1939_session_tx_rts(struct j1939_session *session) +{ + u8 dat[8]; + int ret; + + memset(dat, 0xff, sizeof(dat)); + + dat[1] = (session->total_message_size >> 0); + dat[2] = (session->total_message_size >> 8); + dat[3] = session->pkt.total; + + if (session->skcb.addr.type == J1939_ETP) { + dat[0] = J1939_ETP_CMD_RTS; + dat[1] = (session->total_message_size >> 0); + dat[2] = (session->total_message_size >> 8); + dat[3] = (session->total_message_size >> 16); + dat[4] = (session->total_message_size >> 24); + } else if (j1939_cb_is_broadcast(&session->skcb)) { + dat[0] = J1939_TP_CMD_BAM; + /* fake cts for broadcast */ + session->pkt.tx = 0; + } else { + dat[0] = J1939_TP_CMD_RTS; + dat[4] = dat[3]; + } + + if (dat[0] == session->last_txcmd) + /* done already */ + return 0; + + ret = j1939_tp_tx_ctl(session, false, dat); + if (ret < 0) + return ret; + + session->last_txcmd = dat[0]; + if (dat[0] == J1939_TP_CMD_BAM) + j1939_tp_schedule_txtimer(session, 50); + + j1939_tp_set_rxtimeout(session, 1250); + + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); + + return 0; +} + +static int j1939_session_tx_dpo(struct j1939_session *session) +{ + unsigned int pkt; + u8 dat[8]; + int ret; + + memset(dat, 0xff, sizeof(dat)); + + dat[0] = J1939_ETP_CMD_DPO; + session->pkt.dpo = session->pkt.tx_acked; + pkt = 
session->pkt.dpo; + dat[1] = session->pkt.last - session->pkt.tx_acked; + dat[2] = (pkt >> 0); + dat[3] = (pkt >> 8); + dat[4] = (pkt >> 16); + + ret = j1939_tp_tx_ctl(session, false, dat); + if (ret < 0) + return ret; + + session->last_txcmd = dat[0]; + j1939_tp_set_rxtimeout(session, 1250); + session->pkt.tx = session->pkt.tx_acked; + + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); + + return 0; +} + +static int j1939_session_tx_dat(struct j1939_session *session) +{ + struct j1939_priv *priv = session->priv; + struct j1939_sk_buff_cb *skcb; + int offset, pkt_done, pkt_end; + unsigned int len, pdelay; + struct sk_buff *se_skb; + const u8 *tpdat; + int ret = 0; + u8 dat[8]; + + se_skb = j1939_session_skb_find(session); + if (!se_skb) + return -ENOBUFS; + + skcb = j1939_skb_to_cb(se_skb); + tpdat = se_skb->data; + ret = 0; + pkt_done = 0; + if (session->skcb.addr.type != J1939_ETP && + j1939_cb_is_broadcast(&session->skcb)) + pkt_end = session->pkt.total; + else + pkt_end = session->pkt.last; + + while (session->pkt.tx < pkt_end) { + dat[0] = session->pkt.tx - session->pkt.dpo + 1; + offset = (session->pkt.tx * 7) - skcb->offset; + len = se_skb->len - offset; + if (len > 7) + len = 7; + + memcpy(&dat[1], &tpdat[offset], len); + ret = j1939_tp_tx_dat(session, dat, len + 1); + if (ret < 0) { + /* ENOBUS == CAN interface TX queue is full */ + if (ret != -ENOBUFS) + netdev_alert(priv->ndev, + "%s: 0x%p: queue data error: %i\n", + __func__, session, ret); + break; + } + + session->last_txcmd = 0xff; + pkt_done++; + session->pkt.tx++; + pdelay = j1939_cb_is_broadcast(&session->skcb) ? 50 : + j1939_tp_packet_delay; + + if (session->pkt.tx < session->pkt.total && pdelay) { + j1939_tp_schedule_txtimer(session, pdelay); + break; + } + } + + if (pkt_done) + j1939_tp_set_rxtimeout(session, 250); + + return ret; +} + +static int j1939_xtp_txnext_transmiter(struct j1939_session *session) +{ + struct j1939_priv *priv = session->priv; + int ret = 0; + + if (!j1939_tp_im_transmitter(&session->skcb)) { + netdev_alert(priv->ndev, "%s: 0x%p: called by not transmitter!\n", + __func__, session); + return -EINVAL; + } + + switch (session->last_cmd) { + case 0: + ret = j1939_session_tx_rts(session); + break; + + case J1939_ETP_CMD_CTS: + if (session->last_txcmd != J1939_ETP_CMD_DPO) { + ret = j1939_session_tx_dpo(session); + if (ret) + return ret; + } + + /* fall through */ + case J1939_TP_CMD_CTS: + case 0xff: /* did some data */ + case J1939_ETP_CMD_DPO: + case J1939_TP_CMD_BAM: + ret = j1939_session_tx_dat(session); + + break; + default: + netdev_alert(priv->ndev, "%s: 0x%p: unexpected last_cmd: %x\n", + __func__, session, session->last_cmd); + } + + return ret; +} + +static int j1939_session_tx_cts(struct j1939_session *session) +{ + struct j1939_priv *priv = session->priv; + unsigned int pkt, len; + int ret; + u8 dat[8]; + + if (!j1939_sk_recv_match(priv, &session->skcb)) + return -ENOENT; + + len = session->pkt.total - session->pkt.rx; + len = min3(len, session->pkt.block, j1939_tp_block ?: 255); + memset(dat, 0xff, sizeof(dat)); + + if (session->skcb.addr.type == J1939_ETP) { + pkt = session->pkt.rx + 1; + dat[0] = J1939_ETP_CMD_CTS; + dat[1] = len; + dat[2] = (pkt >> 0); + dat[3] = (pkt >> 8); + dat[4] = (pkt >> 16); + } else { + dat[0] = J1939_TP_CMD_CTS; + dat[1] = len; + dat[2] = session->pkt.rx + 1; + } + + if (dat[0] == session->last_txcmd) + /* done already */ + return 0; + + ret = j1939_tp_tx_ctl(session, true, dat); + if (ret < 0) + return ret; + + if (len) + /* only mark cts 
done when len is set */ + session->last_txcmd = dat[0]; + j1939_tp_set_rxtimeout(session, 1250); + + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); + + return 0; +} + +static int j1939_session_tx_eoma(struct j1939_session *session) +{ + struct j1939_priv *priv = session->priv; + u8 dat[8]; + int ret; + + if (!j1939_sk_recv_match(priv, &session->skcb)) + return -ENOENT; + + memset(dat, 0xff, sizeof(dat)); + + if (session->skcb.addr.type == J1939_ETP) { + dat[0] = J1939_ETP_CMD_EOMA; + dat[1] = session->total_message_size >> 0; + dat[2] = session->total_message_size >> 8; + dat[3] = session->total_message_size >> 16; + dat[4] = session->total_message_size >> 24; + } else { + dat[0] = J1939_TP_CMD_EOMA; + dat[1] = session->total_message_size; + dat[2] = session->total_message_size >> 8; + dat[3] = session->pkt.total; + } + + if (dat[0] == session->last_txcmd) + /* done already */ + return 0; + + ret = j1939_tp_tx_ctl(session, true, dat); + if (ret < 0) + return ret; + + session->last_txcmd = dat[0]; + + /* wait for the EOMA packet to come in */ + j1939_tp_set_rxtimeout(session, 1250); + + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); + + return 0; +} + +static int j1939_xtp_txnext_receiver(struct j1939_session *session) +{ + struct j1939_priv *priv = session->priv; + int ret = 0; + + if (!j1939_tp_im_receiver(&session->skcb)) { + netdev_alert(priv->ndev, "%s: 0x%p: called by not receiver!\n", + __func__, session); + return -EINVAL; + } + + switch (session->last_cmd) { + case J1939_TP_CMD_RTS: + case J1939_ETP_CMD_RTS: + ret = j1939_session_tx_cts(session); + break; + + case J1939_ETP_CMD_CTS: + case J1939_TP_CMD_CTS: + case 0xff: /* did some data */ + case J1939_ETP_CMD_DPO: + if ((session->skcb.addr.type == J1939_TP && + j1939_cb_is_broadcast(&session->skcb))) + break; + + if (session->pkt.rx >= session->pkt.total) { + ret = j1939_session_tx_eoma(session); + } else if (session->pkt.rx >= session->pkt.last) { + session->last_txcmd = 0; + ret = j1939_session_tx_cts(session); + } + break; + default: + netdev_alert(priv->ndev, "%s: 0x%p: unexpected last_cmd: %x\n", + __func__, session, session->last_cmd); + } + + return ret; +} + +static int j1939_simple_txnext(struct j1939_session *session) +{ + struct j1939_priv *priv = session->priv; + struct sk_buff *se_skb = j1939_session_skb_find(session); + struct sk_buff *skb; + int ret; + + if (!se_skb) + return 0; + + skb = skb_clone(se_skb, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + can_skb_set_owner(skb, se_skb->sk); + + j1939_tp_set_rxtimeout(session, J1939_SIMPLE_ECHO_TIMEOUT_MS); + + ret = j1939_send_one(priv, skb); + if (ret) + return ret; + + j1939_sk_errqueue(session, J1939_ERRQUEUE_SCHED); + j1939_sk_queue_activate_next(session); + + return 0; +} + +static bool j1939_session_deactivate_locked(struct j1939_session *session) +{ + bool active = false; + + lockdep_assert_held(&session->priv->active_session_list_lock); + + if (session->state >= J1939_SESSION_ACTIVE && + session->state < J1939_SESSION_ACTIVE_MAX) { + active = true; + + list_del_init(&session->active_session_list_entry); + session->state = J1939_SESSION_DONE; + j1939_session_put(session); + } + + return active; +} + +static bool j1939_session_deactivate(struct j1939_session *session) +{ + bool active; + + j1939_session_list_lock(session->priv); + active = j1939_session_deactivate_locked(session); + j1939_session_list_unlock(session->priv); + + return active; +} + +static void +j1939_session_deactivate_activate_next(struct j1939_session 
*session) +{ + if (j1939_session_deactivate(session)) + j1939_sk_queue_activate_next(session); +} + +static void j1939_session_cancel(struct j1939_session *session, + enum j1939_xtp_abort err) +{ + struct j1939_priv *priv = session->priv; + + WARN_ON_ONCE(!err); + + session->err = j1939_xtp_abort_to_errno(priv, err); + /* do not send aborts on incoming broadcasts */ + if (!j1939_cb_is_broadcast(&session->skcb)) { + session->state = J1939_SESSION_WAITING_ABORT; + j1939_xtp_tx_abort(priv, &session->skcb, + !session->transmission, + err, session->skcb.addr.pgn); + } + + if (session->sk) + j1939_sk_send_loop_abort(session->sk, session->err); +} + +static enum hrtimer_restart j1939_tp_txtimer(struct hrtimer *hrtimer) +{ + struct j1939_session *session = + container_of(hrtimer, struct j1939_session, txtimer); + struct j1939_priv *priv = session->priv; + int ret = 0; + + if (session->skcb.addr.type == J1939_SIMPLE) { + ret = j1939_simple_txnext(session); + } else { + if (session->transmission) + ret = j1939_xtp_txnext_transmiter(session); + else + ret = j1939_xtp_txnext_receiver(session); + } + + switch (ret) { + case -ENOBUFS: + /* Retry limit is currently arbitrarily chosen */ + if (session->tx_retry < J1939_XTP_TX_RETRY_LIMIT) { + session->tx_retry++; + j1939_tp_schedule_txtimer(session, + 10 + prandom_u32_max(16)); + } else { + netdev_alert(priv->ndev, "%s: 0x%p: tx retry count reached\n", + __func__, session); + session->err = -ENETUNREACH; + j1939_session_rxtimer_cancel(session); + j1939_session_deactivate_activate_next(session); + } + break; + case -ENETDOWN: + /* In this case we should get a netdev_event(), all active + * sessions will be cleared by + * j1939_cancel_all_active_sessions(). So handle this as an + * error, but let j1939_cancel_all_active_sessions() do the + * cleanup including propagation of the error to user space. + */ + break; + case 0: + session->tx_retry = 0; + break; + default: + netdev_alert(priv->ndev, "%s: 0x%p: tx aborted with unknown reason: %i\n", + __func__, session, ret); + if (session->skcb.addr.type != J1939_SIMPLE) { + j1939_tp_set_rxtimeout(session, + J1939_XTP_ABORT_TIMEOUT_MS); + j1939_session_cancel(session, J1939_XTP_ABORT_OTHER); + } else { + session->err = ret; + j1939_session_rxtimer_cancel(session); + j1939_session_deactivate_activate_next(session); + } + } + + j1939_session_put(session); + + return HRTIMER_NORESTART; +} + +static void j1939_session_completed(struct j1939_session *session) +{ + struct sk_buff *skb; + + if (!session->transmission) { + skb = j1939_session_skb_find(session); + /* distribute among j1939 receivers */ + j1939_sk_recv(session->priv, skb); + } + + j1939_session_deactivate_activate_next(session); +} + +static enum hrtimer_restart j1939_tp_rxtimer(struct hrtimer *hrtimer) +{ + struct j1939_session *session = container_of(hrtimer, + struct j1939_session, + rxtimer); + struct j1939_priv *priv = session->priv; + + if (session->state == J1939_SESSION_WAITING_ABORT) { + netdev_alert(priv->ndev, "%s: 0x%p: abort rx timeout. Force session deactivation\n", + __func__, session); + + j1939_session_deactivate_activate_next(session); + + } else if (session->skcb.addr.type == J1939_SIMPLE) { + netdev_alert(priv->ndev, "%s: 0x%p: Timeout. Failed to send simple message.\n", + __func__, session); + + /* The message is probably stuck in the CAN controller and can + * be sent as soon as CAN bus is in working state again. 
+ */ + session->err = -ETIME; + j1939_session_deactivate(session); + } else { + netdev_alert(priv->ndev, "%s: 0x%p: rx timeout, send abort\n", + __func__, session); + + j1939_session_list_lock(session->priv); + if (session->state >= J1939_SESSION_ACTIVE && + session->state < J1939_SESSION_ACTIVE_MAX) { + j1939_session_get(session); + hrtimer_start(&session->rxtimer, + ms_to_ktime(J1939_XTP_ABORT_TIMEOUT_MS), + HRTIMER_MODE_REL_SOFT); + j1939_session_cancel(session, J1939_XTP_ABORT_TIMEOUT); + } + j1939_session_list_unlock(session->priv); + } + + j1939_session_put(session); + + return HRTIMER_NORESTART; +} + +static bool j1939_xtp_rx_cmd_bad_pgn(struct j1939_session *session, + const struct sk_buff *skb) +{ + const struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + pgn_t pgn = j1939_xtp_ctl_to_pgn(skb->data); + struct j1939_priv *priv = session->priv; + enum j1939_xtp_abort abort = J1939_XTP_NO_ABORT; + u8 cmd = skb->data[0]; + + if (session->skcb.addr.pgn == pgn) + return false; + + switch (cmd) { + case J1939_TP_CMD_BAM: + abort = J1939_XTP_NO_ABORT; + break; + + case J1939_ETP_CMD_RTS: + case J1939_TP_CMD_RTS: /* fall through */ + abort = J1939_XTP_ABORT_BUSY; + break; + + case J1939_ETP_CMD_CTS: + case J1939_TP_CMD_CTS: /* fall through */ + abort = J1939_XTP_ABORT_ECTS_UNXPECTED_PGN; + break; + + case J1939_ETP_CMD_DPO: + abort = J1939_XTP_ABORT_BAD_EDPO_PGN; + break; + + case J1939_ETP_CMD_EOMA: + case J1939_TP_CMD_EOMA: /* fall through */ + abort = J1939_XTP_ABORT_OTHER; + break; + + case J1939_ETP_CMD_ABORT: /* && J1939_TP_CMD_ABORT */ + abort = J1939_XTP_NO_ABORT; + break; + + default: + WARN_ON_ONCE(1); + break; + } + + netdev_warn(priv->ndev, "%s: 0x%p: CMD 0x%02x with PGN 0x%05x for running session with different PGN 0x%05x.\n", + __func__, session, cmd, pgn, session->skcb.addr.pgn); + if (abort != J1939_XTP_NO_ABORT) + j1939_xtp_tx_abort(priv, skcb, true, abort, pgn); + + return true; +} + +static void j1939_xtp_rx_abort_one(struct j1939_priv *priv, struct sk_buff *skb, + bool reverse, bool transmitter) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct j1939_session *session; + u8 abort = skb->data[1]; + + session = j1939_session_get_by_addr(priv, &skcb->addr, reverse, + transmitter); + if (!session) + return; + + if (j1939_xtp_rx_cmd_bad_pgn(session, skb)) + goto abort_put; + + netdev_info(priv->ndev, "%s: 0x%p: 0x%05x: (%u) %s\n", __func__, + session, j1939_xtp_ctl_to_pgn(skb->data), abort, + j1939_xtp_abort_to_str(abort)); + + j1939_session_timers_cancel(session); + session->err = j1939_xtp_abort_to_errno(priv, abort); + if (session->sk) + j1939_sk_send_loop_abort(session->sk, session->err); + j1939_session_deactivate_activate_next(session); + +abort_put: + j1939_session_put(session); +} + +/* abort packets may come in 2 directions */ +static void +j1939_xtp_rx_abort(struct j1939_priv *priv, struct sk_buff *skb, + bool transmitter) +{ + j1939_xtp_rx_abort_one(priv, skb, false, transmitter); + j1939_xtp_rx_abort_one(priv, skb, true, transmitter); +} + +static void +j1939_xtp_rx_eoma_one(struct j1939_session *session, struct sk_buff *skb) +{ + if (j1939_xtp_rx_cmd_bad_pgn(session, skb)) + return; + + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); + + session->pkt.tx_acked = session->pkt.total; + j1939_session_timers_cancel(session); + /* transmitted without problems */ + j1939_session_completed(session); +} + +static void +j1939_xtp_rx_eoma(struct j1939_priv *priv, struct sk_buff *skb, + bool transmitter) +{ + struct j1939_sk_buff_cb 
*skcb = j1939_skb_to_cb(skb); + struct j1939_session *session; + + session = j1939_session_get_by_addr(priv, &skcb->addr, true, + transmitter); + if (!session) + return; + + j1939_xtp_rx_eoma_one(session, skb); + j1939_session_put(session); +} + +static void +j1939_xtp_rx_cts_one(struct j1939_session *session, struct sk_buff *skb) +{ + enum j1939_xtp_abort err = J1939_XTP_ABORT_FAULT; + unsigned int pkt; + const u8 *dat; + + dat = skb->data; + + if (j1939_xtp_rx_cmd_bad_pgn(session, skb)) + return; + + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); + + if (session->last_cmd == dat[0]) { + err = J1939_XTP_ABORT_DUP_SEQ; + goto out_session_cancel; + } + + if (session->skcb.addr.type == J1939_ETP) + pkt = j1939_etp_ctl_to_packet(dat); + else + pkt = dat[2]; + + if (!pkt) + goto out_session_cancel; + else if (dat[1] > session->pkt.block /* 0xff for etp */) + goto out_session_cancel; + + /* set packet counters only when not CTS(0) */ + session->pkt.tx_acked = pkt - 1; + j1939_session_skb_drop_old(session); + session->pkt.last = session->pkt.tx_acked + dat[1]; + if (session->pkt.last > session->pkt.total) + /* safety measure */ + session->pkt.last = session->pkt.total; + /* TODO: do not set tx here, do it in txtimer */ + session->pkt.tx = session->pkt.tx_acked; + + session->last_cmd = dat[0]; + if (dat[1]) { + j1939_tp_set_rxtimeout(session, 1250); + if (session->transmission) { + if (session->pkt.tx_acked) + j1939_sk_errqueue(session, + J1939_ERRQUEUE_SCHED); + j1939_session_txtimer_cancel(session); + j1939_tp_schedule_txtimer(session, 0); + } + } else { + /* CTS(0) */ + j1939_tp_set_rxtimeout(session, 550); + } + return; + + out_session_cancel: + j1939_session_timers_cancel(session); + j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS); + j1939_session_cancel(session, err); +} + +static void +j1939_xtp_rx_cts(struct j1939_priv *priv, struct sk_buff *skb, bool transmitter) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct j1939_session *session; + + session = j1939_session_get_by_addr(priv, &skcb->addr, true, + transmitter); + if (!session) + return; + j1939_xtp_rx_cts_one(session, skb); + j1939_session_put(session); +} + +static struct j1939_session *j1939_session_new(struct j1939_priv *priv, + struct sk_buff *skb, size_t size) +{ + struct j1939_session *session; + struct j1939_sk_buff_cb *skcb; + + session = kzalloc(sizeof(*session), gfp_any()); + if (!session) + return NULL; + + INIT_LIST_HEAD(&session->active_session_list_entry); + INIT_LIST_HEAD(&session->sk_session_queue_entry); + kref_init(&session->kref); + + j1939_priv_get(priv); + session->priv = priv; + session->total_message_size = size; + session->state = J1939_SESSION_NEW; + + skb_queue_head_init(&session->skb_queue); + skb_queue_tail(&session->skb_queue, skb); + + skcb = j1939_skb_to_cb(skb); + memcpy(&session->skcb, skcb, sizeof(session->skcb)); + + hrtimer_init(&session->txtimer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_SOFT); + session->txtimer.function = j1939_tp_txtimer; + hrtimer_init(&session->rxtimer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_SOFT); + session->rxtimer.function = j1939_tp_rxtimer; + + netdev_dbg(priv->ndev, "%s: 0x%p: sa: %02x, da: %02x\n", + __func__, session, skcb->addr.sa, skcb->addr.da); + + return session; +} + +static struct +j1939_session *j1939_session_fresh_new(struct j1939_priv *priv, + int size, + const struct j1939_sk_buff_cb *rel_skcb) +{ + struct sk_buff *skb; + struct j1939_sk_buff_cb *skcb; + struct j1939_session *session; + + skb = alloc_skb(size + 
sizeof(struct can_skb_priv), GFP_ATOMIC); + if (unlikely(!skb)) + return NULL; + + skb->dev = priv->ndev; + can_skb_reserve(skb); + can_skb_prv(skb)->ifindex = priv->ndev->ifindex; + skcb = j1939_skb_to_cb(skb); + memcpy(skcb, rel_skcb, sizeof(*skcb)); + + session = j1939_session_new(priv, skb, skb->len); + if (!session) { + kfree_skb(skb); + return NULL; + } + + /* alloc data area */ + skb_put(skb, size); + /* skb is recounted in j1939_session_new() */ + return session; +} + +int j1939_session_activate(struct j1939_session *session) +{ + struct j1939_priv *priv = session->priv; + struct j1939_session *active = NULL; + int ret = 0; + + j1939_session_list_lock(priv); + if (session->skcb.addr.type != J1939_SIMPLE) + active = j1939_session_get_by_addr_locked(priv, + &priv->active_session_list, + &session->skcb.addr, false, + session->transmission); + if (active) { + j1939_session_put(active); + ret = -EAGAIN; + } else { + WARN_ON_ONCE(session->state != J1939_SESSION_NEW); + list_add_tail(&session->active_session_list_entry, + &priv->active_session_list); + j1939_session_get(session); + session->state = J1939_SESSION_ACTIVE; + + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", + __func__, session); + } + j1939_session_list_unlock(priv); + + return ret; +} + +static struct +j1939_session *j1939_xtp_rx_rts_session_new(struct j1939_priv *priv, + struct sk_buff *skb) +{ + enum j1939_xtp_abort abort = J1939_XTP_NO_ABORT; + struct j1939_sk_buff_cb skcb = *j1939_skb_to_cb(skb); + struct j1939_session *session; + const u8 *dat; + pgn_t pgn; + int len; + + netdev_dbg(priv->ndev, "%s\n", __func__); + + dat = skb->data; + pgn = j1939_xtp_ctl_to_pgn(dat); + skcb.addr.pgn = pgn; + + if (!j1939_sk_recv_match(priv, &skcb)) + return NULL; + + if (skcb.addr.type == J1939_ETP) { + len = j1939_etp_ctl_to_size(dat); + if (len > J1939_MAX_ETP_PACKET_SIZE) + abort = J1939_XTP_ABORT_FAULT; + else if (len > priv->tp_max_packet_size) + abort = J1939_XTP_ABORT_RESOURCE; + else if (len <= J1939_MAX_TP_PACKET_SIZE) + abort = J1939_XTP_ABORT_FAULT; + } else { + len = j1939_tp_ctl_to_size(dat); + if (len > J1939_MAX_TP_PACKET_SIZE) + abort = J1939_XTP_ABORT_FAULT; + else if (len > priv->tp_max_packet_size) + abort = J1939_XTP_ABORT_RESOURCE; + } + + if (abort != J1939_XTP_NO_ABORT) { + j1939_xtp_tx_abort(priv, &skcb, true, abort, pgn); + return NULL; + } + + session = j1939_session_fresh_new(priv, len, &skcb); + if (!session) { + j1939_xtp_tx_abort(priv, &skcb, true, + J1939_XTP_ABORT_RESOURCE, pgn); + return NULL; + } + + /* initialize the control buffer: plain copy */ + session->pkt.total = (len + 6) / 7; + session->pkt.block = 0xff; + if (skcb.addr.type != J1939_ETP) { + if (dat[3] != session->pkt.total) + netdev_alert(priv->ndev, "%s: 0x%p: strange total, %u != %u\n", + __func__, session, session->pkt.total, + dat[3]); + session->pkt.total = dat[3]; + session->pkt.block = min(dat[3], dat[4]); + } + + session->pkt.rx = 0; + session->pkt.tx = 0; + + WARN_ON_ONCE(j1939_session_activate(session)); + + return session; +} + +static int j1939_xtp_rx_rts_session_active(struct j1939_session *session, + struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct j1939_priv *priv = session->priv; + + if (!session->transmission) { + if (j1939_xtp_rx_cmd_bad_pgn(session, skb)) + return -EBUSY; + + /* RTS on active session */ + j1939_session_timers_cancel(session); + j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS); + j1939_session_cancel(session, J1939_XTP_ABORT_BUSY); + } + + if 
(session->last_cmd != 0) { + /* we received a second rts on the same connection */ + netdev_alert(priv->ndev, "%s: 0x%p: connection exists (%02x %02x). last cmd: %x\n", + __func__, session, skcb->addr.sa, skcb->addr.da, + session->last_cmd); + + j1939_session_timers_cancel(session); + j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS); + j1939_session_cancel(session, J1939_XTP_ABORT_BUSY); + + return -EBUSY; + } + + if (session->skcb.addr.sa != skcb->addr.sa || + session->skcb.addr.da != skcb->addr.da) + netdev_warn(priv->ndev, "%s: 0x%p: session->skcb.addr.sa=0x%02x skcb->addr.sa=0x%02x session->skcb.addr.da=0x%02x skcb->addr.da=0x%02x\n", + __func__, session, + session->skcb.addr.sa, skcb->addr.sa, + session->skcb.addr.da, skcb->addr.da); + /* make sure 'sa' & 'da' are correct ! + * They may be 'not filled in yet' for sending + * skb's, since they did not pass the Address Claim ever. + */ + session->skcb.addr.sa = skcb->addr.sa; + session->skcb.addr.da = skcb->addr.da; + + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); + + return 0; +} + +static void j1939_xtp_rx_rts(struct j1939_priv *priv, struct sk_buff *skb, + bool transmitter) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct j1939_session *session; + u8 cmd = skb->data[0]; + + session = j1939_session_get_by_addr(priv, &skcb->addr, false, + transmitter); + + if (!session) { + if (transmitter) { + /* If we're the transmitter and this function is called, + * we received our own RTS. A session has already been + * created. + * + * For some reason, however, it might have been destroyed + * already. So don't create a new one here (using + * "j1939_xtp_rx_rts_session_new()") as this will be a + * receiver session. + * + * The reasons the session is already destroyed might + * be: + * - user space closed the socket and the session was + * aborted + * - session was aborted due to external abort message + */ + return; + } + session = j1939_xtp_rx_rts_session_new(priv, skb); + if (!session) + return; + } else { + if (j1939_xtp_rx_rts_session_active(session, skb)) { + j1939_session_put(session); + return; + } + } + session->last_cmd = cmd; + + j1939_tp_set_rxtimeout(session, 1250); + + if (cmd != J1939_TP_CMD_BAM && !session->transmission) { + j1939_session_txtimer_cancel(session); + j1939_tp_schedule_txtimer(session, 0); + } + + j1939_session_put(session); +} + +static void j1939_xtp_rx_dpo_one(struct j1939_session *session, + struct sk_buff *skb) +{ + const u8 *dat = skb->data; + + if (j1939_xtp_rx_cmd_bad_pgn(session, skb)) + return; + + netdev_dbg(session->priv->ndev, "%s: 0x%p\n", __func__, session); + + /* transmitted without problems */ + session->pkt.dpo = j1939_etp_ctl_to_packet(skb->data); + session->last_cmd = dat[0]; + j1939_tp_set_rxtimeout(session, 750); +} + +static void j1939_xtp_rx_dpo(struct j1939_priv *priv, struct sk_buff *skb, + bool transmitter) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct j1939_session *session; + + session = j1939_session_get_by_addr(priv, &skcb->addr, false, + transmitter); + if (!session) { + netdev_info(priv->ndev, + "%s: no connection found\n", __func__); + return; + } + + j1939_xtp_rx_dpo_one(session, skb); + j1939_session_put(session); +} + +static void j1939_xtp_rx_dat_one(struct j1939_session *session, + struct sk_buff *skb) +{ + struct j1939_priv *priv = session->priv; + struct j1939_sk_buff_cb *skcb; + struct sk_buff *se_skb; + const u8 *dat; + u8 *tpdat; + int offset; + int nbytes; + bool final = false; + bool do_cts_eoma = 
false; + int packet; + + skcb = j1939_skb_to_cb(skb); + dat = skb->data; + if (skb->len <= 1) + /* makes no sense */ + goto out_session_cancel; + + switch (session->last_cmd) { + case 0xff: + break; + case J1939_ETP_CMD_DPO: + if (skcb->addr.type == J1939_ETP) + break; + /* fall through */ + case J1939_TP_CMD_BAM: /* fall through */ + case J1939_TP_CMD_CTS: /* fall through */ + if (skcb->addr.type != J1939_ETP) + break; + /* fall through */ + default: + netdev_info(priv->ndev, "%s: 0x%p: last %02x\n", __func__, + session, session->last_cmd); + goto out_session_cancel; + } + + packet = (dat[0] - 1 + session->pkt.dpo); + if (packet > session->pkt.total || + (session->pkt.rx + 1) > session->pkt.total) { + netdev_info(priv->ndev, "%s: 0x%p: should have been completed\n", + __func__, session); + goto out_session_cancel; + } + se_skb = j1939_session_skb_find(session); + if (!se_skb) { + netdev_warn(priv->ndev, "%s: 0x%p: no skb found\n", __func__, + session); + goto out_session_cancel; + } + + skcb = j1939_skb_to_cb(se_skb); + offset = packet * 7 - skcb->offset; + nbytes = se_skb->len - offset; + if (nbytes > 7) + nbytes = 7; + if (nbytes <= 0 || (nbytes + 1) > skb->len) { + netdev_info(priv->ndev, "%s: 0x%p: nbytes %i, len %i\n", + __func__, session, nbytes, skb->len); + goto out_session_cancel; + } + + tpdat = se_skb->data; + memcpy(&tpdat[offset], &dat[1], nbytes); + if (packet == session->pkt.rx) + session->pkt.rx++; + + if (skcb->addr.type != J1939_ETP && + j1939_cb_is_broadcast(&session->skcb)) { + if (session->pkt.rx >= session->pkt.total) + final = true; + } else { + /* never final, an EOMA must follow */ + if (session->pkt.rx >= session->pkt.last) + do_cts_eoma = true; + } + + if (final) { + j1939_session_completed(session); + } else if (do_cts_eoma) { + j1939_tp_set_rxtimeout(session, 1250); + if (!session->transmission) + j1939_tp_schedule_txtimer(session, 0); + } else { + j1939_tp_set_rxtimeout(session, 250); + } + session->last_cmd = 0xff; + j1939_session_put(session); + + return; + + out_session_cancel: + j1939_session_timers_cancel(session); + j1939_tp_set_rxtimeout(session, J1939_XTP_ABORT_TIMEOUT_MS); + j1939_session_cancel(session, J1939_XTP_ABORT_FAULT); + j1939_session_put(session); +} + +static void j1939_xtp_rx_dat(struct j1939_priv *priv, struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb; + struct j1939_session *session; + + skcb = j1939_skb_to_cb(skb); + + if (j1939_tp_im_transmitter(skcb)) { + session = j1939_session_get_by_addr(priv, &skcb->addr, false, + true); + if (!session) + netdev_info(priv->ndev, "%s: no tx connection found\n", + __func__); + else + j1939_xtp_rx_dat_one(session, skb); + } + + if (j1939_tp_im_receiver(skcb)) { + session = j1939_session_get_by_addr(priv, &skcb->addr, false, + false); + if (!session) + netdev_info(priv->ndev, "%s: no rx connection found\n", + __func__); + else + j1939_xtp_rx_dat_one(session, skb); + } +} + +/* j1939 main intf */ +struct j1939_session *j1939_tp_send(struct j1939_priv *priv, + struct sk_buff *skb, size_t size) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + struct j1939_session *session; + int ret; + + if (skcb->addr.pgn == J1939_TP_PGN_DAT || + skcb->addr.pgn == J1939_TP_PGN_CTL || + skcb->addr.pgn == J1939_ETP_PGN_DAT || + skcb->addr.pgn == J1939_ETP_PGN_CTL) + /* avoid conflict */ + return ERR_PTR(-EDOM); + + if (size > priv->tp_max_packet_size) + return ERR_PTR(-EMSGSIZE); + + if (size <= 8) + skcb->addr.type = J1939_SIMPLE; + else if (size > J1939_MAX_TP_PACKET_SIZE) + skcb->addr.type = 
J1939_ETP; + else + skcb->addr.type = J1939_TP; + + if (skcb->addr.type == J1939_ETP && + j1939_cb_is_broadcast(skcb)) + return ERR_PTR(-EDESTADDRREQ); + + /* fill in addresses from names */ + ret = j1939_ac_fixup(priv, skb); + if (unlikely(ret)) + return ERR_PTR(ret); + + /* fix DST flags, it may be used there soon */ + if (j1939_address_is_unicast(skcb->addr.da) && + priv->ents[skcb->addr.da].nusers) + skcb->flags |= J1939_ECU_LOCAL_DST; + + /* src is always local, I'm sending ... */ + skcb->flags |= J1939_ECU_LOCAL_SRC; + + /* prepare new session */ + session = j1939_session_new(priv, skb, size); + if (!session) + return ERR_PTR(-ENOMEM); + + /* skb is recounted in j1939_session_new() */ + session->sk = skb->sk; + session->transmission = true; + session->pkt.total = (size + 6) / 7; + session->pkt.block = skcb->addr.type == J1939_ETP ? 255 : + min(j1939_tp_block ?: 255, session->pkt.total); + + if (j1939_cb_is_broadcast(&session->skcb)) + /* set the end-packet for broadcast */ + session->pkt.last = session->pkt.total; + + skcb->tskey = session->sk->sk_tskey++; + session->tskey = skcb->tskey; + + return session; +} + +static void j1939_tp_cmd_recv(struct j1939_priv *priv, struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + int extd = J1939_TP; + u8 cmd = skb->data[0]; + + switch (cmd) { + case J1939_ETP_CMD_RTS: + extd = J1939_ETP; + /* fall through */ + case J1939_TP_CMD_BAM: /* fall through */ + case J1939_TP_CMD_RTS: /* fall through */ + if (skcb->addr.type != extd) + return; + + if (cmd == J1939_TP_CMD_RTS && j1939_cb_is_broadcast(skcb)) { + netdev_alert(priv->ndev, "%s: rts without destination (%02x)\n", + __func__, skcb->addr.sa); + return; + } + + if (j1939_tp_im_transmitter(skcb)) + j1939_xtp_rx_rts(priv, skb, true); + + if (j1939_tp_im_receiver(skcb)) + j1939_xtp_rx_rts(priv, skb, false); + + break; + + case J1939_ETP_CMD_CTS: + extd = J1939_ETP; + /* fall through */ + case J1939_TP_CMD_CTS: + if (skcb->addr.type != extd) + return; + + if (j1939_tp_im_transmitter(skcb)) + j1939_xtp_rx_cts(priv, skb, false); + + if (j1939_tp_im_receiver(skcb)) + j1939_xtp_rx_cts(priv, skb, true); + + break; + + case J1939_ETP_CMD_DPO: + if (skcb->addr.type != J1939_ETP) + return; + + if (j1939_tp_im_transmitter(skcb)) + j1939_xtp_rx_dpo(priv, skb, true); + + if (j1939_tp_im_receiver(skcb)) + j1939_xtp_rx_dpo(priv, skb, false); + + break; + + case J1939_ETP_CMD_EOMA: + extd = J1939_ETP; + /* fall through */ + case J1939_TP_CMD_EOMA: + if (skcb->addr.type != extd) + return; + + if (j1939_tp_im_transmitter(skcb)) + j1939_xtp_rx_eoma(priv, skb, false); + + if (j1939_tp_im_receiver(skcb)) + j1939_xtp_rx_eoma(priv, skb, true); + + break; + + case J1939_ETP_CMD_ABORT: /* && J1939_TP_CMD_ABORT */ + if (j1939_tp_im_transmitter(skcb)) + j1939_xtp_rx_abort(priv, skb, true); + + if (j1939_tp_im_receiver(skcb)) + j1939_xtp_rx_abort(priv, skb, false); + + break; + default: + return; + } +} + +int j1939_tp_recv(struct j1939_priv *priv, struct sk_buff *skb) +{ + struct j1939_sk_buff_cb *skcb = j1939_skb_to_cb(skb); + + if (!j1939_tp_im_involved_anydir(skcb)) + return 0; + + switch (skcb->addr.pgn) { + case J1939_ETP_PGN_DAT: + skcb->addr.type = J1939_ETP; + /* fall through */ + case J1939_TP_PGN_DAT: + j1939_xtp_rx_dat(priv, skb); + break; + + case J1939_ETP_PGN_CTL: + skcb->addr.type = J1939_ETP; + /* fall through */ + case J1939_TP_PGN_CTL: + if (skb->len < 8) + return 0; /* Don't care. 
Nothing to extract here */ + + j1939_tp_cmd_recv(priv, skb); + break; + default: + return 0; /* no problem */ + } + return 1; /* "I processed the message" */ +} + +void j1939_simple_recv(struct j1939_priv *priv, struct sk_buff *skb) +{ + struct j1939_session *session; + + if (!skb->sk) + return; + + j1939_session_list_lock(priv); + session = j1939_session_get_simple(priv, skb); + j1939_session_list_unlock(priv); + if (!session) { + netdev_warn(priv->ndev, + "%s: Received already invalidated message\n", + __func__); + return; + } + + j1939_session_timers_cancel(session); + j1939_session_deactivate(session); + j1939_session_put(session); +} + +int j1939_cancel_active_session(struct j1939_priv *priv, struct sock *sk) +{ + struct j1939_session *session, *saved; + + netdev_dbg(priv->ndev, "%s, sk: %p\n", __func__, sk); + j1939_session_list_lock(priv); + list_for_each_entry_safe(session, saved, + &priv->active_session_list, + active_session_list_entry) { + if (!sk || sk == session->sk) { + j1939_session_timers_cancel(session); + session->err = ESHUTDOWN; + j1939_session_deactivate_locked(session); + } + } + j1939_session_list_unlock(priv); + return NOTIFY_DONE; +} + +void j1939_tp_init(struct j1939_priv *priv) +{ + spin_lock_init(&priv->active_session_list_lock); + INIT_LIST_HEAD(&priv->active_session_list); + priv->tp_max_packet_size = J1939_MAX_ETP_PACKET_SIZE; +} diff --git a/net/can/proc.c b/net/can/proc.c index 70fea17bb04c..e6881bfc3ed1 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) /* * proc.c - procfs support for Protocol family CAN core module * @@ -44,6 +45,7 @@ #include <linux/list.h> #include <linux/rcupdate.h> #include <linux/if_arp.h> +#include <linux/can/can-ml.h> #include <linux/can/core.h> #include "af_can.h" @@ -77,21 +79,21 @@ static const char rx_list_name[][8] = { static void can_init_stats(struct net *net) { - struct s_stats *can_stats = net->can.can_stats; - struct s_pstats *can_pstats = net->can.can_pstats; + struct can_pkg_stats *pkg_stats = net->can.pkg_stats; + struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats; /* * This memset function is called from a timer context (when * can_stattimer is active which is the default) OR in a process * context (reading the proc_fs when can_stattimer is disabled). 
*/ - memset(can_stats, 0, sizeof(struct s_stats)); - can_stats->jiffies_init = jiffies; + memset(pkg_stats, 0, sizeof(struct can_pkg_stats)); + pkg_stats->jiffies_init = jiffies; - can_pstats->stats_reset++; + rcv_lists_stats->stats_reset++; if (user_reset) { user_reset = 0; - can_pstats->user_reset++; + rcv_lists_stats->user_reset++; } } @@ -117,8 +119,8 @@ static unsigned long calc_rate(unsigned long oldjif, unsigned long newjif, void can_stat_update(struct timer_list *t) { - struct net *net = from_timer(net, t, can.can_stattimer); - struct s_stats *can_stats = net->can.can_stats; + struct net *net = from_timer(net, t, can.stattimer); + struct can_pkg_stats *pkg_stats = net->can.pkg_stats; unsigned long j = jiffies; /* snapshot */ /* restart counting in timer context on user request */ @@ -126,57 +128,57 @@ void can_stat_update(struct timer_list *t) can_init_stats(net); /* restart counting on jiffies overflow */ - if (j < can_stats->jiffies_init) + if (j < pkg_stats->jiffies_init) can_init_stats(net); /* prevent overflow in calc_rate() */ - if (can_stats->rx_frames > (ULONG_MAX / HZ)) + if (pkg_stats->rx_frames > (ULONG_MAX / HZ)) can_init_stats(net); /* prevent overflow in calc_rate() */ - if (can_stats->tx_frames > (ULONG_MAX / HZ)) + if (pkg_stats->tx_frames > (ULONG_MAX / HZ)) can_init_stats(net); /* matches overflow - very improbable */ - if (can_stats->matches > (ULONG_MAX / 100)) + if (pkg_stats->matches > (ULONG_MAX / 100)) can_init_stats(net); /* calc total values */ - if (can_stats->rx_frames) - can_stats->total_rx_match_ratio = (can_stats->matches * 100) / - can_stats->rx_frames; + if (pkg_stats->rx_frames) + pkg_stats->total_rx_match_ratio = (pkg_stats->matches * 100) / + pkg_stats->rx_frames; - can_stats->total_tx_rate = calc_rate(can_stats->jiffies_init, j, - can_stats->tx_frames); - can_stats->total_rx_rate = calc_rate(can_stats->jiffies_init, j, - can_stats->rx_frames); + pkg_stats->total_tx_rate = calc_rate(pkg_stats->jiffies_init, j, + pkg_stats->tx_frames); + pkg_stats->total_rx_rate = calc_rate(pkg_stats->jiffies_init, j, + pkg_stats->rx_frames); /* calc current values */ - if (can_stats->rx_frames_delta) - can_stats->current_rx_match_ratio = - (can_stats->matches_delta * 100) / - can_stats->rx_frames_delta; + if (pkg_stats->rx_frames_delta) + pkg_stats->current_rx_match_ratio = + (pkg_stats->matches_delta * 100) / + pkg_stats->rx_frames_delta; - can_stats->current_tx_rate = calc_rate(0, HZ, can_stats->tx_frames_delta); - can_stats->current_rx_rate = calc_rate(0, HZ, can_stats->rx_frames_delta); + pkg_stats->current_tx_rate = calc_rate(0, HZ, pkg_stats->tx_frames_delta); + pkg_stats->current_rx_rate = calc_rate(0, HZ, pkg_stats->rx_frames_delta); /* check / update maximum values */ - if (can_stats->max_tx_rate < can_stats->current_tx_rate) - can_stats->max_tx_rate = can_stats->current_tx_rate; + if (pkg_stats->max_tx_rate < pkg_stats->current_tx_rate) + pkg_stats->max_tx_rate = pkg_stats->current_tx_rate; - if (can_stats->max_rx_rate < can_stats->current_rx_rate) - can_stats->max_rx_rate = can_stats->current_rx_rate; + if (pkg_stats->max_rx_rate < pkg_stats->current_rx_rate) + pkg_stats->max_rx_rate = pkg_stats->current_rx_rate; - if (can_stats->max_rx_match_ratio < can_stats->current_rx_match_ratio) - can_stats->max_rx_match_ratio = can_stats->current_rx_match_ratio; + if (pkg_stats->max_rx_match_ratio < pkg_stats->current_rx_match_ratio) + pkg_stats->max_rx_match_ratio = pkg_stats->current_rx_match_ratio; /* clear values for 'current rate' calculation */ - 
can_stats->tx_frames_delta = 0; - can_stats->rx_frames_delta = 0; - can_stats->matches_delta = 0; + pkg_stats->tx_frames_delta = 0; + pkg_stats->rx_frames_delta = 0; + pkg_stats->matches_delta = 0; /* restart timer (one second) */ - mod_timer(&net->can.can_stattimer, round_jiffies(jiffies + HZ)); + mod_timer(&net->can.stattimer, round_jiffies(jiffies + HZ)); } /* @@ -211,60 +213,60 @@ static void can_print_recv_banner(struct seq_file *m) static int can_stats_proc_show(struct seq_file *m, void *v) { struct net *net = m->private; - struct s_stats *can_stats = net->can.can_stats; - struct s_pstats *can_pstats = net->can.can_pstats; + struct can_pkg_stats *pkg_stats = net->can.pkg_stats; + struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats; seq_putc(m, '\n'); - seq_printf(m, " %8ld transmitted frames (TXF)\n", can_stats->tx_frames); - seq_printf(m, " %8ld received frames (RXF)\n", can_stats->rx_frames); - seq_printf(m, " %8ld matched frames (RXMF)\n", can_stats->matches); + seq_printf(m, " %8ld transmitted frames (TXF)\n", pkg_stats->tx_frames); + seq_printf(m, " %8ld received frames (RXF)\n", pkg_stats->rx_frames); + seq_printf(m, " %8ld matched frames (RXMF)\n", pkg_stats->matches); seq_putc(m, '\n'); - if (net->can.can_stattimer.function == can_stat_update) { + if (net->can.stattimer.function == can_stat_update) { seq_printf(m, " %8ld %% total match ratio (RXMR)\n", - can_stats->total_rx_match_ratio); + pkg_stats->total_rx_match_ratio); seq_printf(m, " %8ld frames/s total tx rate (TXR)\n", - can_stats->total_tx_rate); + pkg_stats->total_tx_rate); seq_printf(m, " %8ld frames/s total rx rate (RXR)\n", - can_stats->total_rx_rate); + pkg_stats->total_rx_rate); seq_putc(m, '\n'); seq_printf(m, " %8ld %% current match ratio (CRXMR)\n", - can_stats->current_rx_match_ratio); + pkg_stats->current_rx_match_ratio); seq_printf(m, " %8ld frames/s current tx rate (CTXR)\n", - can_stats->current_tx_rate); + pkg_stats->current_tx_rate); seq_printf(m, " %8ld frames/s current rx rate (CRXR)\n", - can_stats->current_rx_rate); + pkg_stats->current_rx_rate); seq_putc(m, '\n'); seq_printf(m, " %8ld %% max match ratio (MRXMR)\n", - can_stats->max_rx_match_ratio); + pkg_stats->max_rx_match_ratio); seq_printf(m, " %8ld frames/s max tx rate (MTXR)\n", - can_stats->max_tx_rate); + pkg_stats->max_tx_rate); seq_printf(m, " %8ld frames/s max rx rate (MRXR)\n", - can_stats->max_rx_rate); + pkg_stats->max_rx_rate); seq_putc(m, '\n'); } seq_printf(m, " %8ld current receive list entries (CRCV)\n", - can_pstats->rcv_entries); + rcv_lists_stats->rcv_entries); seq_printf(m, " %8ld maximum receive list entries (MRCV)\n", - can_pstats->rcv_entries_max); + rcv_lists_stats->rcv_entries_max); - if (can_pstats->stats_reset) + if (rcv_lists_stats->stats_reset) seq_printf(m, "\n %8ld statistic resets (STR)\n", - can_pstats->stats_reset); + rcv_lists_stats->stats_reset); - if (can_pstats->user_reset) + if (rcv_lists_stats->user_reset) seq_printf(m, " %8ld user statistic resets (USTR)\n", - can_pstats->user_reset); + rcv_lists_stats->user_reset); seq_putc(m, '\n'); return 0; @@ -273,20 +275,20 @@ static int can_stats_proc_show(struct seq_file *m, void *v) static int can_reset_stats_proc_show(struct seq_file *m, void *v) { struct net *net = m->private; - struct s_pstats *can_pstats = net->can.can_pstats; - struct s_stats *can_stats = net->can.can_stats; + struct can_rcv_lists_stats *rcv_lists_stats = net->can.rcv_lists_stats; + struct can_pkg_stats *pkg_stats = net->can.pkg_stats; user_reset = 1; - if 
(net->can.can_stattimer.function == can_stat_update) { + if (net->can.stattimer.function == can_stat_update) { seq_printf(m, "Scheduled statistic reset #%ld.\n", - can_pstats->stats_reset + 1); + rcv_lists_stats->stats_reset + 1); } else { - if (can_stats->jiffies_init != jiffies) + if (pkg_stats->jiffies_init != jiffies) can_init_stats(net); seq_printf(m, "Performed statistic reset #%ld.\n", - can_pstats->stats_reset); + rcv_lists_stats->stats_reset); } return 0; } @@ -299,11 +301,11 @@ static int can_version_proc_show(struct seq_file *m, void *v) static inline void can_rcvlist_proc_show_one(struct seq_file *m, int idx, struct net_device *dev, - struct can_dev_rcv_lists *d) + struct can_dev_rcv_lists *dev_rcv_lists) { - if (!hlist_empty(&d->rx[idx])) { + if (!hlist_empty(&dev_rcv_lists->rx[idx])) { can_print_recv_banner(m); - can_print_rcvlist(m, &d->rx[idx], dev); + can_print_rcvlist(m, &dev_rcv_lists->rx[idx], dev); } else seq_printf(m, " (%s: no entry)\n", DNAME(dev)); @@ -314,7 +316,7 @@ static int can_rcvlist_proc_show(struct seq_file *m, void *v) /* double cast to prevent GCC warning */ int idx = (int)(long)PDE_DATA(m->file->f_inode); struct net_device *dev; - struct can_dev_rcv_lists *d; + struct can_dev_rcv_lists *dev_rcv_lists; struct net *net = m->private; seq_printf(m, "\nreceive list '%s':\n", rx_list_name[idx]); @@ -322,8 +324,8 @@ static int can_rcvlist_proc_show(struct seq_file *m, void *v) rcu_read_lock(); /* receive list for 'all' CAN devices (dev == NULL) */ - d = net->can.can_rx_alldev_list; - can_rcvlist_proc_show_one(m, idx, NULL, d); + dev_rcv_lists = net->can.rx_alldev_list; + can_rcvlist_proc_show_one(m, idx, NULL, dev_rcv_lists); /* receive list for registered CAN devices */ for_each_netdev_rcu(net, dev) { @@ -365,7 +367,7 @@ static inline void can_rcvlist_proc_show_array(struct seq_file *m, static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v) { struct net_device *dev; - struct can_dev_rcv_lists *d; + struct can_dev_rcv_lists *dev_rcv_lists; struct net *net = m->private; /* RX_SFF */ @@ -374,15 +376,16 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v) rcu_read_lock(); /* sff receive list for 'all' CAN devices (dev == NULL) */ - d = net->can.can_rx_alldev_list; - can_rcvlist_proc_show_array(m, NULL, d->rx_sff, ARRAY_SIZE(d->rx_sff)); + dev_rcv_lists = net->can.rx_alldev_list; + can_rcvlist_proc_show_array(m, NULL, dev_rcv_lists->rx_sff, + ARRAY_SIZE(dev_rcv_lists->rx_sff)); /* sff receive list for registered CAN devices */ for_each_netdev_rcu(net, dev) { if (dev->type == ARPHRD_CAN && dev->ml_priv) { - d = dev->ml_priv; - can_rcvlist_proc_show_array(m, dev, d->rx_sff, - ARRAY_SIZE(d->rx_sff)); + dev_rcv_lists = dev->ml_priv; + can_rcvlist_proc_show_array(m, dev, dev_rcv_lists->rx_sff, + ARRAY_SIZE(dev_rcv_lists->rx_sff)); } } @@ -395,7 +398,7 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v) static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v) { struct net_device *dev; - struct can_dev_rcv_lists *d; + struct can_dev_rcv_lists *dev_rcv_lists; struct net *net = m->private; /* RX_EFF */ @@ -404,15 +407,16 @@ static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v) rcu_read_lock(); /* eff receive list for 'all' CAN devices (dev == NULL) */ - d = net->can.can_rx_alldev_list; - can_rcvlist_proc_show_array(m, NULL, d->rx_eff, ARRAY_SIZE(d->rx_eff)); + dev_rcv_lists = net->can.rx_alldev_list; + can_rcvlist_proc_show_array(m, NULL, dev_rcv_lists->rx_eff, + ARRAY_SIZE(dev_rcv_lists->rx_eff)); /* eff 
receive list for registered CAN devices */ for_each_netdev_rcu(net, dev) { if (dev->type == ARPHRD_CAN && dev->ml_priv) { - d = dev->ml_priv; - can_rcvlist_proc_show_array(m, dev, d->rx_eff, - ARRAY_SIZE(d->rx_eff)); + dev_rcv_lists = dev->ml_priv; + can_rcvlist_proc_show_array(m, dev, dev_rcv_lists->rx_eff, + ARRAY_SIZE(dev_rcv_lists->rx_eff)); } } diff --git a/net/can/raw.c b/net/can/raw.c index afcbff063a67..59c039d73c6d 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -1,5 +1,5 @@ -/* - * raw.c - Raw sockets for protocol family CAN +// SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) +/* raw.c - Raw sockets for protocol family CAN * * Copyright (c) 2002-2007 Volkswagen Group Electronic Research * All rights reserved. @@ -64,8 +64,7 @@ MODULE_ALIAS("can-proto-1"); #define MASK_ALL 0 -/* - * A raw socket has a list of can_filters attached to it, each receiving +/* A raw socket has a list of can_filters attached to it, each receiving * the CAN frames matching that filter. If the filter list is empty, * no CAN frames will be received by the socket. The default after * opening the socket, is to have one filter which receives all frames. @@ -96,8 +95,7 @@ struct raw_sock { struct uniqframe __percpu *uniq; }; -/* - * Return pointer to store the extra msg flags for raw_recvmsg(). +/* Return pointer to store the extra msg flags for raw_recvmsg(). * We use the space of one unsigned int beyond the 'struct sockaddr_can' * in skb->cb. */ @@ -156,8 +154,7 @@ static void raw_rcv(struct sk_buff *oskb, void *data) if (!skb) return; - /* - * Put the datagram to the queue so that raw_recvmsg() can + /* Put the datagram to the queue so that raw_recvmsg() can * get it from there. We need to pass the interface index to * raw_recvmsg(). We pass a whole struct sockaddr_can in skb->cb * containing the interface index. 
@@ -283,7 +280,6 @@ static int raw_notifier(struct notifier_block *nb, return NOTIFY_DONE; switch (msg) { - case NETDEV_UNREGISTER: lock_sock(sk); /* remove current filters & unregister */ @@ -369,8 +365,9 @@ static int raw_release(struct socket *sock) raw_disable_allfilters(dev_net(dev), dev, sk); dev_put(dev); } - } else + } else { raw_disable_allfilters(sock_net(sk), NULL, sk); + } } if (ro->count > 1) @@ -399,7 +396,7 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) int err = 0; int notify_enetdown = 0; - if (len < sizeof(*addr)) + if (len < CAN_REQUIRED_SIZE(*addr, can_ifindex)) return -EINVAL; if (addr->can_family != AF_CAN) return -EINVAL; @@ -450,8 +447,9 @@ static int raw_bind(struct socket *sock, struct sockaddr *uaddr, int len) dev, sk); dev_put(dev); } - } else + } else { raw_disable_allfilters(sock_net(sk), NULL, sk); + } } ro->ifindex = ifindex; ro->bound = 1; @@ -502,7 +500,6 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, return -EINVAL; switch (optname) { - case CAN_RAW_FILTER: if (optlen % sizeof(struct can_filter) != 0) return -EINVAL; @@ -665,17 +662,18 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, return -EINVAL; switch (optname) { - case CAN_RAW_FILTER: lock_sock(sk); if (ro->count > 0) { int fsize = ro->count * sizeof(struct can_filter); + if (len > fsize) len = fsize; if (copy_to_user(optval, ro->filter, len)) err = -EFAULT; - } else + } else { len = 0; + } release_sock(sk); if (!err) @@ -735,15 +733,16 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) if (msg->msg_name) { DECLARE_SOCKADDR(struct sockaddr_can *, addr, msg->msg_name); - if (msg->msg_namelen < sizeof(*addr)) + if (msg->msg_namelen < CAN_REQUIRED_SIZE(*addr, can_ifindex)) return -EINVAL; if (addr->can_family != AF_CAN) return -EINVAL; ifindex = addr->can_ifindex; - } else + } else { ifindex = ro->ifindex; + } dev = dev_get_by_index(sock_net(sk), ifindex); if (!dev) @@ -836,6 +835,13 @@ static int raw_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, return size; } +static int raw_sock_no_ioctlcmd(struct socket *sock, unsigned int cmd, + unsigned long arg) +{ + /* no ioctls for socket layer -> hand it down to NIC layer */ + return -ENOIOCTLCMD; +} + static const struct proto_ops raw_ops = { .family = PF_CAN, .release = raw_release, @@ -845,7 +851,7 @@ static const struct proto_ops raw_ops = { .accept = sock_no_accept, .getname = raw_getname, .poll = datagram_poll, - .ioctl = can_ioctl, /* use can_ioctl() from af_can.c */ + .ioctl = raw_sock_no_ioctlcmd, .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = sock_no_shutdown, @@ -879,7 +885,7 @@ static __init int raw_module_init(void) err = can_proto_register(&raw_can_proto); if (err < 0) - printk(KERN_ERR "can: registration of raw protocol failed\n"); + pr_err("can: registration of raw protocol failed\n"); return err; } diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 94c7f77ecb6b..da5639a5bd3b 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -12,6 +12,9 @@ static atomic_t cache_idx; +#define SK_STORAGE_CREATE_FLAG_MASK \ + (BPF_F_NO_PREALLOC | BPF_F_CLONE) + struct bucket { struct hlist_head list; raw_spinlock_t lock; @@ -209,7 +212,6 @@ static void selem_unlink_sk(struct bpf_sk_storage_elem *selem) kfree_rcu(sk_storage, rcu); } -/* sk_storage->lock must be held and sk_storage->list cannot be empty */ static void __selem_link_sk(struct bpf_sk_storage *sk_storage, struct 
bpf_sk_storage_elem *selem) { @@ -509,7 +511,7 @@ static int sk_storage_delete(struct sock *sk, struct bpf_map *map) return 0; } -/* Called by __sk_destruct() */ +/* Called by __sk_destruct() & bpf_sk_storage_clone() */ void bpf_sk_storage_free(struct sock *sk) { struct bpf_sk_storage_elem *selem; @@ -557,6 +559,11 @@ static void bpf_sk_storage_map_free(struct bpf_map *map) smap = (struct bpf_sk_storage_map *)map; + /* Note that this map might be concurrently cloned from + * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone + * RCU read section to finish before proceeding. New RCU + * read sections should be prevented via bpf_map_inc_not_zero. + */ synchronize_rcu(); /* bpf prog and the userspace can no longer access this map @@ -601,7 +608,9 @@ static void bpf_sk_storage_map_free(struct bpf_map *map) static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr) { - if (attr->map_flags != BPF_F_NO_PREALLOC || attr->max_entries || + if (attr->map_flags & ~SK_STORAGE_CREATE_FLAG_MASK || + !(attr->map_flags & BPF_F_NO_PREALLOC) || + attr->max_entries || attr->key_size != sizeof(int) || !attr->value_size || /* Enforce BTF for userspace sk dumping */ !attr->btf_key_type_id || !attr->btf_value_type_id) @@ -739,6 +748,95 @@ static int bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key) return err; } +static struct bpf_sk_storage_elem * +bpf_sk_storage_clone_elem(struct sock *newsk, + struct bpf_sk_storage_map *smap, + struct bpf_sk_storage_elem *selem) +{ + struct bpf_sk_storage_elem *copy_selem; + + copy_selem = selem_alloc(smap, newsk, NULL, true); + if (!copy_selem) + return NULL; + + if (map_value_has_spin_lock(&smap->map)) + copy_map_value_locked(&smap->map, SDATA(copy_selem)->data, + SDATA(selem)->data, true); + else + copy_map_value(&smap->map, SDATA(copy_selem)->data, + SDATA(selem)->data); + + return copy_selem; +} + +int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) +{ + struct bpf_sk_storage *new_sk_storage = NULL; + struct bpf_sk_storage *sk_storage; + struct bpf_sk_storage_elem *selem; + int ret = 0; + + RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL); + + rcu_read_lock(); + sk_storage = rcu_dereference(sk->sk_bpf_storage); + + if (!sk_storage || hlist_empty(&sk_storage->list)) + goto out; + + hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) { + struct bpf_sk_storage_elem *copy_selem; + struct bpf_sk_storage_map *smap; + struct bpf_map *map; + + smap = rcu_dereference(SDATA(selem)->smap); + if (!(smap->map.map_flags & BPF_F_CLONE)) + continue; + + /* Note that for lockless listeners adding new element + * here can race with cleanup in bpf_sk_storage_map_free. + * Try to grab map refcnt to make sure that it's still + * alive and prevent concurrent removal. + */ + map = bpf_map_inc_not_zero(&smap->map, false); + if (IS_ERR(map)) + continue; + + copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem); + if (!copy_selem) { + ret = -ENOMEM; + bpf_map_put(map); + goto out; + } + + if (new_sk_storage) { + selem_link_map(smap, copy_selem); + __selem_link_sk(new_sk_storage, copy_selem); + } else { + ret = sk_storage_alloc(newsk, smap, copy_selem); + if (ret) { + kfree(copy_selem); + atomic_sub(smap->elem_size, + &newsk->sk_omem_alloc); + bpf_map_put(map); + goto out; + } + + new_sk_storage = rcu_dereference(copy_selem->sk_storage); + } + bpf_map_put(map); + } + +out: + rcu_read_unlock(); + + /* In case of an error, don't free anything explicitly here, the + * caller is responsible to call bpf_sk_storage_free. 
+ */ + + return ret; +} + BPF_CALL_4(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk, void *, value, u64, flags) { diff --git a/net/core/datagram.c b/net/core/datagram.c index 45a162ef5e02..4cc8dc5db2b7 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -442,8 +442,8 @@ static int __skb_datagram_iter(const struct sk_buff *skb, int offset, if (copy > len) copy = len; - n = cb(vaddr + frag->page_offset + - offset - start, copy, data, to); + n = cb(vaddr + skb_frag_off(frag) + offset - start, + copy, data, to); kunmap(page); offset += n; if (n != copy) @@ -573,7 +573,7 @@ int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, if (copy > len) copy = len; copied = copy_page_from_iter(skb_frag_page(frag), - frag->page_offset + offset - start, + skb_frag_off(frag) + offset - start, copy, from); if (copied != copy) goto fault; diff --git a/net/core/dev.c b/net/core/dev.c index 5156c0edebe8..71b18e80389f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3467,18 +3467,22 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, qdisc_calculate_pkt_len(skb, q); if (q->flags & TCQ_F_NOLOCK) { - if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) { - __qdisc_drop(skb, &to_free); - rc = NET_XMIT_DROP; - } else if ((q->flags & TCQ_F_CAN_BYPASS) && q->empty && - qdisc_run_begin(q)) { + if ((q->flags & TCQ_F_CAN_BYPASS) && q->empty && + qdisc_run_begin(q)) { + if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, + &q->state))) { + __qdisc_drop(skb, &to_free); + rc = NET_XMIT_DROP; + goto end_run; + } qdisc_bstats_cpu_update(q, skb); + rc = NET_XMIT_SUCCESS; if (sch_direct_xmit(skb, q, dev, txq, NULL, true)) __qdisc_run(q); +end_run: qdisc_run_end(q); - rc = NET_XMIT_SUCCESS; } else { rc = q->enqueue(skb, q, &to_free) & NET_XMIT_MASK; qdisc_run(q); @@ -3963,6 +3967,8 @@ int dev_weight_rx_bias __read_mostly = 1; /* bias for backlog weight */ int dev_weight_tx_bias __read_mostly = 1; /* bias for output_queue quota */ int dev_rx_weight __read_mostly = 64; int dev_tx_weight __read_mostly = 64; +/* Maximum number of GRO_NORMAL skbs to batch up for list-RX */ +int gro_normal_batch __read_mostly = 8; /* Called with irq disabled */ static inline void ____napi_schedule(struct softnet_data *sd, @@ -5486,7 +5492,7 @@ static void gro_pull_from_frag0(struct sk_buff *skb, int grow) skb->data_len -= grow; skb->tail += grow; - pinfo->frags[0].page_offset += grow; + skb_frag_off_add(&pinfo->frags[0], grow); skb_frag_size_sub(&pinfo->frags[0], grow); if (unlikely(!skb_frag_size(&pinfo->frags[0]))) { @@ -5747,6 +5753,26 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi) } EXPORT_SYMBOL(napi_get_frags); +/* Pass the currently batched GRO_NORMAL SKBs up to the stack. */ +static void gro_normal_list(struct napi_struct *napi) +{ + if (!napi->rx_count) + return; + netif_receive_skb_list_internal(&napi->rx_list); + INIT_LIST_HEAD(&napi->rx_list); + napi->rx_count = 0; +} + +/* Queue one GRO_NORMAL SKB up for list processing. If batch size exceeded, + * pass the whole batch up to the stack. 
+ */ +static void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb) +{ + list_add_tail(&skb->list, &napi->rx_list); + if (++napi->rx_count >= gro_normal_batch) + gro_normal_list(napi); +} + static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, gro_result_t ret) @@ -5756,8 +5782,8 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi, case GRO_HELD: __skb_push(skb, ETH_HLEN); skb->protocol = eth_type_trans(skb, skb->dev); - if (ret == GRO_NORMAL && netif_receive_skb_internal(skb)) - ret = GRO_DROP; + if (ret == GRO_NORMAL) + gro_normal_one(napi, skb); break; case GRO_DROP: @@ -6034,6 +6060,8 @@ bool napi_complete_done(struct napi_struct *n, int work_done) NAPIF_STATE_IN_BUSY_POLL))) return false; + gro_normal_list(n); + if (n->gro_bitmask) { unsigned long timeout = 0; @@ -6119,10 +6147,19 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock) * Ideally, a new ndo_busy_poll_stop() could avoid another round. */ rc = napi->poll(napi, BUSY_POLL_BUDGET); + /* We can't gro_normal_list() here, because napi->poll() might have + * rearmed the napi (napi_complete_done()) in which case it could + * already be running on another CPU. + */ trace_napi_poll(napi, rc, BUSY_POLL_BUDGET); netpoll_poll_unlock(have_poll_lock); - if (rc == BUSY_POLL_BUDGET) + if (rc == BUSY_POLL_BUDGET) { + /* As the whole budget was spent, we still own the napi so can + * safely handle the rx_list. + */ + gro_normal_list(napi); __napi_schedule(napi); + } local_bh_enable(); } @@ -6167,6 +6204,7 @@ restart: } work = napi_poll(napi, BUSY_POLL_BUDGET); trace_napi_poll(napi, work, BUSY_POLL_BUDGET); + gro_normal_list(napi); count: if (work > 0) __NET_ADD_STATS(dev_net(napi->dev), @@ -6272,6 +6310,8 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi, napi->timer.function = napi_watchdog; init_gro_hash(napi); napi->skb = NULL; + INIT_LIST_HEAD(&napi->rx_list); + napi->rx_count = 0; napi->poll = poll; if (weight > NAPI_POLL_WEIGHT) netdev_err_once(dev, "%s() called with weight %d\n", __func__, @@ -6368,6 +6408,8 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) goto out_unlock; } + gro_normal_list(n); + if (n->gro_bitmask) { /* flush too old packets * If HZ < 1000, flush all packets. 
@@ -8088,12 +8130,15 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, bpf_chk = generic_xdp_install; if (fd >= 0) { + u32 prog_id; + if (!offload && __dev_xdp_query(dev, bpf_chk, XDP_QUERY_PROG)) { NL_SET_ERR_MSG(extack, "native and generic XDP can't be active at the same time"); return -EEXIST; } - if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && - __dev_xdp_query(dev, bpf_op, query)) { + + prog_id = __dev_xdp_query(dev, bpf_op, query); + if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && prog_id) { NL_SET_ERR_MSG(extack, "XDP program already attached"); return -EBUSY; } @@ -8108,6 +8153,14 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, bpf_prog_put(prog); return -EINVAL; } + + if (prog->aux->id == prog_id) { + bpf_prog_put(prog); + return 0; + } + } else { + if (!__dev_xdp_query(dev, bpf_op, query)) + return 0; } err = dev_xdp_install(dev, bpf_op, extack, flags, prog); diff --git a/net/core/devlink.c b/net/core/devlink.c index 4f40aeace902..e48680efe54a 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -18,6 +18,8 @@ #include <linux/spinlock.h> #include <linux/refcount.h> #include <linux/workqueue.h> +#include <linux/u64_stats_sync.h> +#include <linux/timekeeping.h> #include <rdma/ib_verbs.h> #include <net/netlink.h> #include <net/genetlink.h> @@ -25,6 +27,7 @@ #include <net/net_namespace.h> #include <net/sock.h> #include <net/devlink.h> +#include <net/drop_monitor.h> #define CREATE_TRACE_POINTS #include <trace/events/devlink.h> @@ -133,7 +136,7 @@ static struct devlink *devlink_get_from_info(struct genl_info *info) } static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink, - int port_index) + unsigned int port_index) { struct devlink_port *devlink_port; @@ -144,7 +147,8 @@ static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink, return NULL; } -static bool devlink_port_index_exists(struct devlink *devlink, int port_index) +static bool devlink_port_index_exists(struct devlink *devlink, + unsigned int port_index) { return devlink_port_get_by_index(devlink, port_index); } @@ -342,7 +346,6 @@ struct devlink_snapshot { struct list_head list; struct devlink_region *region; devlink_snapshot_data_dest_t *data_destructor; - u64 data_len; u8 *data; u32 id; }; @@ -371,14 +374,6 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id) return NULL; } -static void devlink_region_snapshot_del(struct devlink_snapshot *snapshot) -{ - snapshot->region->cur_snapshots--; - list_del(&snapshot->list); - (*snapshot->data_destructor)(snapshot->data); - kfree(snapshot); -} - #define DEVLINK_NL_FLAG_NEED_DEVLINK BIT(0) #define DEVLINK_NL_FLAG_NEED_PORT BIT(1) #define DEVLINK_NL_FLAG_NEED_SB BIT(2) @@ -476,6 +471,8 @@ static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink, if (devlink_nl_put_handle(msg, devlink)) goto nla_put_failure; + if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_FAILED, devlink->reload_failed)) + goto nla_put_failure; genlmsg_end(msg, hdr); return 0; @@ -515,32 +512,37 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg, return 0; if (nla_put_u16(msg, DEVLINK_ATTR_PORT_FLAVOUR, attrs->flavour)) return -EMSGSIZE; - if (devlink_port->attrs.flavour == DEVLINK_PORT_FLAVOUR_PCI_PF) { + switch (devlink_port->attrs.flavour) { + case DEVLINK_PORT_FLAVOUR_PCI_PF: if (nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER, attrs->pci_pf.pf)) return -EMSGSIZE; - } else if (devlink_port->attrs.flavour == DEVLINK_PORT_FLAVOUR_PCI_VF) { + break; + case 
DEVLINK_PORT_FLAVOUR_PCI_VF: if (nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_PF_NUMBER, attrs->pci_vf.pf) || nla_put_u16(msg, DEVLINK_ATTR_PORT_PCI_VF_NUMBER, attrs->pci_vf.vf)) return -EMSGSIZE; + break; + case DEVLINK_PORT_FLAVOUR_PHYSICAL: + case DEVLINK_PORT_FLAVOUR_CPU: + case DEVLINK_PORT_FLAVOUR_DSA: + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER, + attrs->phys.port_number)) + return -EMSGSIZE; + if (!attrs->split) + return 0; + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP, + attrs->phys.port_number)) + return -EMSGSIZE; + if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER, + attrs->phys.split_subport_number)) + return -EMSGSIZE; + break; + default: + break; } - if (devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_PHYSICAL && - devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_CPU && - devlink_port->attrs.flavour != DEVLINK_PORT_FLAVOUR_DSA) - return 0; - if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER, - attrs->phys.port_number)) - return -EMSGSIZE; - if (!attrs->split) - return 0; - if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_GROUP, - attrs->phys.port_number)) - return -EMSGSIZE; - if (nla_put_u32(msg, DEVLINK_ATTR_PORT_SPLIT_SUBPORT_NUMBER, - attrs->phys.split_subport_number)) - return -EMSGSIZE; return 0; } @@ -560,7 +562,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, devlink_port->index)) goto nla_put_failure; - spin_lock(&devlink_port->type_lock); + spin_lock_bh(&devlink_port->type_lock); if (nla_put_u16(msg, DEVLINK_ATTR_PORT_TYPE, devlink_port->type)) goto nla_put_failure_type_locked; if (devlink_port->desired_type != DEVLINK_PORT_TYPE_NOTSET && @@ -585,7 +587,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, ibdev->name)) goto nla_put_failure_type_locked; } - spin_unlock(&devlink_port->type_lock); + spin_unlock_bh(&devlink_port->type_lock); if (devlink_nl_port_attrs_put(msg, devlink_port)) goto nla_put_failure; @@ -593,7 +595,7 @@ static int devlink_nl_port_fill(struct sk_buff *msg, struct devlink *devlink, return 0; nla_put_failure_type_locked: - spin_unlock(&devlink_port->type_lock); + spin_unlock_bh(&devlink_port->type_lock); nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; @@ -2672,12 +2674,32 @@ devlink_resources_validate(struct devlink *devlink, return err; } +static bool devlink_reload_supported(struct devlink *devlink) +{ + return devlink->ops->reload_down && devlink->ops->reload_up; +} + +static void devlink_reload_failed_set(struct devlink *devlink, + bool reload_failed) +{ + if (devlink->reload_failed == reload_failed) + return; + devlink->reload_failed = reload_failed; + devlink_notify(devlink, DEVLINK_CMD_NEW); +} + +bool devlink_is_reload_failed(const struct devlink *devlink) +{ + return devlink->reload_failed; +} +EXPORT_SYMBOL_GPL(devlink_is_reload_failed); + static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) { struct devlink *devlink = info->user_ptr[0]; int err; - if (!devlink->ops->reload) + if (!devlink_reload_supported(devlink)) return -EOPNOTSUPP; err = devlink_resources_validate(devlink, NULL, info); @@ -2685,7 +2707,12 @@ static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info) NL_SET_ERR_MSG_MOD(info->extack, "resources size validation failed"); return err; } - return devlink->ops->reload(devlink, info->extack); + err = devlink->ops->reload_down(devlink, info->extack); + if (err) + return err; + err = devlink->ops->reload_up(devlink, info->extack); + 
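The reload hunk above splits the former single ->reload() op into a ->reload_down()/->reload_up() pair and records the outcome with devlink_reload_failed_set(). A hedged sketch of what a driver-side pair might look like is below; struct foo_dev, foo_teardown() and foo_bring_up() are hypothetical names introduced only for illustration and are not taken from this patch or from any real driver.

#include <net/devlink.h>

/* Hypothetical driver state; illustrative only. */
struct foo_dev {
    bool up;
};

static void foo_teardown(struct foo_dev *fdev)
{
    fdev->up = false;                  /* quiesce the device, release resources */
}

static int foo_bring_up(struct foo_dev *fdev)
{
    fdev->up = true;                   /* re-create state from scratch */
    return 0;
}

static int foo_devlink_reload_down(struct devlink *devlink,
                                   struct netlink_ext_ack *extack)
{
    struct foo_dev *fdev = devlink_priv(devlink);

    foo_teardown(fdev);
    return 0;
}

static int foo_devlink_reload_up(struct devlink *devlink,
                                 struct netlink_ext_ack *extack)
{
    struct foo_dev *fdev = devlink_priv(devlink);

    /* A non-zero return from the up stage is what marks the reload as failed. */
    return foo_bring_up(fdev);
}

static const struct devlink_ops foo_devlink_ops = {
    .reload_down = foo_devlink_reload_down,
    .reload_up   = foo_devlink_reload_up,
};

devlink_reload_supported() in the hunk only reports true when both ops are provided, so a driver implements either the full pair or neither.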
devlink_reload_failed_set(devlink, !!err); + return err; } static int devlink_nl_flash_update_fill(struct sk_buff *msg, @@ -2852,6 +2879,11 @@ static const struct devlink_param devlink_param_generic[] = { .name = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_NAME, .type = DEVLINK_PARAM_GENERIC_FW_LOAD_POLICY_TYPE, }, + { + .id = DEVLINK_PARAM_GENERIC_ID_RESET_DEV_ON_DRV_PROBE, + .name = DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_NAME, + .type = DEVLINK_PARAM_GENERIC_RESET_DEV_ON_DRV_PROBE_TYPE, + }, }; static int devlink_param_generic_verify(const struct devlink_param *param) @@ -3596,6 +3628,16 @@ out_free_msg: nlmsg_free(msg); } +static void devlink_region_snapshot_del(struct devlink_region *region, + struct devlink_snapshot *snapshot) +{ + devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_DEL); + region->cur_snapshots--; + list_del(&snapshot->list); + (*snapshot->data_destructor)(snapshot->data); + kfree(snapshot); +} + static int devlink_nl_cmd_region_get_doit(struct sk_buff *skb, struct genl_info *info) { @@ -3691,8 +3733,7 @@ static int devlink_nl_cmd_region_del(struct sk_buff *skb, if (!snapshot) return -EINVAL; - devlink_nl_region_notify(region, snapshot, DEVLINK_CMD_REGION_DEL); - devlink_region_snapshot_del(snapshot); + devlink_region_snapshot_del(region, snapshot); return 0; } @@ -3748,8 +3789,8 @@ static int devlink_nl_region_read_snapshot_fill(struct sk_buff *skb, if (!snapshot) return -EINVAL; - if (end_offset > snapshot->data_len || dump) - end_offset = snapshot->data_len; + if (end_offset > region->size || dump) + end_offset = region->size; while (curr_offset < end_offset) { u32 data_size; @@ -5154,6 +5195,571 @@ devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb, return 0; } +struct devlink_stats { + u64 rx_bytes; + u64 rx_packets; + struct u64_stats_sync syncp; +}; + +/** + * struct devlink_trap_group_item - Packet trap group attributes. + * @group: Immutable packet trap group attributes. + * @refcount: Number of trap items using the group. + * @list: trap_group_list member. + * @stats: Trap group statistics. + * + * Describes packet trap group attributes. Created by devlink during trap + * registration. + */ +struct devlink_trap_group_item { + const struct devlink_trap_group *group; + refcount_t refcount; + struct list_head list; + struct devlink_stats __percpu *stats; +}; + +/** + * struct devlink_trap_item - Packet trap attributes. + * @trap: Immutable packet trap attributes. + * @group_item: Associated group item. + * @list: trap_list member. + * @action: Trap action. + * @stats: Trap statistics. + * @priv: Driver private information. + * + * Describes both mutable and immutable packet trap attributes. Created by + * devlink during trap registration and used for all trap related operations. 
+ */ +struct devlink_trap_item { + const struct devlink_trap *trap; + struct devlink_trap_group_item *group_item; + struct list_head list; + enum devlink_trap_action action; + struct devlink_stats __percpu *stats; + void *priv; +}; + +static struct devlink_trap_item * +devlink_trap_item_lookup(struct devlink *devlink, const char *name) +{ + struct devlink_trap_item *trap_item; + + list_for_each_entry(trap_item, &devlink->trap_list, list) { + if (!strcmp(trap_item->trap->name, name)) + return trap_item; + } + + return NULL; +} + +static struct devlink_trap_item * +devlink_trap_item_get_from_info(struct devlink *devlink, + struct genl_info *info) +{ + struct nlattr *attr; + + if (!info->attrs[DEVLINK_ATTR_TRAP_NAME]) + return NULL; + attr = info->attrs[DEVLINK_ATTR_TRAP_NAME]; + + return devlink_trap_item_lookup(devlink, nla_data(attr)); +} + +static int +devlink_trap_action_get_from_info(struct genl_info *info, + enum devlink_trap_action *p_trap_action) +{ + u8 val; + + val = nla_get_u8(info->attrs[DEVLINK_ATTR_TRAP_ACTION]); + switch (val) { + case DEVLINK_TRAP_ACTION_DROP: /* fall-through */ + case DEVLINK_TRAP_ACTION_TRAP: + *p_trap_action = val; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int devlink_trap_metadata_put(struct sk_buff *msg, + const struct devlink_trap *trap) +{ + struct nlattr *attr; + + attr = nla_nest_start(msg, DEVLINK_ATTR_TRAP_METADATA); + if (!attr) + return -EMSGSIZE; + + if ((trap->metadata_cap & DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT) && + nla_put_flag(msg, DEVLINK_ATTR_TRAP_METADATA_TYPE_IN_PORT)) + goto nla_put_failure; + + nla_nest_end(msg, attr); + + return 0; + +nla_put_failure: + nla_nest_cancel(msg, attr); + return -EMSGSIZE; +} + +static void devlink_trap_stats_read(struct devlink_stats __percpu *trap_stats, + struct devlink_stats *stats) +{ + int i; + + memset(stats, 0, sizeof(*stats)); + for_each_possible_cpu(i) { + struct devlink_stats *cpu_stats; + u64 rx_packets, rx_bytes; + unsigned int start; + + cpu_stats = per_cpu_ptr(trap_stats, i); + do { + start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + rx_packets = cpu_stats->rx_packets; + rx_bytes = cpu_stats->rx_bytes; + } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); + + stats->rx_packets += rx_packets; + stats->rx_bytes += rx_bytes; + } +} + +static int devlink_trap_stats_put(struct sk_buff *msg, + struct devlink_stats __percpu *trap_stats) +{ + struct devlink_stats stats; + struct nlattr *attr; + + devlink_trap_stats_read(trap_stats, &stats); + + attr = nla_nest_start(msg, DEVLINK_ATTR_STATS); + if (!attr) + return -EMSGSIZE; + + if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_PACKETS, + stats.rx_packets, DEVLINK_ATTR_PAD)) + goto nla_put_failure; + + if (nla_put_u64_64bit(msg, DEVLINK_ATTR_STATS_RX_BYTES, + stats.rx_bytes, DEVLINK_ATTR_PAD)) + goto nla_put_failure; + + nla_nest_end(msg, attr); + + return 0; + +nla_put_failure: + nla_nest_cancel(msg, attr); + return -EMSGSIZE; +} + +static int devlink_nl_trap_fill(struct sk_buff *msg, struct devlink *devlink, + const struct devlink_trap_item *trap_item, + enum devlink_command cmd, u32 portid, u32 seq, + int flags) +{ + struct devlink_trap_group_item *group_item = trap_item->group_item; + void *hdr; + int err; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + + if (nla_put_string(msg, DEVLINK_ATTR_TRAP_GROUP_NAME, + group_item->group->name)) + goto nla_put_failure; + + if 
(nla_put_string(msg, DEVLINK_ATTR_TRAP_NAME, trap_item->trap->name)) + goto nla_put_failure; + + if (nla_put_u8(msg, DEVLINK_ATTR_TRAP_TYPE, trap_item->trap->type)) + goto nla_put_failure; + + if (trap_item->trap->generic && + nla_put_flag(msg, DEVLINK_ATTR_TRAP_GENERIC)) + goto nla_put_failure; + + if (nla_put_u8(msg, DEVLINK_ATTR_TRAP_ACTION, trap_item->action)) + goto nla_put_failure; + + err = devlink_trap_metadata_put(msg, trap_item->trap); + if (err) + goto nla_put_failure; + + err = devlink_trap_stats_put(msg, trap_item->stats); + if (err) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int devlink_nl_cmd_trap_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct netlink_ext_ack *extack = info->extack; + struct devlink *devlink = info->user_ptr[0]; + struct devlink_trap_item *trap_item; + struct sk_buff *msg; + int err; + + if (list_empty(&devlink->trap_list)) + return -EOPNOTSUPP; + + trap_item = devlink_trap_item_get_from_info(devlink, info); + if (!trap_item) { + NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap"); + return -ENOENT; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_nl_trap_fill(msg, devlink, trap_item, + DEVLINK_CMD_TRAP_NEW, info->snd_portid, + info->snd_seq, 0); + if (err) + goto err_trap_fill; + + return genlmsg_reply(msg, info); + +err_trap_fill: + nlmsg_free(msg); + return err; +} + +static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + struct devlink_trap_item *trap_item; + struct devlink *devlink; + int start = cb->args[0]; + int idx = 0; + int err; + + mutex_lock(&devlink_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + continue; + mutex_lock(&devlink->lock); + list_for_each_entry(trap_item, &devlink->trap_list, list) { + if (idx < start) { + idx++; + continue; + } + err = devlink_nl_trap_fill(msg, devlink, trap_item, + DEVLINK_CMD_TRAP_NEW, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI); + if (err) { + mutex_unlock(&devlink->lock); + goto out; + } + idx++; + } + mutex_unlock(&devlink->lock); + } +out: + mutex_unlock(&devlink_mutex); + + cb->args[0] = idx; + return msg->len; +} + +static int __devlink_trap_action_set(struct devlink *devlink, + struct devlink_trap_item *trap_item, + enum devlink_trap_action trap_action, + struct netlink_ext_ack *extack) +{ + int err; + + if (trap_item->action != trap_action && + trap_item->trap->type != DEVLINK_TRAP_TYPE_DROP) { + NL_SET_ERR_MSG_MOD(extack, "Cannot change action of non-drop traps. 
Skipping"); + return 0; + } + + err = devlink->ops->trap_action_set(devlink, trap_item->trap, + trap_action); + if (err) + return err; + + trap_item->action = trap_action; + + return 0; +} + +static int devlink_trap_action_set(struct devlink *devlink, + struct devlink_trap_item *trap_item, + struct genl_info *info) +{ + enum devlink_trap_action trap_action; + int err; + + if (!info->attrs[DEVLINK_ATTR_TRAP_ACTION]) + return 0; + + err = devlink_trap_action_get_from_info(info, &trap_action); + if (err) { + NL_SET_ERR_MSG_MOD(info->extack, "Invalid trap action"); + return -EINVAL; + } + + return __devlink_trap_action_set(devlink, trap_item, trap_action, + info->extack); +} + +static int devlink_nl_cmd_trap_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct netlink_ext_ack *extack = info->extack; + struct devlink *devlink = info->user_ptr[0]; + struct devlink_trap_item *trap_item; + int err; + + if (list_empty(&devlink->trap_list)) + return -EOPNOTSUPP; + + trap_item = devlink_trap_item_get_from_info(devlink, info); + if (!trap_item) { + NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap"); + return -ENOENT; + } + + err = devlink_trap_action_set(devlink, trap_item, info); + if (err) + return err; + + return 0; +} + +static struct devlink_trap_group_item * +devlink_trap_group_item_lookup(struct devlink *devlink, const char *name) +{ + struct devlink_trap_group_item *group_item; + + list_for_each_entry(group_item, &devlink->trap_group_list, list) { + if (!strcmp(group_item->group->name, name)) + return group_item; + } + + return NULL; +} + +static struct devlink_trap_group_item * +devlink_trap_group_item_get_from_info(struct devlink *devlink, + struct genl_info *info) +{ + char *name; + + if (!info->attrs[DEVLINK_ATTR_TRAP_GROUP_NAME]) + return NULL; + name = nla_data(info->attrs[DEVLINK_ATTR_TRAP_GROUP_NAME]); + + return devlink_trap_group_item_lookup(devlink, name); +} + +static int +devlink_nl_trap_group_fill(struct sk_buff *msg, struct devlink *devlink, + const struct devlink_trap_group_item *group_item, + enum devlink_command cmd, u32 portid, u32 seq, + int flags) +{ + void *hdr; + int err; + + hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd); + if (!hdr) + return -EMSGSIZE; + + if (devlink_nl_put_handle(msg, devlink)) + goto nla_put_failure; + + if (nla_put_string(msg, DEVLINK_ATTR_TRAP_GROUP_NAME, + group_item->group->name)) + goto nla_put_failure; + + if (group_item->group->generic && + nla_put_flag(msg, DEVLINK_ATTR_TRAP_GENERIC)) + goto nla_put_failure; + + err = devlink_trap_stats_put(msg, group_item->stats); + if (err) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int devlink_nl_cmd_trap_group_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct netlink_ext_ack *extack = info->extack; + struct devlink *devlink = info->user_ptr[0]; + struct devlink_trap_group_item *group_item; + struct sk_buff *msg; + int err; + + if (list_empty(&devlink->trap_group_list)) + return -EOPNOTSUPP; + + group_item = devlink_trap_group_item_get_from_info(devlink, info); + if (!group_item) { + NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap group"); + return -ENOENT; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + err = devlink_nl_trap_group_fill(msg, devlink, group_item, + DEVLINK_CMD_TRAP_GROUP_NEW, + info->snd_portid, info->snd_seq, 0); + if (err) + goto err_trap_group_fill; + + return 
genlmsg_reply(msg, info); + +err_trap_group_fill: + nlmsg_free(msg); + return err; +} + +static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg, + struct netlink_callback *cb) +{ + enum devlink_command cmd = DEVLINK_CMD_TRAP_GROUP_NEW; + struct devlink_trap_group_item *group_item; + u32 portid = NETLINK_CB(cb->skb).portid; + struct devlink *devlink; + int start = cb->args[0]; + int idx = 0; + int err; + + mutex_lock(&devlink_mutex); + list_for_each_entry(devlink, &devlink_list, list) { + if (!net_eq(devlink_net(devlink), sock_net(msg->sk))) + continue; + mutex_lock(&devlink->lock); + list_for_each_entry(group_item, &devlink->trap_group_list, + list) { + if (idx < start) { + idx++; + continue; + } + err = devlink_nl_trap_group_fill(msg, devlink, + group_item, cmd, + portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI); + if (err) { + mutex_unlock(&devlink->lock); + goto out; + } + idx++; + } + mutex_unlock(&devlink->lock); + } +out: + mutex_unlock(&devlink_mutex); + + cb->args[0] = idx; + return msg->len; +} + +static int +__devlink_trap_group_action_set(struct devlink *devlink, + struct devlink_trap_group_item *group_item, + enum devlink_trap_action trap_action, + struct netlink_ext_ack *extack) +{ + const char *group_name = group_item->group->name; + struct devlink_trap_item *trap_item; + int err; + + list_for_each_entry(trap_item, &devlink->trap_list, list) { + if (strcmp(trap_item->trap->group.name, group_name)) + continue; + err = __devlink_trap_action_set(devlink, trap_item, + trap_action, extack); + if (err) + return err; + } + + return 0; +} + +static int +devlink_trap_group_action_set(struct devlink *devlink, + struct devlink_trap_group_item *group_item, + struct genl_info *info) +{ + enum devlink_trap_action trap_action; + int err; + + if (!info->attrs[DEVLINK_ATTR_TRAP_ACTION]) + return 0; + + err = devlink_trap_action_get_from_info(info, &trap_action); + if (err) { + NL_SET_ERR_MSG_MOD(info->extack, "Invalid trap action"); + return -EINVAL; + } + + err = __devlink_trap_group_action_set(devlink, group_item, trap_action, + info->extack); + if (err) + return err; + + return 0; +} + +static int devlink_nl_cmd_trap_group_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct netlink_ext_ack *extack = info->extack; + struct devlink *devlink = info->user_ptr[0]; + struct devlink_trap_group_item *group_item; + int err; + + if (list_empty(&devlink->trap_group_list)) + return -EOPNOTSUPP; + + group_item = devlink_trap_group_item_get_from_info(devlink, info); + if (!group_item) { + NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap group"); + return -ENOENT; + } + + err = devlink_trap_group_action_set(devlink, group_item, info); + if (err) + return err; + + return 0; +} + static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING }, @@ -5184,6 +5790,9 @@ static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = { [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 }, [DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .type = NLA_NUL_STRING }, [DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING }, + [DEVLINK_ATTR_TRAP_ACTION] = { .type = NLA_U8 }, + [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING }, }; static const struct genl_ops devlink_nl_ops[] = { @@ -5483,6 +6092,32 @@ static const struct genl_ops devlink_nl_ops[] = { .flags = GENL_ADMIN_PERM, 
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, }, + { + .cmd = DEVLINK_CMD_TRAP_GET, + .doit = devlink_nl_cmd_trap_get_doit, + .dumpit = devlink_nl_cmd_trap_get_dumpit, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = DEVLINK_CMD_TRAP_SET, + .doit = devlink_nl_cmd_trap_set_doit, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, + { + .cmd = DEVLINK_CMD_TRAP_GROUP_GET, + .doit = devlink_nl_cmd_trap_group_get_doit, + .dumpit = devlink_nl_cmd_trap_group_get_dumpit, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + /* can be retrieved by unprivileged users */ + }, + { + .cmd = DEVLINK_CMD_TRAP_GROUP_SET, + .doit = devlink_nl_cmd_trap_group_set_doit, + .flags = GENL_ADMIN_PERM, + .internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK, + }, }; static struct genl_family devlink_nl_family __ro_after_init = { @@ -5528,6 +6163,8 @@ struct devlink *devlink_alloc(const struct devlink_ops *ops, size_t priv_size) INIT_LIST_HEAD(&devlink->param_list); INIT_LIST_HEAD(&devlink->region_list); INIT_LIST_HEAD(&devlink->reporter_list); + INIT_LIST_HEAD(&devlink->trap_list); + INIT_LIST_HEAD(&devlink->trap_group_list); mutex_init(&devlink->lock); mutex_init(&devlink->reporters_lock); return devlink; @@ -5574,6 +6211,8 @@ void devlink_free(struct devlink *devlink) { mutex_destroy(&devlink->reporters_lock); mutex_destroy(&devlink->lock); + WARN_ON(!list_empty(&devlink->trap_group_list)); + WARN_ON(!list_empty(&devlink->trap_list)); WARN_ON(!list_empty(&devlink->reporter_list)); WARN_ON(!list_empty(&devlink->region_list)); WARN_ON(!list_empty(&devlink->param_list)); @@ -5678,10 +6317,10 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port, if (WARN_ON(!devlink_port->registered)) return; devlink_port_type_warn_cancel(devlink_port); - spin_lock(&devlink_port->type_lock); + spin_lock_bh(&devlink_port->type_lock); devlink_port->type = type; devlink_port->type_dev = type_dev; - spin_unlock(&devlink_port->type_lock); + spin_unlock_bh(&devlink_port->type_lock); devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_NEW); } @@ -6538,7 +7177,7 @@ __devlink_param_driverinit_value_set(struct devlink *devlink, int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id, union devlink_param_value *init_val) { - if (!devlink->ops->reload) + if (!devlink_reload_supported(devlink)) return -EOPNOTSUPP; return __devlink_param_driverinit_value_get(&devlink->param_list, @@ -6585,7 +7224,7 @@ int devlink_port_param_driverinit_value_get(struct devlink_port *devlink_port, { struct devlink *devlink = devlink_port->devlink; - if (!devlink->ops->reload) + if (!devlink_reload_supported(devlink)) return -EOPNOTSUPP; return __devlink_param_driverinit_value_get(&devlink_port->param_list, @@ -6744,7 +7383,7 @@ void devlink_region_destroy(struct devlink_region *region) /* Free all snapshots of region */ list_for_each_entry_safe(snapshot, ts, &region->snapshot_list, list) - devlink_region_snapshot_del(snapshot); + devlink_region_snapshot_del(region, snapshot); list_del(&region->list); @@ -6784,12 +7423,11 @@ EXPORT_SYMBOL_GPL(devlink_region_shapshot_id_get); * The @snapshot_id should be obtained using the getter function. 
* * @region: devlink region of the snapshot - * @data_len: size of snapshot data * @data: snapshot data * @snapshot_id: snapshot id to be created * @data_destructor: pointer to destructor function to free data */ -int devlink_region_snapshot_create(struct devlink_region *region, u64 data_len, +int devlink_region_snapshot_create(struct devlink_region *region, u8 *data, u32 snapshot_id, devlink_snapshot_data_dest_t *data_destructor) { @@ -6819,7 +7457,6 @@ int devlink_region_snapshot_create(struct devlink_region *region, u64 data_len, snapshot->id = snapshot_id; snapshot->region = region; snapshot->data = data; - snapshot->data_len = data_len; snapshot->data_destructor = data_destructor; list_add_tail(&snapshot->list, &region->snapshot_list); @@ -6836,6 +7473,475 @@ unlock: } EXPORT_SYMBOL_GPL(devlink_region_snapshot_create); +#define DEVLINK_TRAP(_id, _type) \ + { \ + .type = DEVLINK_TRAP_TYPE_##_type, \ + .id = DEVLINK_TRAP_GENERIC_ID_##_id, \ + .name = DEVLINK_TRAP_GENERIC_NAME_##_id, \ + } + +static const struct devlink_trap devlink_trap_generic[] = { + DEVLINK_TRAP(SMAC_MC, DROP), + DEVLINK_TRAP(VLAN_TAG_MISMATCH, DROP), + DEVLINK_TRAP(INGRESS_VLAN_FILTER, DROP), + DEVLINK_TRAP(INGRESS_STP_FILTER, DROP), + DEVLINK_TRAP(EMPTY_TX_LIST, DROP), + DEVLINK_TRAP(PORT_LOOPBACK_FILTER, DROP), + DEVLINK_TRAP(BLACKHOLE_ROUTE, DROP), + DEVLINK_TRAP(TTL_ERROR, EXCEPTION), + DEVLINK_TRAP(TAIL_DROP, DROP), +}; + +#define DEVLINK_TRAP_GROUP(_id) \ + { \ + .id = DEVLINK_TRAP_GROUP_GENERIC_ID_##_id, \ + .name = DEVLINK_TRAP_GROUP_GENERIC_NAME_##_id, \ + } + +static const struct devlink_trap_group devlink_trap_group_generic[] = { + DEVLINK_TRAP_GROUP(L2_DROPS), + DEVLINK_TRAP_GROUP(L3_DROPS), + DEVLINK_TRAP_GROUP(BUFFER_DROPS), +}; + +static int devlink_trap_generic_verify(const struct devlink_trap *trap) +{ + if (trap->id > DEVLINK_TRAP_GENERIC_ID_MAX) + return -EINVAL; + + if (strcmp(trap->name, devlink_trap_generic[trap->id].name)) + return -EINVAL; + + if (trap->type != devlink_trap_generic[trap->id].type) + return -EINVAL; + + return 0; +} + +static int devlink_trap_driver_verify(const struct devlink_trap *trap) +{ + int i; + + if (trap->id <= DEVLINK_TRAP_GENERIC_ID_MAX) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(devlink_trap_generic); i++) { + if (!strcmp(trap->name, devlink_trap_generic[i].name)) + return -EEXIST; + } + + return 0; +} + +static int devlink_trap_verify(const struct devlink_trap *trap) +{ + if (!trap || !trap->name || !trap->group.name) + return -EINVAL; + + if (trap->generic) + return devlink_trap_generic_verify(trap); + else + return devlink_trap_driver_verify(trap); +} + +static int +devlink_trap_group_generic_verify(const struct devlink_trap_group *group) +{ + if (group->id > DEVLINK_TRAP_GROUP_GENERIC_ID_MAX) + return -EINVAL; + + if (strcmp(group->name, devlink_trap_group_generic[group->id].name)) + return -EINVAL; + + return 0; +} + +static int +devlink_trap_group_driver_verify(const struct devlink_trap_group *group) +{ + int i; + + if (group->id <= DEVLINK_TRAP_GROUP_GENERIC_ID_MAX) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(devlink_trap_group_generic); i++) { + if (!strcmp(group->name, devlink_trap_group_generic[i].name)) + return -EEXIST; + } + + return 0; +} + +static int devlink_trap_group_verify(const struct devlink_trap_group *group) +{ + if (group->generic) + return devlink_trap_group_generic_verify(group); + else + return devlink_trap_group_driver_verify(group); +} + +static void +devlink_trap_group_notify(struct devlink *devlink, + const struct 
devlink_trap_group_item *group_item, + enum devlink_command cmd) +{ + struct sk_buff *msg; + int err; + + WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_GROUP_NEW && + cmd != DEVLINK_CMD_TRAP_GROUP_DEL); + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + err = devlink_nl_trap_group_fill(msg, devlink, group_item, cmd, 0, 0, + 0); + if (err) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), + msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); +} + +static struct devlink_trap_group_item * +devlink_trap_group_item_create(struct devlink *devlink, + const struct devlink_trap_group *group) +{ + struct devlink_trap_group_item *group_item; + int err; + + err = devlink_trap_group_verify(group); + if (err) + return ERR_PTR(err); + + group_item = kzalloc(sizeof(*group_item), GFP_KERNEL); + if (!group_item) + return ERR_PTR(-ENOMEM); + + group_item->stats = netdev_alloc_pcpu_stats(struct devlink_stats); + if (!group_item->stats) { + err = -ENOMEM; + goto err_stats_alloc; + } + + group_item->group = group; + refcount_set(&group_item->refcount, 1); + + if (devlink->ops->trap_group_init) { + err = devlink->ops->trap_group_init(devlink, group); + if (err) + goto err_group_init; + } + + list_add_tail(&group_item->list, &devlink->trap_group_list); + devlink_trap_group_notify(devlink, group_item, + DEVLINK_CMD_TRAP_GROUP_NEW); + + return group_item; + +err_group_init: + free_percpu(group_item->stats); +err_stats_alloc: + kfree(group_item); + return ERR_PTR(err); +} + +static void +devlink_trap_group_item_destroy(struct devlink *devlink, + struct devlink_trap_group_item *group_item) +{ + devlink_trap_group_notify(devlink, group_item, + DEVLINK_CMD_TRAP_GROUP_DEL); + list_del(&group_item->list); + free_percpu(group_item->stats); + kfree(group_item); +} + +static struct devlink_trap_group_item * +devlink_trap_group_item_get(struct devlink *devlink, + const struct devlink_trap_group *group) +{ + struct devlink_trap_group_item *group_item; + + group_item = devlink_trap_group_item_lookup(devlink, group->name); + if (group_item) { + refcount_inc(&group_item->refcount); + return group_item; + } + + return devlink_trap_group_item_create(devlink, group); +} + +static void +devlink_trap_group_item_put(struct devlink *devlink, + struct devlink_trap_group_item *group_item) +{ + if (!refcount_dec_and_test(&group_item->refcount)) + return; + + devlink_trap_group_item_destroy(devlink, group_item); +} + +static int +devlink_trap_item_group_link(struct devlink *devlink, + struct devlink_trap_item *trap_item) +{ + struct devlink_trap_group_item *group_item; + + group_item = devlink_trap_group_item_get(devlink, + &trap_item->trap->group); + if (IS_ERR(group_item)) + return PTR_ERR(group_item); + + trap_item->group_item = group_item; + + return 0; +} + +static void +devlink_trap_item_group_unlink(struct devlink *devlink, + struct devlink_trap_item *trap_item) +{ + devlink_trap_group_item_put(devlink, trap_item->group_item); +} + +static void devlink_trap_notify(struct devlink *devlink, + const struct devlink_trap_item *trap_item, + enum devlink_command cmd) +{ + struct sk_buff *msg; + int err; + + WARN_ON_ONCE(cmd != DEVLINK_CMD_TRAP_NEW && + cmd != DEVLINK_CMD_TRAP_DEL); + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + err = devlink_nl_trap_fill(msg, devlink, trap_item, cmd, 0, 0, 0); + if (err) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), + msg, 0, DEVLINK_MCGRP_CONFIG, 
GFP_KERNEL); +} + +static int +devlink_trap_register(struct devlink *devlink, + const struct devlink_trap *trap, void *priv) +{ + struct devlink_trap_item *trap_item; + int err; + + if (devlink_trap_item_lookup(devlink, trap->name)) + return -EEXIST; + + trap_item = kzalloc(sizeof(*trap_item), GFP_KERNEL); + if (!trap_item) + return -ENOMEM; + + trap_item->stats = netdev_alloc_pcpu_stats(struct devlink_stats); + if (!trap_item->stats) { + err = -ENOMEM; + goto err_stats_alloc; + } + + trap_item->trap = trap; + trap_item->action = trap->init_action; + trap_item->priv = priv; + + err = devlink_trap_item_group_link(devlink, trap_item); + if (err) + goto err_group_link; + + err = devlink->ops->trap_init(devlink, trap, trap_item); + if (err) + goto err_trap_init; + + list_add_tail(&trap_item->list, &devlink->trap_list); + devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_NEW); + + return 0; + +err_trap_init: + devlink_trap_item_group_unlink(devlink, trap_item); +err_group_link: + free_percpu(trap_item->stats); +err_stats_alloc: + kfree(trap_item); + return err; +} + +static void devlink_trap_unregister(struct devlink *devlink, + const struct devlink_trap *trap) +{ + struct devlink_trap_item *trap_item; + + trap_item = devlink_trap_item_lookup(devlink, trap->name); + if (WARN_ON_ONCE(!trap_item)) + return; + + devlink_trap_notify(devlink, trap_item, DEVLINK_CMD_TRAP_DEL); + list_del(&trap_item->list); + if (devlink->ops->trap_fini) + devlink->ops->trap_fini(devlink, trap, trap_item); + devlink_trap_item_group_unlink(devlink, trap_item); + free_percpu(trap_item->stats); + kfree(trap_item); +} + +static void devlink_trap_disable(struct devlink *devlink, + const struct devlink_trap *trap) +{ + struct devlink_trap_item *trap_item; + + trap_item = devlink_trap_item_lookup(devlink, trap->name); + if (WARN_ON_ONCE(!trap_item)) + return; + + devlink->ops->trap_action_set(devlink, trap, DEVLINK_TRAP_ACTION_DROP); + trap_item->action = DEVLINK_TRAP_ACTION_DROP; +} + +/** + * devlink_traps_register - Register packet traps with devlink. + * @devlink: devlink. + * @traps: Packet traps. + * @traps_count: Count of provided packet traps. + * @priv: Driver private information. + * + * Return: Non-zero value on failure. + */ +int devlink_traps_register(struct devlink *devlink, + const struct devlink_trap *traps, + size_t traps_count, void *priv) +{ + int i, err; + + if (!devlink->ops->trap_init || !devlink->ops->trap_action_set) + return -EINVAL; + + mutex_lock(&devlink->lock); + for (i = 0; i < traps_count; i++) { + const struct devlink_trap *trap = &traps[i]; + + err = devlink_trap_verify(trap); + if (err) + goto err_trap_verify; + + err = devlink_trap_register(devlink, trap, priv); + if (err) + goto err_trap_register; + } + mutex_unlock(&devlink->lock); + + return 0; + +err_trap_register: +err_trap_verify: + for (i--; i >= 0; i--) + devlink_trap_unregister(devlink, &traps[i]); + mutex_unlock(&devlink->lock); + return err; +} +EXPORT_SYMBOL_GPL(devlink_traps_register); + +/** + * devlink_traps_unregister - Unregister packet traps from devlink. + * @devlink: devlink. + * @traps: Packet traps. + * @traps_count: Count of provided packet traps. + */ +void devlink_traps_unregister(struct devlink *devlink, + const struct devlink_trap *traps, + size_t traps_count) +{ + int i; + + mutex_lock(&devlink->lock); + /* Make sure we do not have any packets in-flight while unregistering + * traps by disabling all of them and waiting for a grace period. 
+ */ + for (i = traps_count - 1; i >= 0; i--) + devlink_trap_disable(devlink, &traps[i]); + synchronize_rcu(); + for (i = traps_count - 1; i >= 0; i--) + devlink_trap_unregister(devlink, &traps[i]); + mutex_unlock(&devlink->lock); +} +EXPORT_SYMBOL_GPL(devlink_traps_unregister); + +static void +devlink_trap_stats_update(struct devlink_stats __percpu *trap_stats, + size_t skb_len) +{ + struct devlink_stats *stats; + + stats = this_cpu_ptr(trap_stats); + u64_stats_update_begin(&stats->syncp); + stats->rx_bytes += skb_len; + stats->rx_packets++; + u64_stats_update_end(&stats->syncp); +} + +static void +devlink_trap_report_metadata_fill(struct net_dm_hw_metadata *hw_metadata, + const struct devlink_trap_item *trap_item, + struct devlink_port *in_devlink_port) +{ + struct devlink_trap_group_item *group_item = trap_item->group_item; + + hw_metadata->trap_group_name = group_item->group->name; + hw_metadata->trap_name = trap_item->trap->name; + + spin_lock(&in_devlink_port->type_lock); + if (in_devlink_port->type == DEVLINK_PORT_TYPE_ETH) + hw_metadata->input_dev = in_devlink_port->type_dev; + spin_unlock(&in_devlink_port->type_lock); +} + +/** + * devlink_trap_report - Report trapped packet to drop monitor. + * @devlink: devlink. + * @skb: Trapped packet. + * @trap_ctx: Trap context. + * @in_devlink_port: Input devlink port. + */ +void devlink_trap_report(struct devlink *devlink, struct sk_buff *skb, + void *trap_ctx, struct devlink_port *in_devlink_port) +{ + struct devlink_trap_item *trap_item = trap_ctx; + struct net_dm_hw_metadata hw_metadata = {}; + + devlink_trap_stats_update(trap_item->stats, skb->len); + devlink_trap_stats_update(trap_item->group_item->stats, skb->len); + + devlink_trap_report_metadata_fill(&hw_metadata, trap_item, + in_devlink_port); + net_dm_hw_report(skb, &hw_metadata); +} +EXPORT_SYMBOL_GPL(devlink_trap_report); + +/** + * devlink_trap_ctx_priv - Trap context to driver private information. + * @trap_ctx: Trap context. + * + * Return: Driver private information passed during registration. + */ +void *devlink_trap_ctx_priv(void *trap_ctx) +{ + struct devlink_trap_item *trap_item = trap_ctx; + + return trap_item->priv; +} +EXPORT_SYMBOL_GPL(devlink_trap_ctx_priv); + static void __devlink_compat_running_version(struct devlink *devlink, char *buf, size_t len) { @@ -6941,11 +8047,10 @@ int devlink_compat_switch_id_get(struct net_device *dev, { struct devlink_port *devlink_port; - /* RTNL mutex is held here which ensures that devlink_port - * instance cannot disappear in the middle. No need to take + /* Caller must hold RTNL mutex or reference to dev, which ensures that + * devlink_port instance cannot disappear in the middle. No need to take * any devlink lock as only permanent values are accessed. */ - ASSERT_RTNL(); devlink_port = netdev_to_devlink_port(dev); if (!devlink_port || !devlink_port->attrs.switch_port) return -EOPNOTSUPP; diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 4ea4347f5062..536e032d95c8 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -26,6 +26,7 @@ #include <linux/bitops.h> #include <linux/slab.h> #include <linux/module.h> +#include <net/drop_monitor.h> #include <net/genetlink.h> #include <net/netevent.h> @@ -43,13 +44,44 @@ * netlink alerts */ static int trace_state = TRACE_OFF; -static DEFINE_MUTEX(trace_state_mutex); +static bool monitor_hw; + +/* net_dm_mutex + * + * An overall lock guarding every operation coming from userspace. + * It also guards the global 'hw_stats_list' list. 
+ */ +static DEFINE_MUTEX(net_dm_mutex); + +struct net_dm_stats { + u64 dropped; + struct u64_stats_sync syncp; +}; + +#define NET_DM_MAX_HW_TRAP_NAME_LEN 40 + +struct net_dm_hw_entry { + char trap_name[NET_DM_MAX_HW_TRAP_NAME_LEN]; + u32 count; +}; + +struct net_dm_hw_entries { + u32 num_entries; + struct net_dm_hw_entry entries[0]; +}; struct per_cpu_dm_data { - spinlock_t lock; - struct sk_buff *skb; + spinlock_t lock; /* Protects 'skb', 'hw_entries' and + * 'send_timer' + */ + union { + struct sk_buff *skb; + struct net_dm_hw_entries *hw_entries; + }; + struct sk_buff_head drop_queue; struct work_struct dm_alert_work; struct timer_list send_timer; + struct net_dm_stats stats; }; struct dm_hw_stat_delta { @@ -63,12 +95,37 @@ struct dm_hw_stat_delta { static struct genl_family net_drop_monitor_family; static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); +static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_hw_cpu_data); static int dm_hit_limit = 64; static int dm_delay = 1; static unsigned long dm_hw_check_delta = 2*HZ; static LIST_HEAD(hw_stats_list); +static enum net_dm_alert_mode net_dm_alert_mode = NET_DM_ALERT_MODE_SUMMARY; +static u32 net_dm_trunc_len; +static u32 net_dm_queue_len = 1000; + +struct net_dm_alert_ops { + void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb, + void *location); + void (*napi_poll_probe)(void *ignore, struct napi_struct *napi, + int work, int budget); + void (*work_item_func)(struct work_struct *work); + void (*hw_work_item_func)(struct work_struct *work); + void (*hw_probe)(struct sk_buff *skb, + const struct net_dm_hw_metadata *hw_metadata); +}; + +struct net_dm_skb_cb { + union { + struct net_dm_hw_metadata *hw_metadata; + void *pc; + }; +}; + +#define NET_DM_SKB_CB(__skb) ((struct net_dm_skb_cb *)&((__skb)->cb[0])) + static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data) { size_t al; @@ -235,48 +292,844 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi, rcu_read_unlock(); } -static int set_all_monitor_traces(int state) +static struct net_dm_hw_entries * +net_dm_hw_reset_per_cpu_data(struct per_cpu_dm_data *hw_data) { - int rc = 0; - struct dm_hw_stat_delta *new_stat = NULL; - struct dm_hw_stat_delta *temp; + struct net_dm_hw_entries *hw_entries; + unsigned long flags; + + hw_entries = kzalloc(struct_size(hw_entries, entries, dm_hit_limit), + GFP_KERNEL); + if (!hw_entries) { + /* If the memory allocation failed, we try to perform another + * allocation in 1/10 second. Otherwise, the probe function + * will constantly bail out. 
+ */ + mod_timer(&hw_data->send_timer, jiffies + HZ / 10); + } - mutex_lock(&trace_state_mutex); + spin_lock_irqsave(&hw_data->lock, flags); + swap(hw_data->hw_entries, hw_entries); + spin_unlock_irqrestore(&hw_data->lock, flags); - if (state == trace_state) { - rc = -EAGAIN; - goto out_unlock; + return hw_entries; +} + +static int net_dm_hw_entry_put(struct sk_buff *msg, + const struct net_dm_hw_entry *hw_entry) +{ + struct nlattr *attr; + + attr = nla_nest_start(msg, NET_DM_ATTR_HW_ENTRY); + if (!attr) + return -EMSGSIZE; + + if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_NAME, hw_entry->trap_name)) + goto nla_put_failure; + + if (nla_put_u32(msg, NET_DM_ATTR_HW_TRAP_COUNT, hw_entry->count)) + goto nla_put_failure; + + nla_nest_end(msg, attr); + + return 0; + +nla_put_failure: + nla_nest_cancel(msg, attr); + return -EMSGSIZE; +} + +static int net_dm_hw_entries_put(struct sk_buff *msg, + const struct net_dm_hw_entries *hw_entries) +{ + struct nlattr *attr; + int i; + + attr = nla_nest_start(msg, NET_DM_ATTR_HW_ENTRIES); + if (!attr) + return -EMSGSIZE; + + for (i = 0; i < hw_entries->num_entries; i++) { + int rc; + + rc = net_dm_hw_entry_put(msg, &hw_entries->entries[i]); + if (rc) + goto nla_put_failure; } - switch (state) { - case TRACE_ON: - if (!try_module_get(THIS_MODULE)) { - rc = -ENODEV; - break; + nla_nest_end(msg, attr); + + return 0; + +nla_put_failure: + nla_nest_cancel(msg, attr); + return -EMSGSIZE; +} + +static int +net_dm_hw_summary_report_fill(struct sk_buff *msg, + const struct net_dm_hw_entries *hw_entries) +{ + struct net_dm_alert_msg anc_hdr = { 0 }; + void *hdr; + int rc; + + hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0, + NET_DM_CMD_ALERT); + if (!hdr) + return -EMSGSIZE; + + /* We need to put the ancillary header in order not to break user + * space. 
+ */ + if (nla_put(msg, NLA_UNSPEC, sizeof(anc_hdr), &anc_hdr)) + goto nla_put_failure; + + rc = net_dm_hw_entries_put(msg, hw_entries); + if (rc) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static void net_dm_hw_summary_work(struct work_struct *work) +{ + struct net_dm_hw_entries *hw_entries; + struct per_cpu_dm_data *hw_data; + struct sk_buff *msg; + int rc; + + hw_data = container_of(work, struct per_cpu_dm_data, dm_alert_work); + + hw_entries = net_dm_hw_reset_per_cpu_data(hw_data); + if (!hw_entries) + return; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + goto out; + + rc = net_dm_hw_summary_report_fill(msg, hw_entries); + if (rc) { + nlmsg_free(msg); + goto out; + } + + genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL); + +out: + kfree(hw_entries); +} + +static void +net_dm_hw_summary_probe(struct sk_buff *skb, + const struct net_dm_hw_metadata *hw_metadata) +{ + struct net_dm_hw_entries *hw_entries; + struct net_dm_hw_entry *hw_entry; + struct per_cpu_dm_data *hw_data; + unsigned long flags; + int i; + + hw_data = this_cpu_ptr(&dm_hw_cpu_data); + spin_lock_irqsave(&hw_data->lock, flags); + hw_entries = hw_data->hw_entries; + + if (!hw_entries) + goto out; + + for (i = 0; i < hw_entries->num_entries; i++) { + hw_entry = &hw_entries->entries[i]; + if (!strncmp(hw_entry->trap_name, hw_metadata->trap_name, + NET_DM_MAX_HW_TRAP_NAME_LEN - 1)) { + hw_entry->count++; + goto out; } + } + if (WARN_ON_ONCE(hw_entries->num_entries == dm_hit_limit)) + goto out; - rc |= register_trace_kfree_skb(trace_kfree_skb_hit, NULL); - rc |= register_trace_napi_poll(trace_napi_poll_hit, NULL); - break; + hw_entry = &hw_entries->entries[hw_entries->num_entries]; + strlcpy(hw_entry->trap_name, hw_metadata->trap_name, + NET_DM_MAX_HW_TRAP_NAME_LEN - 1); + hw_entry->count = 1; + hw_entries->num_entries++; - case TRACE_OFF: - rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL); - rc |= unregister_trace_napi_poll(trace_napi_poll_hit, NULL); + if (!timer_pending(&hw_data->send_timer)) { + hw_data->send_timer.expires = jiffies + dm_delay * HZ; + add_timer(&hw_data->send_timer); + } - tracepoint_synchronize_unregister(); +out: + spin_unlock_irqrestore(&hw_data->lock, flags); +} - /* - * Clean the device list +static const struct net_dm_alert_ops net_dm_alert_summary_ops = { + .kfree_skb_probe = trace_kfree_skb_hit, + .napi_poll_probe = trace_napi_poll_hit, + .work_item_func = send_dm_alert, + .hw_work_item_func = net_dm_hw_summary_work, + .hw_probe = net_dm_hw_summary_probe, +}; + +static void net_dm_packet_trace_kfree_skb_hit(void *ignore, + struct sk_buff *skb, + void *location) +{ + ktime_t tstamp = ktime_get_real(); + struct per_cpu_dm_data *data; + struct sk_buff *nskb; + unsigned long flags; + + if (!skb_mac_header_was_set(skb)) + return; + + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + return; + + NET_DM_SKB_CB(nskb)->pc = location; + /* Override the timestamp because we care about the time when the + * packet was dropped. 
+ */ + nskb->tstamp = tstamp; + + data = this_cpu_ptr(&dm_cpu_data); + + spin_lock_irqsave(&data->drop_queue.lock, flags); + if (skb_queue_len(&data->drop_queue) < net_dm_queue_len) + __skb_queue_tail(&data->drop_queue, nskb); + else + goto unlock_free; + spin_unlock_irqrestore(&data->drop_queue.lock, flags); + + schedule_work(&data->dm_alert_work); + + return; + +unlock_free: + spin_unlock_irqrestore(&data->drop_queue.lock, flags); + u64_stats_update_begin(&data->stats.syncp); + data->stats.dropped++; + u64_stats_update_end(&data->stats.syncp); + consume_skb(nskb); +} + +static void net_dm_packet_trace_napi_poll_hit(void *ignore, + struct napi_struct *napi, + int work, int budget) +{ +} + +static size_t net_dm_in_port_size(void) +{ + /* NET_DM_ATTR_IN_PORT nest */ + return nla_total_size(0) + + /* NET_DM_ATTR_PORT_NETDEV_IFINDEX */ + nla_total_size(sizeof(u32)) + + /* NET_DM_ATTR_PORT_NETDEV_NAME */ + nla_total_size(IFNAMSIZ + 1); +} + +#define NET_DM_MAX_SYMBOL_LEN 40 + +static size_t net_dm_packet_report_size(size_t payload_len) +{ + size_t size; + + size = nlmsg_msg_size(GENL_HDRLEN + net_drop_monitor_family.hdrsize); + + return NLMSG_ALIGN(size) + + /* NET_DM_ATTR_ORIGIN */ + nla_total_size(sizeof(u16)) + + /* NET_DM_ATTR_PC */ + nla_total_size(sizeof(u64)) + + /* NET_DM_ATTR_SYMBOL */ + nla_total_size(NET_DM_MAX_SYMBOL_LEN + 1) + + /* NET_DM_ATTR_IN_PORT */ + net_dm_in_port_size() + + /* NET_DM_ATTR_TIMESTAMP */ + nla_total_size(sizeof(u64)) + + /* NET_DM_ATTR_ORIG_LEN */ + nla_total_size(sizeof(u32)) + + /* NET_DM_ATTR_PROTO */ + nla_total_size(sizeof(u16)) + + /* NET_DM_ATTR_PAYLOAD */ + nla_total_size(payload_len); +} + +static int net_dm_packet_report_in_port_put(struct sk_buff *msg, int ifindex, + const char *name) +{ + struct nlattr *attr; + + attr = nla_nest_start(msg, NET_DM_ATTR_IN_PORT); + if (!attr) + return -EMSGSIZE; + + if (ifindex && + nla_put_u32(msg, NET_DM_ATTR_PORT_NETDEV_IFINDEX, ifindex)) + goto nla_put_failure; + + if (name && nla_put_string(msg, NET_DM_ATTR_PORT_NETDEV_NAME, name)) + goto nla_put_failure; + + nla_nest_end(msg, attr); + + return 0; + +nla_put_failure: + nla_nest_cancel(msg, attr); + return -EMSGSIZE; +} + +static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb, + size_t payload_len) +{ + u64 pc = (u64)(uintptr_t) NET_DM_SKB_CB(skb)->pc; + char buf[NET_DM_MAX_SYMBOL_LEN]; + struct nlattr *attr; + void *hdr; + int rc; + + hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0, + NET_DM_CMD_PACKET_ALERT); + if (!hdr) + return -EMSGSIZE; + + if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_SW)) + goto nla_put_failure; + + if (nla_put_u64_64bit(msg, NET_DM_ATTR_PC, pc, NET_DM_ATTR_PAD)) + goto nla_put_failure; + + snprintf(buf, sizeof(buf), "%pS", NET_DM_SKB_CB(skb)->pc); + if (nla_put_string(msg, NET_DM_ATTR_SYMBOL, buf)) + goto nla_put_failure; + + rc = net_dm_packet_report_in_port_put(msg, skb->skb_iif, NULL); + if (rc) + goto nla_put_failure; + + if (nla_put_u64_64bit(msg, NET_DM_ATTR_TIMESTAMP, + ktime_to_ns(skb->tstamp), NET_DM_ATTR_PAD)) + goto nla_put_failure; + + if (nla_put_u32(msg, NET_DM_ATTR_ORIG_LEN, skb->len)) + goto nla_put_failure; + + if (!payload_len) + goto out; + + if (nla_put_u16(msg, NET_DM_ATTR_PROTO, be16_to_cpu(skb->protocol))) + goto nla_put_failure; + + attr = skb_put(msg, nla_total_size(payload_len)); + attr->nla_type = NET_DM_ATTR_PAYLOAD; + attr->nla_len = nla_attr_size(payload_len); + if (skb_copy_bits(skb, 0, nla_data(attr), payload_len)) + goto nla_put_failure; + +out: + 
genlmsg_end(msg, hdr); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +#define NET_DM_MAX_PACKET_SIZE (0xffff - NLA_HDRLEN - NLA_ALIGNTO) + +static void net_dm_packet_report(struct sk_buff *skb) +{ + struct sk_buff *msg; + size_t payload_len; + int rc; + + /* Make sure we start copying the packet from the MAC header */ + if (skb->data > skb_mac_header(skb)) + skb_push(skb, skb->data - skb_mac_header(skb)); + else + skb_pull(skb, skb_mac_header(skb) - skb->data); + + /* Ensure packet fits inside a single netlink attribute */ + payload_len = min_t(size_t, skb->len, NET_DM_MAX_PACKET_SIZE); + if (net_dm_trunc_len) + payload_len = min_t(size_t, net_dm_trunc_len, payload_len); + + msg = nlmsg_new(net_dm_packet_report_size(payload_len), GFP_KERNEL); + if (!msg) + goto out; + + rc = net_dm_packet_report_fill(msg, skb, payload_len); + if (rc) { + nlmsg_free(msg); + goto out; + } + + genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL); + +out: + consume_skb(skb); +} + +static void net_dm_packet_work(struct work_struct *work) +{ + struct per_cpu_dm_data *data; + struct sk_buff_head list; + struct sk_buff *skb; + unsigned long flags; + + data = container_of(work, struct per_cpu_dm_data, dm_alert_work); + + __skb_queue_head_init(&list); + + spin_lock_irqsave(&data->drop_queue.lock, flags); + skb_queue_splice_tail_init(&data->drop_queue, &list); + spin_unlock_irqrestore(&data->drop_queue.lock, flags); + + while ((skb = __skb_dequeue(&list))) + net_dm_packet_report(skb); +} + +static size_t +net_dm_hw_packet_report_size(size_t payload_len, + const struct net_dm_hw_metadata *hw_metadata) +{ + size_t size; + + size = nlmsg_msg_size(GENL_HDRLEN + net_drop_monitor_family.hdrsize); + + return NLMSG_ALIGN(size) + + /* NET_DM_ATTR_ORIGIN */ + nla_total_size(sizeof(u16)) + + /* NET_DM_ATTR_HW_TRAP_GROUP_NAME */ + nla_total_size(strlen(hw_metadata->trap_group_name) + 1) + + /* NET_DM_ATTR_HW_TRAP_NAME */ + nla_total_size(strlen(hw_metadata->trap_name) + 1) + + /* NET_DM_ATTR_IN_PORT */ + net_dm_in_port_size() + + /* NET_DM_ATTR_TIMESTAMP */ + nla_total_size(sizeof(u64)) + + /* NET_DM_ATTR_ORIG_LEN */ + nla_total_size(sizeof(u32)) + + /* NET_DM_ATTR_PROTO */ + nla_total_size(sizeof(u16)) + + /* NET_DM_ATTR_PAYLOAD */ + nla_total_size(payload_len); +} + +static int net_dm_hw_packet_report_fill(struct sk_buff *msg, + struct sk_buff *skb, size_t payload_len) +{ + struct net_dm_hw_metadata *hw_metadata; + struct nlattr *attr; + void *hdr; + + hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata; + + hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0, + NET_DM_CMD_PACKET_ALERT); + if (!hdr) + return -EMSGSIZE; + + if (nla_put_u16(msg, NET_DM_ATTR_ORIGIN, NET_DM_ORIGIN_HW)) + goto nla_put_failure; + + if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_GROUP_NAME, + hw_metadata->trap_group_name)) + goto nla_put_failure; + + if (nla_put_string(msg, NET_DM_ATTR_HW_TRAP_NAME, + hw_metadata->trap_name)) + goto nla_put_failure; + + if (hw_metadata->input_dev) { + struct net_device *dev = hw_metadata->input_dev; + int rc; + + rc = net_dm_packet_report_in_port_put(msg, dev->ifindex, + dev->name); + if (rc) + goto nla_put_failure; + } + + if (nla_put_u64_64bit(msg, NET_DM_ATTR_TIMESTAMP, + ktime_to_ns(skb->tstamp), NET_DM_ATTR_PAD)) + goto nla_put_failure; + + if (nla_put_u32(msg, NET_DM_ATTR_ORIG_LEN, skb->len)) + goto nla_put_failure; + + if (!payload_len) + goto out; + + if (nla_put_u16(msg, NET_DM_ATTR_PROTO, be16_to_cpu(skb->protocol))) + goto nla_put_failure; + + attr = 
skb_put(msg, nla_total_size(payload_len)); + attr->nla_type = NET_DM_ATTR_PAYLOAD; + attr->nla_len = nla_attr_size(payload_len); + if (skb_copy_bits(skb, 0, nla_data(attr), payload_len)) + goto nla_put_failure; + +out: + genlmsg_end(msg, hdr); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static struct net_dm_hw_metadata * +net_dm_hw_metadata_clone(const struct net_dm_hw_metadata *hw_metadata) +{ + struct net_dm_hw_metadata *n_hw_metadata; + const char *trap_group_name; + const char *trap_name; + + n_hw_metadata = kmalloc(sizeof(*hw_metadata), GFP_ATOMIC); + if (!n_hw_metadata) + return NULL; + + trap_group_name = kmemdup(hw_metadata->trap_group_name, + strlen(hw_metadata->trap_group_name) + 1, + GFP_ATOMIC | __GFP_ZERO); + if (!trap_group_name) + goto free_hw_metadata; + n_hw_metadata->trap_group_name = trap_group_name; + + trap_name = kmemdup(hw_metadata->trap_name, + strlen(hw_metadata->trap_name) + 1, + GFP_ATOMIC | __GFP_ZERO); + if (!trap_name) + goto free_trap_group; + n_hw_metadata->trap_name = trap_name; + + n_hw_metadata->input_dev = hw_metadata->input_dev; + if (n_hw_metadata->input_dev) + dev_hold(n_hw_metadata->input_dev); + + return n_hw_metadata; + +free_trap_group: + kfree(trap_group_name); +free_hw_metadata: + kfree(n_hw_metadata); + return NULL; +} + +static void +net_dm_hw_metadata_free(const struct net_dm_hw_metadata *hw_metadata) +{ + if (hw_metadata->input_dev) + dev_put(hw_metadata->input_dev); + kfree(hw_metadata->trap_name); + kfree(hw_metadata->trap_group_name); + kfree(hw_metadata); +} + +static void net_dm_hw_packet_report(struct sk_buff *skb) +{ + struct net_dm_hw_metadata *hw_metadata; + struct sk_buff *msg; + size_t payload_len; + int rc; + + if (skb->data > skb_mac_header(skb)) + skb_push(skb, skb->data - skb_mac_header(skb)); + else + skb_pull(skb, skb_mac_header(skb) - skb->data); + + payload_len = min_t(size_t, skb->len, NET_DM_MAX_PACKET_SIZE); + if (net_dm_trunc_len) + payload_len = min_t(size_t, net_dm_trunc_len, payload_len); + + hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata; + msg = nlmsg_new(net_dm_hw_packet_report_size(payload_len, hw_metadata), + GFP_KERNEL); + if (!msg) + goto out; + + rc = net_dm_hw_packet_report_fill(msg, skb, payload_len); + if (rc) { + nlmsg_free(msg); + goto out; + } + + genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL); + +out: + net_dm_hw_metadata_free(NET_DM_SKB_CB(skb)->hw_metadata); + consume_skb(skb); +} + +static void net_dm_hw_packet_work(struct work_struct *work) +{ + struct per_cpu_dm_data *hw_data; + struct sk_buff_head list; + struct sk_buff *skb; + unsigned long flags; + + hw_data = container_of(work, struct per_cpu_dm_data, dm_alert_work); + + __skb_queue_head_init(&list); + + spin_lock_irqsave(&hw_data->drop_queue.lock, flags); + skb_queue_splice_tail_init(&hw_data->drop_queue, &list); + spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags); + + while ((skb = __skb_dequeue(&list))) + net_dm_hw_packet_report(skb); +} + +static void +net_dm_hw_packet_probe(struct sk_buff *skb, + const struct net_dm_hw_metadata *hw_metadata) +{ + struct net_dm_hw_metadata *n_hw_metadata; + ktime_t tstamp = ktime_get_real(); + struct per_cpu_dm_data *hw_data; + struct sk_buff *nskb; + unsigned long flags; + + if (!skb_mac_header_was_set(skb)) + return; + + nskb = skb_clone(skb, GFP_ATOMIC); + if (!nskb) + return; + + n_hw_metadata = net_dm_hw_metadata_clone(hw_metadata); + if (!n_hw_metadata) + goto free; + + NET_DM_SKB_CB(nskb)->hw_metadata = n_hw_metadata; + 
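net_dm_hw_packet_probe() above clones the trapped skb, stashes the cloned metadata pointer in the clone's control block via NET_DM_SKB_CB(), timestamps it and queues it for the per-CPU work item. A small standalone sketch of that cast-the-cb-array pattern is below; struct fake_skb and struct my_cb merely stand in for sk_buff and struct net_dm_skb_cb and are not kernel types.

#include <stdio.h>
#include <string.h>

/* Stand-ins for sk_buff::cb and struct net_dm_skb_cb; illustrative only. */
struct fake_skb {
    char cb[48];
};

struct my_cb {
    void *hw_metadata;
};

/* The private struct must fit inside the fixed-size control block. */
_Static_assert(sizeof(struct my_cb) <= sizeof(((struct fake_skb *)0)->cb),
               "control block too small");

#define MY_CB(skb) ((struct my_cb *)&((skb)->cb[0]))

int main(void)
{
    struct fake_skb skb;
    int token = 42;

    memset(&skb, 0, sizeof(skb));
    MY_CB(&skb)->hw_metadata = &token;     /* stash, like NET_DM_SKB_CB(nskb)->hw_metadata */
    printf("%d\n", *(int *)MY_CB(&skb)->hw_metadata);
    return 0;
}

Keeping the pointer inside the skb itself is what lets the work item later recover the metadata without any additional lookup structure.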
nskb->tstamp = tstamp; + + hw_data = this_cpu_ptr(&dm_hw_cpu_data); + + spin_lock_irqsave(&hw_data->drop_queue.lock, flags); + if (skb_queue_len(&hw_data->drop_queue) < net_dm_queue_len) + __skb_queue_tail(&hw_data->drop_queue, nskb); + else + goto unlock_free; + spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags); + + schedule_work(&hw_data->dm_alert_work); + + return; + +unlock_free: + spin_unlock_irqrestore(&hw_data->drop_queue.lock, flags); + u64_stats_update_begin(&hw_data->stats.syncp); + hw_data->stats.dropped++; + u64_stats_update_end(&hw_data->stats.syncp); + net_dm_hw_metadata_free(n_hw_metadata); +free: + consume_skb(nskb); +} + +static const struct net_dm_alert_ops net_dm_alert_packet_ops = { + .kfree_skb_probe = net_dm_packet_trace_kfree_skb_hit, + .napi_poll_probe = net_dm_packet_trace_napi_poll_hit, + .work_item_func = net_dm_packet_work, + .hw_work_item_func = net_dm_hw_packet_work, + .hw_probe = net_dm_hw_packet_probe, +}; + +static const struct net_dm_alert_ops *net_dm_alert_ops_arr[] = { + [NET_DM_ALERT_MODE_SUMMARY] = &net_dm_alert_summary_ops, + [NET_DM_ALERT_MODE_PACKET] = &net_dm_alert_packet_ops, +}; + +void net_dm_hw_report(struct sk_buff *skb, + const struct net_dm_hw_metadata *hw_metadata) +{ + rcu_read_lock(); + + if (!monitor_hw) + goto out; + + net_dm_alert_ops_arr[net_dm_alert_mode]->hw_probe(skb, hw_metadata); + +out: + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(net_dm_hw_report); + +static int net_dm_hw_monitor_start(struct netlink_ext_ack *extack) +{ + const struct net_dm_alert_ops *ops; + int cpu; + + if (monitor_hw) { + NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already enabled"); + return -EAGAIN; + } + + ops = net_dm_alert_ops_arr[net_dm_alert_mode]; + + if (!try_module_get(THIS_MODULE)) { + NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module"); + return -ENODEV; + } + + for_each_possible_cpu(cpu) { + struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu); + struct net_dm_hw_entries *hw_entries; + + INIT_WORK(&hw_data->dm_alert_work, ops->hw_work_item_func); + timer_setup(&hw_data->send_timer, sched_send_work, 0); + hw_entries = net_dm_hw_reset_per_cpu_data(hw_data); + kfree(hw_entries); + } + + monitor_hw = true; + + return 0; +} + +static void net_dm_hw_monitor_stop(struct netlink_ext_ack *extack) +{ + int cpu; + + if (!monitor_hw) + NL_SET_ERR_MSG_MOD(extack, "Hardware monitoring already disabled"); + + monitor_hw = false; + + /* After this call returns we are guaranteed that no CPU is processing + * any hardware drops. 
+ */ + synchronize_rcu(); + + for_each_possible_cpu(cpu) { + struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu); + struct sk_buff *skb; + + del_timer_sync(&hw_data->send_timer); + cancel_work_sync(&hw_data->dm_alert_work); + while ((skb = __skb_dequeue(&hw_data->drop_queue))) { + struct net_dm_hw_metadata *hw_metadata; + + hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata; + net_dm_hw_metadata_free(hw_metadata); + consume_skb(skb); + } + } + + module_put(THIS_MODULE); +} + +static int net_dm_trace_on_set(struct netlink_ext_ack *extack) +{ + const struct net_dm_alert_ops *ops; + int cpu, rc; + + ops = net_dm_alert_ops_arr[net_dm_alert_mode]; + + if (!try_module_get(THIS_MODULE)) { + NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module"); + return -ENODEV; + } + + for_each_possible_cpu(cpu) { + struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu); + struct sk_buff *skb; + + INIT_WORK(&data->dm_alert_work, ops->work_item_func); + timer_setup(&data->send_timer, sched_send_work, 0); + /* Allocate a new per-CPU skb for the summary alert message and + * free the old one which might contain stale data from + * previous tracing. */ - list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) { - if (new_stat->dev == NULL) { - list_del_rcu(&new_stat->list); - kfree_rcu(new_stat, rcu); - } + skb = reset_per_cpu_data(data); + consume_skb(skb); + } + + rc = register_trace_kfree_skb(ops->kfree_skb_probe, NULL); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to kfree_skb() tracepoint"); + goto err_module_put; + } + + rc = register_trace_napi_poll(ops->napi_poll_probe, NULL); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to napi_poll() tracepoint"); + goto err_unregister_trace; + } + + return 0; + +err_unregister_trace: + unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL); +err_module_put: + module_put(THIS_MODULE); + return rc; +} + +static void net_dm_trace_off_set(void) +{ + struct dm_hw_stat_delta *new_stat, *temp; + const struct net_dm_alert_ops *ops; + int cpu; + + ops = net_dm_alert_ops_arr[net_dm_alert_mode]; + + unregister_trace_napi_poll(ops->napi_poll_probe, NULL); + unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL); + + tracepoint_synchronize_unregister(); + + /* Make sure we do not send notifications to user space after request + * to stop tracing returns. 
+ */ + for_each_possible_cpu(cpu) { + struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu); + struct sk_buff *skb; + + del_timer_sync(&data->send_timer); + cancel_work_sync(&data->dm_alert_work); + while ((skb = __skb_dequeue(&data->drop_queue))) + consume_skb(skb); + } + + list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) { + if (new_stat->dev == NULL) { + list_del_rcu(&new_stat->list); + kfree_rcu(new_stat, rcu); } + } + + module_put(THIS_MODULE); +} + +static int set_all_monitor_traces(int state, struct netlink_ext_ack *extack) +{ + int rc = 0; - module_put(THIS_MODULE); + if (state == trace_state) { + NL_SET_ERR_MSG_MOD(extack, "Trace state already set to requested state"); + return -EAGAIN; + } + switch (state) { + case TRACE_ON: + rc = net_dm_trace_on_set(extack); + break; + case TRACE_OFF: + net_dm_trace_off_set(); break; default: rc = 1; @@ -288,30 +1141,331 @@ static int set_all_monitor_traces(int state) else rc = -EINPROGRESS; -out_unlock: - mutex_unlock(&trace_state_mutex); - return rc; } +static bool net_dm_is_monitoring(void) +{ + return trace_state == TRACE_ON || monitor_hw; +} + +static int net_dm_alert_mode_get_from_info(struct genl_info *info, + enum net_dm_alert_mode *p_alert_mode) +{ + u8 val; + + val = nla_get_u8(info->attrs[NET_DM_ATTR_ALERT_MODE]); + + switch (val) { + case NET_DM_ALERT_MODE_SUMMARY: /* fall-through */ + case NET_DM_ALERT_MODE_PACKET: + *p_alert_mode = val; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int net_dm_alert_mode_set(struct genl_info *info) +{ + struct netlink_ext_ack *extack = info->extack; + enum net_dm_alert_mode alert_mode; + int rc; + + if (!info->attrs[NET_DM_ATTR_ALERT_MODE]) + return 0; + + rc = net_dm_alert_mode_get_from_info(info, &alert_mode); + if (rc) { + NL_SET_ERR_MSG_MOD(extack, "Invalid alert mode"); + return -EINVAL; + } + + net_dm_alert_mode = alert_mode; + + return 0; +} + +static void net_dm_trunc_len_set(struct genl_info *info) +{ + if (!info->attrs[NET_DM_ATTR_TRUNC_LEN]) + return; + + net_dm_trunc_len = nla_get_u32(info->attrs[NET_DM_ATTR_TRUNC_LEN]); +} + +static void net_dm_queue_len_set(struct genl_info *info) +{ + if (!info->attrs[NET_DM_ATTR_QUEUE_LEN]) + return; + + net_dm_queue_len = nla_get_u32(info->attrs[NET_DM_ATTR_QUEUE_LEN]); +} static int net_dm_cmd_config(struct sk_buff *skb, struct genl_info *info) { - return -ENOTSUPP; + struct netlink_ext_ack *extack = info->extack; + int rc; + + if (net_dm_is_monitoring()) { + NL_SET_ERR_MSG_MOD(extack, "Cannot configure drop monitor during monitoring"); + return -EBUSY; + } + + rc = net_dm_alert_mode_set(info); + if (rc) + return rc; + + net_dm_trunc_len_set(info); + + net_dm_queue_len_set(info); + + return 0; +} + +static int net_dm_monitor_start(bool set_sw, bool set_hw, + struct netlink_ext_ack *extack) +{ + bool sw_set = false; + int rc; + + if (set_sw) { + rc = set_all_monitor_traces(TRACE_ON, extack); + if (rc) + return rc; + sw_set = true; + } + + if (set_hw) { + rc = net_dm_hw_monitor_start(extack); + if (rc) + goto err_monitor_hw; + } + + return 0; + +err_monitor_hw: + if (sw_set) + set_all_monitor_traces(TRACE_OFF, extack); + return rc; +} + +static void net_dm_monitor_stop(bool set_sw, bool set_hw, + struct netlink_ext_ack *extack) +{ + if (set_hw) + net_dm_hw_monitor_stop(extack); + if (set_sw) + set_all_monitor_traces(TRACE_OFF, extack); } static int net_dm_cmd_trace(struct sk_buff *skb, struct genl_info *info) { + bool set_sw = !!info->attrs[NET_DM_ATTR_SW_DROPS]; + bool set_hw = 
!!info->attrs[NET_DM_ATTR_HW_DROPS]; + struct netlink_ext_ack *extack = info->extack; + + /* To maintain backward compatibility, we start / stop monitoring of + * software drops if no flag is specified. + */ + if (!set_sw && !set_hw) + set_sw = true; + switch (info->genlhdr->cmd) { case NET_DM_CMD_START: - return set_all_monitor_traces(TRACE_ON); + return net_dm_monitor_start(set_sw, set_hw, extack); case NET_DM_CMD_STOP: - return set_all_monitor_traces(TRACE_OFF); + net_dm_monitor_stop(set_sw, set_hw, extack); + return 0; } - return -ENOTSUPP; + return -EOPNOTSUPP; +} + +static int net_dm_config_fill(struct sk_buff *msg, struct genl_info *info) +{ + void *hdr; + + hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, + &net_drop_monitor_family, 0, NET_DM_CMD_CONFIG_NEW); + if (!hdr) + return -EMSGSIZE; + + if (nla_put_u8(msg, NET_DM_ATTR_ALERT_MODE, net_dm_alert_mode)) + goto nla_put_failure; + + if (nla_put_u32(msg, NET_DM_ATTR_TRUNC_LEN, net_dm_trunc_len)) + goto nla_put_failure; + + if (nla_put_u32(msg, NET_DM_ATTR_QUEUE_LEN, net_dm_queue_len)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int net_dm_cmd_config_get(struct sk_buff *skb, struct genl_info *info) +{ + struct sk_buff *msg; + int rc; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + rc = net_dm_config_fill(msg, info); + if (rc) + goto free_msg; + + return genlmsg_reply(msg, info); + +free_msg: + nlmsg_free(msg); + return rc; +} + +static void net_dm_stats_read(struct net_dm_stats *stats) +{ + int cpu; + + memset(stats, 0, sizeof(*stats)); + for_each_possible_cpu(cpu) { + struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu); + struct net_dm_stats *cpu_stats = &data->stats; + unsigned int start; + u64 dropped; + + do { + start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + dropped = cpu_stats->dropped; + } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); + + stats->dropped += dropped; + } +} + +static int net_dm_stats_put(struct sk_buff *msg) +{ + struct net_dm_stats stats; + struct nlattr *attr; + + net_dm_stats_read(&stats); + + attr = nla_nest_start(msg, NET_DM_ATTR_STATS); + if (!attr) + return -EMSGSIZE; + + if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED, + stats.dropped, NET_DM_ATTR_PAD)) + goto nla_put_failure; + + nla_nest_end(msg, attr); + + return 0; + +nla_put_failure: + nla_nest_cancel(msg, attr); + return -EMSGSIZE; +} + +static void net_dm_hw_stats_read(struct net_dm_stats *stats) +{ + int cpu; + + memset(stats, 0, sizeof(*stats)); + for_each_possible_cpu(cpu) { + struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu); + struct net_dm_stats *cpu_stats = &hw_data->stats; + unsigned int start; + u64 dropped; + + do { + start = u64_stats_fetch_begin_irq(&cpu_stats->syncp); + dropped = cpu_stats->dropped; + } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start)); + + stats->dropped += dropped; + } +} + +static int net_dm_hw_stats_put(struct sk_buff *msg) +{ + struct net_dm_stats stats; + struct nlattr *attr; + + net_dm_hw_stats_read(&stats); + + attr = nla_nest_start(msg, NET_DM_ATTR_HW_STATS); + if (!attr) + return -EMSGSIZE; + + if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED, + stats.dropped, NET_DM_ATTR_PAD)) + goto nla_put_failure; + + nla_nest_end(msg, attr); + + return 0; + +nla_put_failure: + nla_nest_cancel(msg, attr); + return -EMSGSIZE; +} + +static int net_dm_stats_fill(struct sk_buff *msg, struct genl_info *info) +{ + void 
*hdr; + int rc; + + hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, + &net_drop_monitor_family, 0, NET_DM_CMD_STATS_NEW); + if (!hdr) + return -EMSGSIZE; + + rc = net_dm_stats_put(msg); + if (rc) + goto nla_put_failure; + + rc = net_dm_hw_stats_put(msg); + if (rc) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int net_dm_cmd_stats_get(struct sk_buff *skb, struct genl_info *info) +{ + struct sk_buff *msg; + int rc; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + rc = net_dm_stats_fill(msg, info); + if (rc) + goto free_msg; + + return genlmsg_reply(msg, info); + +free_msg: + nlmsg_free(msg); + return rc; } static int dropmon_net_event(struct notifier_block *ev_block, @@ -330,12 +1484,12 @@ static int dropmon_net_event(struct notifier_block *ev_block, new_stat->dev = dev; new_stat->last_rx = jiffies; - mutex_lock(&trace_state_mutex); + mutex_lock(&net_dm_mutex); list_add_rcu(&new_stat->list, &hw_stats_list); - mutex_unlock(&trace_state_mutex); + mutex_unlock(&net_dm_mutex); break; case NETDEV_UNREGISTER: - mutex_lock(&trace_state_mutex); + mutex_lock(&net_dm_mutex); list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) { if (new_stat->dev == dev) { new_stat->dev = NULL; @@ -346,18 +1500,28 @@ static int dropmon_net_event(struct notifier_block *ev_block, } } } - mutex_unlock(&trace_state_mutex); + mutex_unlock(&net_dm_mutex); break; } out: return NOTIFY_DONE; } +static const struct nla_policy net_dm_nl_policy[NET_DM_ATTR_MAX + 1] = { + [NET_DM_ATTR_UNSPEC] = { .strict_start_type = NET_DM_ATTR_UNSPEC + 1 }, + [NET_DM_ATTR_ALERT_MODE] = { .type = NLA_U8 }, + [NET_DM_ATTR_TRUNC_LEN] = { .type = NLA_U32 }, + [NET_DM_ATTR_QUEUE_LEN] = { .type = NLA_U32 }, + [NET_DM_ATTR_SW_DROPS] = {. type = NLA_FLAG }, + [NET_DM_ATTR_HW_DROPS] = {. 
type = NLA_FLAG }, +}; + static const struct genl_ops dropmon_ops[] = { { .cmd = NET_DM_CMD_CONFIG, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = net_dm_cmd_config, + .flags = GENL_ADMIN_PERM, }, { .cmd = NET_DM_CMD_START, @@ -369,12 +1533,38 @@ static const struct genl_ops dropmon_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = net_dm_cmd_trace, }, + { + .cmd = NET_DM_CMD_CONFIG_GET, + .doit = net_dm_cmd_config_get, + }, + { + .cmd = NET_DM_CMD_STATS_GET, + .doit = net_dm_cmd_stats_get, + }, }; +static int net_dm_nl_pre_doit(const struct genl_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + mutex_lock(&net_dm_mutex); + + return 0; +} + +static void net_dm_nl_post_doit(const struct genl_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + mutex_unlock(&net_dm_mutex); +} + static struct genl_family net_drop_monitor_family __ro_after_init = { .hdrsize = 0, .name = "NET_DM", .version = 2, + .maxattr = NET_DM_ATTR_MAX, + .policy = net_dm_nl_policy, + .pre_doit = net_dm_nl_pre_doit, + .post_doit = net_dm_nl_post_doit, .module = THIS_MODULE, .ops = dropmon_ops, .n_ops = ARRAY_SIZE(dropmon_ops), @@ -386,9 +1576,57 @@ static struct notifier_block dropmon_net_notifier = { .notifier_call = dropmon_net_event }; -static int __init init_net_drop_monitor(void) +static void __net_dm_cpu_data_init(struct per_cpu_dm_data *data) +{ + spin_lock_init(&data->lock); + skb_queue_head_init(&data->drop_queue); + u64_stats_init(&data->stats.syncp); +} + +static void __net_dm_cpu_data_fini(struct per_cpu_dm_data *data) +{ + WARN_ON(!skb_queue_empty(&data->drop_queue)); +} + +static void net_dm_cpu_data_init(int cpu) +{ + struct per_cpu_dm_data *data; + + data = &per_cpu(dm_cpu_data, cpu); + __net_dm_cpu_data_init(data); +} + +static void net_dm_cpu_data_fini(int cpu) { struct per_cpu_dm_data *data; + + data = &per_cpu(dm_cpu_data, cpu); + /* At this point, we should have exclusive access + * to this struct and can free the skb inside it. 
+ */ + consume_skb(data->skb); + __net_dm_cpu_data_fini(data); +} + +static void net_dm_hw_cpu_data_init(int cpu) +{ + struct per_cpu_dm_data *hw_data; + + hw_data = &per_cpu(dm_hw_cpu_data, cpu); + __net_dm_cpu_data_init(hw_data); +} + +static void net_dm_hw_cpu_data_fini(int cpu) +{ + struct per_cpu_dm_data *hw_data; + + hw_data = &per_cpu(dm_hw_cpu_data, cpu); + kfree(hw_data->hw_entries); + __net_dm_cpu_data_fini(hw_data); +} + +static int __init init_net_drop_monitor(void) +{ int cpu, rc; pr_info("Initializing network drop monitor service\n"); @@ -414,14 +1652,10 @@ static int __init init_net_drop_monitor(void) rc = 0; for_each_possible_cpu(cpu) { - data = &per_cpu(dm_cpu_data, cpu); - INIT_WORK(&data->dm_alert_work, send_dm_alert); - timer_setup(&data->send_timer, sched_send_work, 0); - spin_lock_init(&data->lock); - reset_per_cpu_data(data); + net_dm_cpu_data_init(cpu); + net_dm_hw_cpu_data_init(cpu); } - goto out; out_unreg: @@ -432,7 +1666,6 @@ out: static void exit_net_drop_monitor(void) { - struct per_cpu_dm_data *data; int cpu; BUG_ON(unregister_netdevice_notifier(&dropmon_net_notifier)); @@ -440,19 +1673,11 @@ static void exit_net_drop_monitor(void) /* * Because of the module_get/put we do in the trace state change path * we are guarnateed not to have any current users when we get here - * all we need to do is make sure that we don't have any running timers - * or pending schedule calls */ for_each_possible_cpu(cpu) { - data = &per_cpu(dm_cpu_data, cpu); - del_timer_sync(&data->send_timer); - cancel_work_sync(&data->dm_alert_work); - /* - * At this point, we should have exclusive access - * to this struct and can free the skb inside it - */ - kfree_skb(data->skb); + net_dm_hw_cpu_data_fini(cpu); + net_dm_cpu_data_fini(cpu); } BUG_ON(genl_unregister_family(&net_drop_monitor_family)); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 6288e69e94fc..c763106c73fc 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -133,6 +133,7 @@ phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN] = { [ETHTOOL_ID_UNSPEC] = "Unspec", [ETHTOOL_PHY_DOWNSHIFT] = "phy-downshift", [ETHTOOL_PHY_FAST_LINK_DOWN] = "phy-fast-link-down", + [ETHTOOL_PHY_EDPD] = "phy-energy-detect-power-down", }; static int ethtool_get_features(struct net_device *dev, void __user *useraddr) @@ -2451,6 +2452,11 @@ static int ethtool_phy_tunable_valid(const struct ethtool_tunable *tuna) tuna->type_id != ETHTOOL_TUNABLE_U8) return -EINVAL; break; + case ETHTOOL_PHY_EDPD: + if (tuna->len != sizeof(u16) || + tuna->type_id != ETHTOOL_TUNABLE_U16) + return -EINVAL; + break; default: return -EINVAL; } diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c index 13a40b831d6d..470a606d5e8d 100644 --- a/net/core/fib_notifier.c +++ b/net/core/fib_notifier.c @@ -5,8 +5,15 @@ #include <linux/module.h> #include <linux/init.h> #include <net/net_namespace.h> +#include <net/netns/generic.h> #include <net/fib_notifier.h> +static unsigned int fib_notifier_net_id; + +struct fib_notifier_net { + struct list_head fib_notifier_ops; +}; + static ATOMIC_NOTIFIER_HEAD(fib_chain); int call_fib_notifier(struct notifier_block *nb, struct net *net, @@ -34,6 +41,7 @@ EXPORT_SYMBOL(call_fib_notifiers); static unsigned int fib_seq_sum(void) { + struct fib_notifier_net *fn_net; struct fib_notifier_ops *ops; unsigned int fib_seq = 0; struct net *net; @@ -41,8 +49,9 @@ static unsigned int fib_seq_sum(void) rtnl_lock(); down_read(&net_rwsem); for_each_net(net) { + fn_net = net_generic(net, fib_notifier_net_id); 
rcu_read_lock(); - list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) { + list_for_each_entry_rcu(ops, &fn_net->fib_notifier_ops, list) { if (!try_module_get(ops->owner)) continue; fib_seq += ops->fib_seq_read(net); @@ -58,9 +67,10 @@ static unsigned int fib_seq_sum(void) static int fib_net_dump(struct net *net, struct notifier_block *nb) { + struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id); struct fib_notifier_ops *ops; - list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) { + list_for_each_entry_rcu(ops, &fn_net->fib_notifier_ops, list) { int err; if (!try_module_get(ops->owner)) @@ -127,12 +137,13 @@ EXPORT_SYMBOL(unregister_fib_notifier); static int __fib_notifier_ops_register(struct fib_notifier_ops *ops, struct net *net) { + struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id); struct fib_notifier_ops *o; - list_for_each_entry(o, &net->fib_notifier_ops, list) + list_for_each_entry(o, &fn_net->fib_notifier_ops, list) if (ops->family == o->family) return -EEXIST; - list_add_tail_rcu(&ops->list, &net->fib_notifier_ops); + list_add_tail_rcu(&ops->list, &fn_net->fib_notifier_ops); return 0; } @@ -167,18 +178,24 @@ EXPORT_SYMBOL(fib_notifier_ops_unregister); static int __net_init fib_notifier_net_init(struct net *net) { - INIT_LIST_HEAD(&net->fib_notifier_ops); + struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id); + + INIT_LIST_HEAD(&fn_net->fib_notifier_ops); return 0; } static void __net_exit fib_notifier_net_exit(struct net *net) { - WARN_ON_ONCE(!list_empty(&net->fib_notifier_ops)); + struct fib_notifier_net *fn_net = net_generic(net, fib_notifier_net_id); + + WARN_ON_ONCE(!list_empty(&fn_net->fib_notifier_ops)); } static struct pernet_operations fib_notifier_net_ops = { .init = fib_notifier_net_init, .exit = fib_notifier_net_exit, + .id = &fib_notifier_net_id, + .size = sizeof(struct fib_notifier_net), }; static int __init fib_notifier_init(void) diff --git a/net/core/filter.c b/net/core/filter.c index 4c6a252d4212..ed6563622ce3 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3517,7 +3517,8 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd, int err; switch (map->map_type) { - case BPF_MAP_TYPE_DEVMAP: { + case BPF_MAP_TYPE_DEVMAP: + case BPF_MAP_TYPE_DEVMAP_HASH: { struct bpf_dtab_netdev *dst = fwd; err = dev_map_enqueue(dst, xdp, dev_rx); @@ -3554,6 +3555,7 @@ void xdp_do_flush_map(void) if (map) { switch (map->map_type) { case BPF_MAP_TYPE_DEVMAP: + case BPF_MAP_TYPE_DEVMAP_HASH: __dev_map_flush(map); break; case BPF_MAP_TYPE_CPUMAP: @@ -3574,6 +3576,8 @@ static inline void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index) switch (map->map_type) { case BPF_MAP_TYPE_DEVMAP: return __dev_map_lookup_elem(map, index); + case BPF_MAP_TYPE_DEVMAP_HASH: + return __dev_map_hash_lookup_elem(map, index); case BPF_MAP_TYPE_CPUMAP: return __cpu_map_lookup_elem(map, index); case BPF_MAP_TYPE_XSKMAP: @@ -3655,7 +3659,8 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, ri->tgt_value = NULL; WRITE_ONCE(ri->map, NULL); - if (map->map_type == BPF_MAP_TYPE_DEVMAP) { + if (map->map_type == BPF_MAP_TYPE_DEVMAP || + map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { struct bpf_dtab_netdev *dst = fwd; err = dev_map_generic_redirect(dst, skb, xdp_prog); @@ -5850,6 +5855,75 @@ static const struct bpf_func_proto bpf_tcp_check_syncookie_proto = { .arg5_type = ARG_CONST_SIZE, }; +BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len, + struct tcphdr *, th, u32, 
th_len) +{ +#ifdef CONFIG_SYN_COOKIES + u32 cookie; + u16 mss; + + if (unlikely(th_len < sizeof(*th) || th_len != th->doff * 4)) + return -EINVAL; + + if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN) + return -EINVAL; + + if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies) + return -ENOENT; + + if (!th->syn || th->ack || th->fin || th->rst) + return -EINVAL; + + if (unlikely(iph_len < sizeof(struct iphdr))) + return -EINVAL; + + /* Both struct iphdr and struct ipv6hdr have the version field at the + * same offset so we can cast to the shorter header (struct iphdr). + */ + switch (((struct iphdr *)iph)->version) { + case 4: + if (sk->sk_family == AF_INET6 && sk->sk_ipv6only) + return -EINVAL; + + mss = tcp_v4_get_syncookie(sk, iph, th, &cookie); + break; + +#if IS_BUILTIN(CONFIG_IPV6) + case 6: + if (unlikely(iph_len < sizeof(struct ipv6hdr))) + return -EINVAL; + + if (sk->sk_family != AF_INET6) + return -EINVAL; + + mss = tcp_v6_get_syncookie(sk, iph, th, &cookie); + break; +#endif /* CONFIG_IPV6 */ + + default: + return -EPROTONOSUPPORT; + } + if (mss == 0) + return -ENOENT; + + return cookie | ((u64)mss << 32); +#else + return -EOPNOTSUPP; +#endif /* CONFIG_SYN_COOKIES */ +} + +static const struct bpf_func_proto bpf_tcp_gen_syncookie_proto = { + .func = bpf_tcp_gen_syncookie, + .gpl_only = true, /* __cookie_v*_init_sequence() is GPL */ + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_SOCK_COMMON, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_PTR_TO_MEM, + .arg5_type = ARG_CONST_SIZE, +}; + #endif /* CONFIG_INET */ bool bpf_helper_changes_pkt_data(void *func) @@ -5999,6 +6073,8 @@ sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_socket_cookie_proto; case BPF_FUNC_get_socket_uid: return &bpf_get_socket_uid_proto; + case BPF_FUNC_perf_event_output: + return &bpf_skb_event_output_proto; default: return bpf_base_func_proto(func_id); } @@ -6019,6 +6095,8 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_storage_get_proto; case BPF_FUNC_sk_storage_delete: return &bpf_sk_storage_delete_proto; + case BPF_FUNC_perf_event_output: + return &bpf_skb_event_output_proto; #ifdef CONFIG_SOCK_CGROUP_DATA case BPF_FUNC_skb_cgroup_id: return &bpf_skb_cgroup_id_proto; @@ -6135,6 +6213,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_tcp_check_syncookie_proto; case BPF_FUNC_skb_ecn_set_ce: return &bpf_skb_ecn_set_ce_proto; + case BPF_FUNC_tcp_gen_syncookie: + return &bpf_tcp_gen_syncookie_proto; #endif default: return bpf_base_func_proto(func_id); @@ -6174,6 +6254,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_xdp_skc_lookup_tcp_proto; case BPF_FUNC_tcp_check_syncookie: return &bpf_tcp_check_syncookie_proto; + case BPF_FUNC_tcp_gen_syncookie: + return &bpf_tcp_gen_syncookie_proto; #endif default: return bpf_base_func_proto(func_id); @@ -6267,6 +6349,8 @@ sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_redirect_map_proto; case BPF_FUNC_sk_redirect_hash: return &bpf_sk_redirect_hash_proto; + case BPF_FUNC_perf_event_output: + return &bpf_skb_event_output_proto; #ifdef CONFIG_INET case BPF_FUNC_sk_lookup_tcp: return &bpf_sk_lookup_tcp_proto; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 2470b4b404e6..7c09d87d3269 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -737,6 +737,7 @@ static 
void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, struct flow_dissector_key_basic *key_basic; struct flow_dissector_key_addrs *key_addrs; struct flow_dissector_key_ports *key_ports; + struct flow_dissector_key_tags *key_tags; key_control = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_CONTROL, @@ -781,10 +782,18 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys, key_ports->src = flow_keys->sport; key_ports->dst = flow_keys->dport; } + + if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_FLOW_LABEL)) { + key_tags = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_FLOW_LABEL, + target_container); + key_tags->flow_label = ntohl(flow_keys->flow_label); + } } bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, - __be16 proto, int nhoff, int hlen) + __be16 proto, int nhoff, int hlen, unsigned int flags) { struct bpf_flow_keys *flow_keys = ctx->flow_keys; u32 result; @@ -795,6 +804,14 @@ bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx, flow_keys->nhoff = nhoff; flow_keys->thoff = flow_keys->nhoff; + BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG != + (int)FLOW_DISSECTOR_F_PARSE_1ST_FRAG); + BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL != + (int)FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL); + BUILD_BUG_ON((int)BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP != + (int)FLOW_DISSECTOR_F_STOP_AT_ENCAP); + flow_keys->flags = flags; + preempt_disable(); result = BPF_PROG_RUN(prog, ctx); preempt_enable(); @@ -914,7 +931,7 @@ bool __skb_flow_dissect(const struct net *net, } ret = bpf_flow_dissect(attached, &ctx, n_proto, nhoff, - hlen); + hlen, flags); __skb_flow_bpf_to_target(&flow_keys, flow_dissector, target_container); rcu_read_unlock(); diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index d63b970784dc..cf52d9c422fa 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -2,6 +2,8 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <net/flow_offload.h> +#include <linux/rtnetlink.h> +#include <linux/mutex.h> struct flow_rule *flow_rule_alloc(unsigned int num_actions) { @@ -280,3 +282,242 @@ int flow_block_cb_setup_simple(struct flow_block_offload *f, } } EXPORT_SYMBOL(flow_block_cb_setup_simple); + +static LIST_HEAD(block_ing_cb_list); + +static struct rhashtable indr_setup_block_ht; + +struct flow_indr_block_cb { + struct list_head list; + void *cb_priv; + flow_indr_block_bind_cb_t *cb; + void *cb_ident; +}; + +struct flow_indr_block_dev { + struct rhash_head ht_node; + struct net_device *dev; + unsigned int refcnt; + struct list_head cb_list; +}; + +static const struct rhashtable_params flow_indr_setup_block_ht_params = { + .key_offset = offsetof(struct flow_indr_block_dev, dev), + .head_offset = offsetof(struct flow_indr_block_dev, ht_node), + .key_len = sizeof(struct net_device *), +}; + +static struct flow_indr_block_dev * +flow_indr_block_dev_lookup(struct net_device *dev) +{ + return rhashtable_lookup_fast(&indr_setup_block_ht, &dev, + flow_indr_setup_block_ht_params); +} + +static struct flow_indr_block_dev * +flow_indr_block_dev_get(struct net_device *dev) +{ + struct flow_indr_block_dev *indr_dev; + + indr_dev = flow_indr_block_dev_lookup(dev); + if (indr_dev) + goto inc_ref; + + indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL); + if (!indr_dev) + return NULL; + + INIT_LIST_HEAD(&indr_dev->cb_list); + indr_dev->dev = dev; + if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node, + 
flow_indr_setup_block_ht_params)) { + kfree(indr_dev); + return NULL; + } + +inc_ref: + indr_dev->refcnt++; + return indr_dev; +} + +static void flow_indr_block_dev_put(struct flow_indr_block_dev *indr_dev) +{ + if (--indr_dev->refcnt) + return; + + rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node, + flow_indr_setup_block_ht_params); + kfree(indr_dev); +} + +static struct flow_indr_block_cb * +flow_indr_block_cb_lookup(struct flow_indr_block_dev *indr_dev, + flow_indr_block_bind_cb_t *cb, void *cb_ident) +{ + struct flow_indr_block_cb *indr_block_cb; + + list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list) + if (indr_block_cb->cb == cb && + indr_block_cb->cb_ident == cb_ident) + return indr_block_cb; + return NULL; +} + +static struct flow_indr_block_cb * +flow_indr_block_cb_add(struct flow_indr_block_dev *indr_dev, void *cb_priv, + flow_indr_block_bind_cb_t *cb, void *cb_ident) +{ + struct flow_indr_block_cb *indr_block_cb; + + indr_block_cb = flow_indr_block_cb_lookup(indr_dev, cb, cb_ident); + if (indr_block_cb) + return ERR_PTR(-EEXIST); + + indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL); + if (!indr_block_cb) + return ERR_PTR(-ENOMEM); + + indr_block_cb->cb_priv = cb_priv; + indr_block_cb->cb = cb; + indr_block_cb->cb_ident = cb_ident; + list_add(&indr_block_cb->list, &indr_dev->cb_list); + + return indr_block_cb; +} + +static void flow_indr_block_cb_del(struct flow_indr_block_cb *indr_block_cb) +{ + list_del(&indr_block_cb->list); + kfree(indr_block_cb); +} + +static DEFINE_MUTEX(flow_indr_block_ing_cb_lock); + +static void flow_block_ing_cmd(struct net_device *dev, + flow_indr_block_bind_cb_t *cb, + void *cb_priv, + enum flow_block_command command) +{ + struct flow_indr_block_ing_entry *entry; + + mutex_lock(&flow_indr_block_ing_cb_lock); + list_for_each_entry(entry, &block_ing_cb_list, list) { + entry->cb(dev, cb, cb_priv, command); + } + mutex_unlock(&flow_indr_block_ing_cb_lock); +} + +int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv, + flow_indr_block_bind_cb_t *cb, + void *cb_ident) +{ + struct flow_indr_block_cb *indr_block_cb; + struct flow_indr_block_dev *indr_dev; + int err; + + indr_dev = flow_indr_block_dev_get(dev); + if (!indr_dev) + return -ENOMEM; + + indr_block_cb = flow_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident); + err = PTR_ERR_OR_ZERO(indr_block_cb); + if (err) + goto err_dev_put; + + flow_block_ing_cmd(dev, indr_block_cb->cb, indr_block_cb->cb_priv, + FLOW_BLOCK_BIND); + + return 0; + +err_dev_put: + flow_indr_block_dev_put(indr_dev); + return err; +} +EXPORT_SYMBOL_GPL(__flow_indr_block_cb_register); + +int flow_indr_block_cb_register(struct net_device *dev, void *cb_priv, + flow_indr_block_bind_cb_t *cb, + void *cb_ident) +{ + int err; + + rtnl_lock(); + err = __flow_indr_block_cb_register(dev, cb_priv, cb, cb_ident); + rtnl_unlock(); + + return err; +} +EXPORT_SYMBOL_GPL(flow_indr_block_cb_register); + +void __flow_indr_block_cb_unregister(struct net_device *dev, + flow_indr_block_bind_cb_t *cb, + void *cb_ident) +{ + struct flow_indr_block_cb *indr_block_cb; + struct flow_indr_block_dev *indr_dev; + + indr_dev = flow_indr_block_dev_lookup(dev); + if (!indr_dev) + return; + + indr_block_cb = flow_indr_block_cb_lookup(indr_dev, cb, cb_ident); + if (!indr_block_cb) + return; + + flow_block_ing_cmd(dev, indr_block_cb->cb, indr_block_cb->cb_priv, + FLOW_BLOCK_UNBIND); + + flow_indr_block_cb_del(indr_block_cb); + flow_indr_block_dev_put(indr_dev); +} 
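
(Aside, not part of the patch: the sketch below illustrates how a driver is expected to consume the indirect block helpers added in net/core/flow_offload.c above. The "foo" names and the two wrapper functions are hypothetical; only flow_indr_block_cb_register()/flow_indr_block_cb_unregister() and the callback calling convention visible in flow_indr_block_call() come from this code.)

#include <linux/netdevice.h>
#include <net/flow_offload.h>

/* Illustrative sketch only -- "foo" is a hypothetical driver.  It registers
 * an indirect block callback against a netdev it does not own (e.g. a tunnel
 * device) and is then invoked with TC_SETUP_BLOCK whenever a flow block is
 * bound to or unbound from that device.
 */
struct foo_priv {
	/* hypothetical per-driver state */
};

static int foo_indr_setup_tc_cb(struct net_device *netdev, void *cb_priv,
				enum tc_setup_type type, void *type_data)
{
	struct flow_block_offload *bo = type_data;

	if (type != TC_SETUP_BLOCK)
		return -EOPNOTSUPP;

	/* bo->command is FLOW_BLOCK_BIND or FLOW_BLOCK_UNBIND; a real driver
	 * would add or remove its flow_block_cb instances here.
	 */
	return 0;
}

static int foo_watch_netdev(struct foo_priv *priv, struct net_device *netdev)
{
	/* Takes rtnl_lock internally; __flow_indr_block_cb_register() is the
	 * variant for callers already holding it.
	 */
	return flow_indr_block_cb_register(netdev, priv,
					   foo_indr_setup_tc_cb, priv);
}

static void foo_unwatch_netdev(struct foo_priv *priv,
			       struct net_device *netdev)
{
	flow_indr_block_cb_unregister(netdev, foo_indr_setup_tc_cb, priv);
}

(End of aside; the diff continues below.)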
+EXPORT_SYMBOL_GPL(__flow_indr_block_cb_unregister); + +void flow_indr_block_cb_unregister(struct net_device *dev, + flow_indr_block_bind_cb_t *cb, + void *cb_ident) +{ + rtnl_lock(); + __flow_indr_block_cb_unregister(dev, cb, cb_ident); + rtnl_unlock(); +} +EXPORT_SYMBOL_GPL(flow_indr_block_cb_unregister); + +void flow_indr_block_call(struct net_device *dev, + struct flow_block_offload *bo, + enum flow_block_command command) +{ + struct flow_indr_block_cb *indr_block_cb; + struct flow_indr_block_dev *indr_dev; + + indr_dev = flow_indr_block_dev_lookup(dev); + if (!indr_dev) + return; + + list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list) + indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK, + bo); +} +EXPORT_SYMBOL_GPL(flow_indr_block_call); + +void flow_indr_add_block_ing_cb(struct flow_indr_block_ing_entry *entry) +{ + mutex_lock(&flow_indr_block_ing_cb_lock); + list_add_tail(&entry->list, &block_ing_cb_list); + mutex_unlock(&flow_indr_block_ing_cb_lock); +} +EXPORT_SYMBOL_GPL(flow_indr_add_block_ing_cb); + +void flow_indr_del_block_ing_cb(struct flow_indr_block_ing_entry *entry) +{ + mutex_lock(&flow_indr_block_ing_cb_lock); + list_del(&entry->list); + mutex_unlock(&flow_indr_block_ing_cb_lock); +} +EXPORT_SYMBOL_GPL(flow_indr_del_block_ing_cb); + +static int __init init_flow_indr_rhashtable(void) +{ + return rhashtable_init(&indr_setup_block_ht, + &flow_indr_setup_block_ht_params); +} +subsys_initcall(init_flow_indr_rhashtable); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index f79e61c570ea..5480edff0c86 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -3033,7 +3033,7 @@ static struct neighbour *neigh_get_first(struct seq_file *seq) struct net *net = seq_file_net(seq); struct neigh_hash_table *nht = state->nht; struct neighbour *n = NULL; - int bucket = state->bucket; + int bucket; state->flags &= ~NEIGH_SEQ_IS_PNEIGH; for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) { diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 3272dc7a8c81..5bc65587f1c4 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -61,7 +61,7 @@ static int page_pool_init(struct page_pool *pool, struct page_pool *page_pool_create(const struct page_pool_params *params) { struct page_pool *pool; - int err = 0; + int err; pool = kzalloc_node(sizeof(*pool), GFP_KERNEL, params->nid); if (!pool) @@ -82,12 +82,9 @@ EXPORT_SYMBOL(page_pool_create); static struct page *__page_pool_get_cached(struct page_pool *pool) { struct ptr_ring *r = &pool->ring; + bool refill = false; struct page *page; - /* Quicker fallback, avoid locks when ring is empty */ - if (__ptr_ring_empty(r)) - return NULL; - /* Test for safe-context, caller should provide this guarantee */ if (likely(in_serving_softirq())) { if (likely(pool->alloc.count)) { @@ -95,27 +92,23 @@ static struct page *__page_pool_get_cached(struct page_pool *pool) page = pool->alloc.cache[--pool->alloc.count]; return page; } - /* Slower-path: Alloc array empty, time to refill - * - * Open-coded bulk ptr_ring consumer. - * - * Discussion: the ring consumer lock is not really - * needed due to the softirq/NAPI protection, but - * later need the ability to reclaim pages on the - * ring. Thus, keeping the locks. 
- */ - spin_lock(&r->consumer_lock); - while ((page = __ptr_ring_consume(r))) { - if (pool->alloc.count == PP_ALLOC_CACHE_REFILL) - break; - pool->alloc.cache[pool->alloc.count++] = page; - } - spin_unlock(&r->consumer_lock); - return page; + refill = true; } - /* Slow-path: Get page from locked ring queue */ - page = ptr_ring_consume(&pool->ring); + /* Quicker fallback, avoid locks when ring is empty */ + if (__ptr_ring_empty(r)) + return NULL; + + /* Slow-path: Get page from locked ring queue, + * refill alloc array if requested. + */ + spin_lock(&r->consumer_lock); + page = __ptr_ring_consume(r); + if (refill) + pool->alloc.count = __ptr_ring_consume_batched(r, + pool->alloc.cache, + PP_ALLOC_CACHE_REFILL); + spin_unlock(&r->consumer_lock); return page; } diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 1d0c1b4886d7..48b1e429857c 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2650,7 +2650,7 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, } get_page(pkt_dev->page); skb_frag_set_page(skb, i, pkt_dev->page); - skb_shinfo(skb)->frags[i].page_offset = 0; + skb_frag_off_set(&skb_shinfo(skb)->frags[i], 0); /*last fragment, fill rest of data*/ if (i == (frags - 1)) skb_frag_size_set(&skb_shinfo(skb)->frags[i], diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 982d8d12830e..f12e8a050edb 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -785,7 +785,7 @@ void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt) struct page *p; u8 *vaddr; - skb_frag_foreach_page(frag, frag->page_offset, + skb_frag_foreach_page(frag, skb_frag_off(frag), skb_frag_size(frag), p, p_off, p_len, copied) { seg_len = min_t(int, p_len, len); @@ -1375,7 +1375,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) struct page *p; u8 *vaddr; - skb_frag_foreach_page(f, f->page_offset, skb_frag_size(f), + skb_frag_foreach_page(f, skb_frag_off(f), skb_frag_size(f), p, p_off, p_len, copied) { u32 copy, done = 0; vaddr = kmap_atomic(p); @@ -2144,10 +2144,12 @@ pull_pages: skb_frag_unref(skb, i); eat -= size; } else { - skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; + skb_frag_t *frag = &skb_shinfo(skb)->frags[k]; + + *frag = skb_shinfo(skb)->frags[i]; if (eat) { - skb_shinfo(skb)->frags[k].page_offset += eat; - skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat); + skb_frag_off_add(frag, eat); + skb_frag_size_sub(frag, eat); if (!i) goto end; eat = 0; @@ -2219,7 +2221,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) copy = len; skb_frag_foreach_page(f, - f->page_offset + offset - start, + skb_frag_off(f) + offset - start, copy, p, p_off, p_len, copied) { vaddr = kmap_atomic(p); memcpy(to + copied, vaddr + p_off, p_len); @@ -2395,7 +2397,7 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; if (__splice_segment(skb_frag_page(f), - f->page_offset, skb_frag_size(f), + skb_frag_off(f), skb_frag_size(f), offset, len, spd, false, sk, pipe)) return true; } @@ -2485,20 +2487,20 @@ do_frag_list: for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx]; - if (offset < frag->size) + if (offset < skb_frag_size(frag)) break; - offset -= frag->size; + offset -= skb_frag_size(frag); } for (; len && fragidx < skb_shinfo(skb)->nr_frags; fragidx++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx]; - slen = min_t(size_t, len, frag->size - offset); + slen = min_t(size_t, 
len, skb_frag_size(frag) - offset); while (slen) { - ret = kernel_sendpage_locked(sk, frag->page.p, - frag->page_offset + offset, + ret = kernel_sendpage_locked(sk, skb_frag_page(frag), + skb_frag_off(frag) + offset, slen, MSG_DONTWAIT); if (ret <= 0) goto error; @@ -2580,7 +2582,7 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len) copy = len; skb_frag_foreach_page(frag, - frag->page_offset + offset - start, + skb_frag_off(frag) + offset - start, copy, p, p_off, p_len, copied) { vaddr = kmap_atomic(p); memcpy(vaddr + p_off, from + copied, p_len); @@ -2660,7 +2662,7 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, copy = len; skb_frag_foreach_page(frag, - frag->page_offset + offset - start, + skb_frag_off(frag) + offset - start, copy, p, p_off, p_len, copied) { vaddr = kmap_atomic(p); csum2 = INDIRECT_CALL_1(ops->update, @@ -2759,7 +2761,7 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, copy = len; skb_frag_foreach_page(frag, - frag->page_offset + offset - start, + skb_frag_off(frag) + offset - start, copy, p, p_off, p_len, copied) { vaddr = kmap_atomic(p); csum2 = csum_partial_copy_nocheck(vaddr + p_off, @@ -2975,11 +2977,15 @@ skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen) skb_zerocopy_clone(to, from, GFP_ATOMIC); for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { + int size; + if (!len) break; skb_shinfo(to)->frags[j] = skb_shinfo(from)->frags[i]; - skb_shinfo(to)->frags[j].size = min_t(int, skb_shinfo(to)->frags[j].size, len); - len -= skb_shinfo(to)->frags[j].size; + size = min_t(int, skb_frag_size(&skb_shinfo(to)->frags[j]), + len); + skb_frag_size_set(&skb_shinfo(to)->frags[j], size); + len -= size; skb_frag_ref(to, j); j++; } @@ -3230,7 +3236,7 @@ static inline void skb_split_no_header(struct sk_buff *skb, * 2. Split is accurately. We make this. 
*/ skb_frag_ref(skb, i); - skb_shinfo(skb1)->frags[0].page_offset += len - pos; + skb_frag_off_add(&skb_shinfo(skb1)->frags[0], len - pos); skb_frag_size_sub(&skb_shinfo(skb1)->frags[0], len - pos); skb_frag_size_set(&skb_shinfo(skb)->frags[i], len - pos); skb_shinfo(skb)->nr_frags++; @@ -3293,7 +3299,7 @@ static int skb_prepare_for_shift(struct sk_buff *skb) int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) { int from, to, merge, todo; - struct skb_frag_struct *fragfrom, *fragto; + skb_frag_t *fragfrom, *fragto; BUG_ON(shiftlen > skb->len); @@ -3312,7 +3318,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) */ if (!to || !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom), - fragfrom->page_offset)) { + skb_frag_off(fragfrom))) { merge = -1; } else { merge = to - 1; @@ -3329,7 +3335,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) skb_frag_size_add(fragto, shiftlen); skb_frag_size_sub(fragfrom, shiftlen); - fragfrom->page_offset += shiftlen; + skb_frag_off_add(fragfrom, shiftlen); goto onlymerged; } @@ -3360,11 +3366,11 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) } else { __skb_frag_ref(fragfrom); - fragto->page = fragfrom->page; - fragto->page_offset = fragfrom->page_offset; + skb_frag_page_copy(fragto, fragfrom); + skb_frag_off_copy(fragto, fragfrom); skb_frag_size_set(fragto, todo); - fragfrom->page_offset += todo; + skb_frag_off_add(fragfrom, todo); skb_frag_size_sub(fragfrom, todo); todo = 0; @@ -3489,7 +3495,7 @@ next_skb: if (!st->frag_data) st->frag_data = kmap_atomic(skb_frag_page(frag)); - *data = (u8 *) st->frag_data + frag->page_offset + + *data = (u8 *) st->frag_data + skb_frag_off(frag) + (abs_offset - st->stepped_offset); return block_limit - abs_offset; @@ -3625,10 +3631,10 @@ static inline skb_frag_t skb_head_frag_to_page_desc(struct sk_buff *frag_skb) struct page *page; page = virt_to_head_page(frag_skb->head); - head_frag.page.p = page; - head_frag.page_offset = frag_skb->data - - (unsigned char *)page_address(page); - head_frag.size = skb_headlen(frag_skb); + __skb_frag_set_page(&head_frag, page); + skb_frag_off_set(&head_frag, frag_skb->data - + (unsigned char *)page_address(page)); + skb_frag_size_set(&head_frag, skb_headlen(frag_skb)); return head_frag; } @@ -3890,7 +3896,7 @@ normal: size = skb_frag_size(nskb_frag); if (pos < offset) { - nskb_frag->page_offset += offset - pos; + skb_frag_off_add(nskb_frag, offset - pos); skb_frag_size_sub(nskb_frag, offset - pos); } @@ -4011,7 +4017,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) *--frag = *--frag2; } while (--i); - frag->page_offset += offset; + skb_frag_off_add(frag, offset); skb_frag_size_sub(frag, offset); /* all fragments truesize : remove (head size + sk_buff) */ @@ -4040,8 +4046,8 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) pinfo->nr_frags = nr_frags + 1 + skbinfo->nr_frags; - frag->page.p = page; - frag->page_offset = first_offset; + __skb_frag_set_page(frag, page); + skb_frag_off_set(frag, first_offset); skb_frag_size_set(frag, first_size); memcpy(frag + 1, skbinfo->frags, sizeof(*frag) * skbinfo->nr_frags); @@ -4057,7 +4063,7 @@ merge: if (offset > headlen) { unsigned int eat = offset - headlen; - skbinfo->frags[0].page_offset += eat; + skb_frag_off_add(&skbinfo->frags[0], eat); skb_frag_size_sub(&skbinfo->frags[0], eat); skb->data_len -= eat; skb->len -= eat; @@ -4100,6 +4106,9 @@ static const u8 skb_ext_type_len[] = { #ifdef CONFIG_XFRM [SKB_EXT_SEC_PATH] = 
SKB_EXT_CHUNKSIZEOF(struct sec_path), #endif +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + [TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext), +#endif }; static __always_inline unsigned int skb_ext_total_length(void) @@ -4111,6 +4120,9 @@ static __always_inline unsigned int skb_ext_total_length(void) #ifdef CONFIG_XFRM skb_ext_type_len[SKB_EXT_SEC_PATH] + #endif +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + skb_ext_type_len[TC_SKB_EXT] + +#endif 0; } @@ -4182,7 +4194,7 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len, if (copy > len) copy = len; sg_set_page(&sg[elt], skb_frag_page(frag), copy, - frag->page_offset+offset-start); + skb_frag_off(frag) + offset - start); elt++; if (!(len -= copy)) return elt; @@ -5853,7 +5865,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, * where splitting is expensive. * 2. Split is accurately. We make this. */ - shinfo->frags[0].page_offset += off - pos; + skb_frag_off_add(&shinfo->frags[0], off - pos); skb_frag_size_sub(&shinfo->frags[0], off - pos); } skb_frag_ref(skb, i); diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 6832eeb4b785..cf390e0aa73d 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -190,8 +190,7 @@ static int __sk_msg_free(struct sock *sk, struct sk_msg *msg, u32 i, sk_msg_check_to_free(msg, i, msg->sg.size); sge = sk_msg_elem(msg, i); } - if (msg->skb) - consume_skb(msg->skb); + consume_skb(msg->skb); sk_msg_init(msg); return freed; } diff --git a/net/core/sock.c b/net/core/sock.c index 545fac19a711..07863edbe6fc 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1851,9 +1851,12 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) goto out; } RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL); -#ifdef CONFIG_BPF_SYSCALL - RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL); -#endif + + if (bpf_sk_storage_clone(sk, newsk)) { + sk_free_unlock_clone(newsk); + newsk = NULL; + goto out; + } newsk->sk_err = 0; newsk->sk_err_soft = 0; diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 50916f9bc4f2..eb114ee419b6 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -345,7 +345,7 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx, return -EINVAL; if (unlikely(idx >= map->max_entries)) return -E2BIG; - if (unlikely(icsk->icsk_ulp_data)) + if (unlikely(rcu_access_pointer(icsk->icsk_ulp_data))) return -EINVAL; link = sk_psock_init_link(); diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index 9408f9264d05..f3ceec93f392 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -295,8 +295,19 @@ struct sock *reuseport_select_sock(struct sock *sk, select_by_hash: /* no bpf or invalid bpf result: fall back to hash usage */ - if (!sk2) - sk2 = reuse->socks[reciprocal_scale(hash, socks)]; + if (!sk2) { + int i, j; + + i = j = reciprocal_scale(hash, socks); + while (reuse->socks[i]->sk_state == TCP_ESTABLISHED) { + i++; + if (i >= reuse->num_socks) + i = 0; + if (i == j) + goto out; + } + sk2 = reuse->socks[i]; + } } out: diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 8da5b3a54dac..eb29e5adc84d 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -567,6 +567,14 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_do_static_key, }, + { + .procname = "gro_normal_batch", + .data = &gro_normal_batch, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ONE, + }, { } }; diff 
--git a/net/core/tso.c b/net/core/tso.c index 43f4eba61933..d4d5c077ad72 100644 --- a/net/core/tso.c +++ b/net/core/tso.c @@ -55,8 +55,8 @@ void tso_build_data(struct sk_buff *skb, struct tso_t *tso, int size) skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx]; /* Move to next segment */ - tso->size = frag->size; - tso->data = page_address(frag->page.p) + frag->page_offset; + tso->size = skb_frag_size(frag); + tso->data = skb_frag_address(frag); tso->next_frag_idx++; } } @@ -79,8 +79,8 @@ void tso_start(struct sk_buff *skb, struct tso_t *tso) skb_frag_t *frag = &skb_shinfo(skb)->frags[tso->next_frag_idx]; /* Move to next segment */ - tso->size = frag->size; - tso->data = page_address(frag->page.p) + frag->page_offset; + tso->size = skb_frag_size(frag); + tso->data = skb_frag_address(frag); tso->next_frag_idx++; } } diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index 6e942dda1bcd..29e2bd5cc5af 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -73,23 +73,11 @@ config NET_DSA_TAG_MTK Say Y or M if you want to enable support for tagging frames for Mediatek switches. -config NET_DSA_TAG_KSZ_COMMON - tristate - default n - config NET_DSA_TAG_KSZ - tristate "Tag driver for Microchip 9893 family of switches" - select NET_DSA_TAG_KSZ_COMMON - help - Say Y if you want to enable support for tagging frames for the - Microchip 9893 family of switches. - -config NET_DSA_TAG_KSZ9477 - tristate "Tag driver for Microchip 9477 family of switches" - select NET_DSA_TAG_KSZ_COMMON + tristate "Tag driver for Microchip 8795/9477/9893 families of switches" help Say Y if you want to enable support for tagging frames for the - Microchip 9477 family of switches. + Microchip 8795/9477/9893 families of switches. config NET_DSA_TAG_QCA tristate "Tag driver for Qualcomm Atheros QCA8K switches" diff --git a/net/dsa/Makefile b/net/dsa/Makefile index c342f54715ba..2c6d286f0511 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -9,7 +9,7 @@ obj-$(CONFIG_NET_DSA_TAG_BRCM_COMMON) += tag_brcm.o obj-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o obj-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o obj-$(CONFIG_NET_DSA_TAG_GSWIP) += tag_gswip.o -obj-$(CONFIG_NET_DSA_TAG_KSZ_COMMON) += tag_ksz.o +obj-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o obj-$(CONFIG_NET_DSA_TAG_LAN9303) += tag_lan9303.o obj-$(CONFIG_NET_DSA_TAG_MTK) += tag_mtk.o obj-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 3abd173ebacb..73002022c9d8 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -254,88 +254,109 @@ static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst) static int dsa_port_setup(struct dsa_port *dp) { - enum devlink_port_flavour flavour; struct dsa_switch *ds = dp->ds; struct dsa_switch_tree *dst = ds->dst; + const unsigned char *id = (const unsigned char *)&dst->index; + const unsigned char len = sizeof(dst->index); + struct devlink_port *dlp = &dp->devlink_port; + bool dsa_port_link_registered = false; + bool devlink_port_registered = false; + struct devlink *dl = ds->devlink; + bool dsa_port_enabled = false; int err = 0; - if (dp->type == DSA_PORT_TYPE_UNUSED) - return 0; - - memset(&dp->devlink_port, 0, sizeof(dp->devlink_port)); - dp->mac = of_get_mac_address(dp->dn); - - switch (dp->type) { - case DSA_PORT_TYPE_CPU: - flavour = DEVLINK_PORT_FLAVOUR_CPU; - break; - case DSA_PORT_TYPE_DSA: - flavour = DEVLINK_PORT_FLAVOUR_DSA; - break; - case DSA_PORT_TYPE_USER: /* fall-through */ - default: - flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; - break; - } - - /* dp->index is used now as 
port_number. However - * CPU and DSA ports should have separate numbering - * independent from front panel port numbers. - */ - devlink_port_attrs_set(&dp->devlink_port, flavour, - dp->index, false, 0, - (const char *) &dst->index, sizeof(dst->index)); - err = devlink_port_register(ds->devlink, &dp->devlink_port, - dp->index); - if (err) - return err; - switch (dp->type) { case DSA_PORT_TYPE_UNUSED: + dsa_port_disable(dp); break; case DSA_PORT_TYPE_CPU: + memset(dlp, 0, sizeof(*dlp)); + devlink_port_attrs_set(dlp, DEVLINK_PORT_FLAVOUR_CPU, + dp->index, false, 0, id, len); + err = devlink_port_register(dl, dlp, dp->index); + if (err) + break; + devlink_port_registered = true; + err = dsa_port_link_register_of(dp); if (err) - dev_err(ds->dev, "failed to setup link for port %d.%d\n", - ds->index, dp->index); + break; + dsa_port_link_registered = true; + + err = dsa_port_enable(dp, NULL); + if (err) + break; + dsa_port_enabled = true; + break; case DSA_PORT_TYPE_DSA: + memset(dlp, 0, sizeof(*dlp)); + devlink_port_attrs_set(dlp, DEVLINK_PORT_FLAVOUR_DSA, + dp->index, false, 0, id, len); + err = devlink_port_register(dl, dlp, dp->index); + if (err) + break; + devlink_port_registered = true; + err = dsa_port_link_register_of(dp); if (err) - dev_err(ds->dev, "failed to setup link for port %d.%d\n", - ds->index, dp->index); + break; + dsa_port_link_registered = true; + + err = dsa_port_enable(dp, NULL); + if (err) + break; + dsa_port_enabled = true; + break; case DSA_PORT_TYPE_USER: + memset(dlp, 0, sizeof(*dlp)); + devlink_port_attrs_set(dlp, DEVLINK_PORT_FLAVOUR_PHYSICAL, + dp->index, false, 0, id, len); + err = devlink_port_register(dl, dlp, dp->index); + if (err) + break; + devlink_port_registered = true; + + dp->mac = of_get_mac_address(dp->dn); err = dsa_slave_create(dp); if (err) - dev_err(ds->dev, "failed to create slave for port %d.%d\n", - ds->index, dp->index); - else - devlink_port_type_eth_set(&dp->devlink_port, dp->slave); + break; + + devlink_port_type_eth_set(dlp, dp->slave); break; } - if (err) - devlink_port_unregister(&dp->devlink_port); + if (err && dsa_port_enabled) + dsa_port_disable(dp); + if (err && dsa_port_link_registered) + dsa_port_link_unregister_of(dp); + if (err && devlink_port_registered) + devlink_port_unregister(dlp); return err; } static void dsa_port_teardown(struct dsa_port *dp) { - if (dp->type != DSA_PORT_TYPE_UNUSED) - devlink_port_unregister(&dp->devlink_port); + struct devlink_port *dlp = &dp->devlink_port; switch (dp->type) { case DSA_PORT_TYPE_UNUSED: break; case DSA_PORT_TYPE_CPU: + dsa_port_disable(dp); dsa_tag_driver_put(dp->tag_ops); - /* fall-through */ + devlink_port_unregister(dlp); + dsa_port_link_unregister_of(dp); + break; case DSA_PORT_TYPE_DSA: + dsa_port_disable(dp); + devlink_port_unregister(dlp); dsa_port_link_unregister_of(dp); break; case DSA_PORT_TYPE_USER: + devlink_port_unregister(dlp); if (dp->slave) { dsa_slave_destroy(dp->slave); dp->slave = NULL; @@ -623,6 +644,8 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master) tag_protocol = ds->ops->get_tag_protocol(ds, dp->index); tag_ops = dsa_tag_driver_get(tag_protocol); if (IS_ERR(tag_ops)) { + if (PTR_ERR(tag_ops) == -ENOPROTOOPT) + return -EPROBE_DEFER; dev_warn(ds->dev, "No tagger for this switch\n"); return PTR_ERR(tag_ops); } @@ -832,20 +855,6 @@ struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n) if (!ds) return NULL; - /* We avoid allocating memory outside dsa_switch - * if it is not needed. 
- */ - if (n <= sizeof(ds->_bitmap) * 8) { - ds->bitmap = &ds->_bitmap; - } else { - ds->bitmap = devm_kcalloc(dev, - BITS_TO_LONGS(n), - sizeof(unsigned long), - GFP_KERNEL); - if (unlikely(!ds->bitmap)) - return NULL; - } - ds->dev = dev; ds->num_ports = n; diff --git a/net/dsa/master.c b/net/dsa/master.c index 4b52f8bac5e1..a8e52c9967f4 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -8,6 +8,70 @@ #include "dsa_priv.h" +static int dsa_master_get_regs_len(struct net_device *dev) +{ + struct dsa_port *cpu_dp = dev->dsa_ptr; + const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; + struct dsa_switch *ds = cpu_dp->ds; + int port = cpu_dp->index; + int ret = 0; + int len; + + if (ops->get_regs_len) { + len = ops->get_regs_len(dev); + if (len < 0) + return len; + ret += len; + } + + ret += sizeof(struct ethtool_drvinfo); + ret += sizeof(struct ethtool_regs); + + if (ds->ops->get_regs_len) { + len = ds->ops->get_regs_len(ds, port); + if (len < 0) + return len; + ret += len; + } + + return ret; +} + +static void dsa_master_get_regs(struct net_device *dev, + struct ethtool_regs *regs, void *data) +{ + struct dsa_port *cpu_dp = dev->dsa_ptr; + const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; + struct dsa_switch *ds = cpu_dp->ds; + struct ethtool_drvinfo *cpu_info; + struct ethtool_regs *cpu_regs; + int port = cpu_dp->index; + int len; + + if (ops->get_regs_len && ops->get_regs) { + len = ops->get_regs_len(dev); + if (len < 0) + return; + regs->len = len; + ops->get_regs(dev, regs, data); + data += regs->len; + } + + cpu_info = (struct ethtool_drvinfo *)data; + strlcpy(cpu_info->driver, "dsa", sizeof(cpu_info->driver)); + data += sizeof(*cpu_info); + cpu_regs = (struct ethtool_regs *)data; + data += sizeof(*cpu_regs); + + if (ds->ops->get_regs_len && ds->ops->get_regs) { + len = ds->ops->get_regs_len(ds, port); + if (len < 0) + return; + cpu_regs->len = len; + ds->ops->get_regs(ds, port, cpu_regs, data); + } +} + static void dsa_master_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, uint64_t *data) @@ -147,6 +211,8 @@ static int dsa_master_ethtool_setup(struct net_device *dev) if (cpu_dp->orig_ethtool_ops) memcpy(ops, cpu_dp->orig_ethtool_ops, sizeof(*ops)); + ops->get_regs_len = dsa_master_get_regs_len; + ops->get_regs = dsa_master_get_regs; ops->get_sset_count = dsa_master_get_sset_count; ops->get_ethtool_stats = dsa_master_get_ethtool_stats; ops->get_strings = dsa_master_get_strings; diff --git a/net/dsa/port.c b/net/dsa/port.c index f071acf2842b..9b54e5a76297 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -348,10 +348,7 @@ int dsa_port_vlan_add(struct dsa_port *dp, .vlan = vlan, }; - if (!dp->bridge_dev || br_vlan_enabled(dp->bridge_dev)) - return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info); - - return 0; + return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info); } int dsa_port_vlan_del(struct dsa_port *dp, @@ -363,10 +360,7 @@ int dsa_port_vlan_del(struct dsa_port *dp, .vlan = vlan, }; - if (!dp->bridge_dev || br_vlan_enabled(dp->bridge_dev)) - return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info); - - return 0; + return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info); } int dsa_port_vid_add(struct dsa_port *dp, u16 vid, u16 flags) @@ -382,8 +376,8 @@ int dsa_port_vid_add(struct dsa_port *dp, u16 vid, u16 flags) trans.ph_prepare = true; err = dsa_port_vlan_add(dp, &vlan, &trans); - if (err == -EOPNOTSUPP) - return 0; + if (err) + return err; trans.ph_prepare = false; return dsa_port_vlan_add(dp, &vlan, &trans); @@ -538,10 +532,6 @@ 
static int dsa_port_setup_phy_of(struct dsa_port *dp, bool enable) return PTR_ERR(phydev); if (enable) { - err = genphy_config_init(phydev); - if (err < 0) - goto err_put_dev; - err = genphy_resume(phydev); if (err < 0) goto err_put_dev; @@ -589,7 +579,6 @@ static int dsa_port_fixed_link_register_of(struct dsa_port *dp) mode = PHY_INTERFACE_MODE_NA; phydev->interface = mode; - genphy_config_init(phydev); genphy_read_status(phydev); if (ds->ops->adjust_link) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 33f41178afcc..75d58229a4bd 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -312,6 +312,39 @@ static int dsa_slave_port_attr_set(struct net_device *dev, return ret; } +static int dsa_slave_vlan_add(struct net_device *dev, + const struct switchdev_obj *obj, + struct switchdev_trans *trans) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct switchdev_obj_port_vlan vlan; + int err; + + if (obj->orig_dev != dev) + return -EOPNOTSUPP; + + if (dp->bridge_dev && !br_vlan_enabled(dp->bridge_dev)) + return 0; + + vlan = *SWITCHDEV_OBJ_PORT_VLAN(obj); + + err = dsa_port_vlan_add(dp, &vlan, trans); + if (err) + return err; + + /* We need the dedicated CPU port to be a member of the VLAN as well. + * Even though drivers often handle CPU membership in special ways, + * it doesn't make sense to program a PVID, so clear this flag. + */ + vlan.flags &= ~BRIDGE_VLAN_INFO_PVID; + + err = dsa_port_vlan_add(dp->cpu_dp, &vlan, trans); + if (err) + return err; + + return 0; +} + static int dsa_slave_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, struct switchdev_trans *trans, @@ -339,10 +372,7 @@ static int dsa_slave_port_obj_add(struct net_device *dev, trans); break; case SWITCHDEV_OBJ_ID_PORT_VLAN: - if (obj->orig_dev != dev) - return -EOPNOTSUPP; - err = dsa_port_vlan_add(dp, SWITCHDEV_OBJ_PORT_VLAN(obj), - trans); + err = dsa_slave_vlan_add(dev, obj, trans); break; default: err = -EOPNOTSUPP; @@ -352,6 +382,23 @@ static int dsa_slave_port_obj_add(struct net_device *dev, return err; } +static int dsa_slave_vlan_del(struct net_device *dev, + const struct switchdev_obj *obj) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + + if (obj->orig_dev != dev) + return -EOPNOTSUPP; + + if (dp->bridge_dev && !br_vlan_enabled(dp->bridge_dev)) + return 0; + + /* Do not deprogram the CPU port as it may be shared with other user + * ports which can be members of this VLAN as well. 
+ */ + return dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj)); +} + static int dsa_slave_port_obj_del(struct net_device *dev, const struct switchdev_obj *obj) { @@ -371,9 +418,7 @@ static int dsa_slave_port_obj_del(struct net_device *dev, err = dsa_port_mdb_del(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; case SWITCHDEV_OBJ_ID_PORT_VLAN: - if (obj->orig_dev != dev) - return -EOPNOTSUPP; - err = dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj)); + err = dsa_slave_vlan_del(dev, obj); break; default: err = -EOPNOTSUPP; @@ -990,12 +1035,16 @@ static int dsa_slave_setup_tc_block(struct net_device *dev, static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { - switch (type) { - case TC_SETUP_BLOCK: + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + + if (type == TC_SETUP_BLOCK) return dsa_slave_setup_tc_block(dev, type_data); - default: + + if (!ds->ops->port_setup_tc) return -EOPNOTSUPP; - } + + return ds->ops->port_setup_tc(ds, dp->index, type, type_data); } static void dsa_slave_get_stats64(struct net_device *dev, @@ -1073,6 +1122,9 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto, * need to emulate the switchdev prepare + commit phase. */ if (dp->bridge_dev) { + if (!br_vlan_enabled(dp->bridge_dev)) + return 0; + /* br_vlan_get_info() returns -EINVAL or -ENOENT if the * device, respectively the VID is not found, returning * 0 means success, which is a failure for us here. @@ -1082,8 +1134,15 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto, return -EBUSY; } - /* This API only allows programming tagged, non-PVID VIDs */ - return dsa_port_vid_add(dp, vid, 0); + ret = dsa_port_vid_add(dp, vid, 0); + if (ret) + return ret; + + ret = dsa_port_vid_add(dp->cpu_dp, vid, 0); + if (ret) + return ret; + + return 0; } static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, @@ -1097,6 +1156,9 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, * need to emulate the switchdev prepare + commit phase. */ if (dp->bridge_dev) { + if (!br_vlan_enabled(dp->bridge_dev)) + return 0; + /* br_vlan_get_info() returns -EINVAL or -ENOENT if the * device, respectively the VID is not found, returning * 0 means success, which is a failure for us here. @@ -1106,11 +1168,10 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, return -EBUSY; } - ret = dsa_port_vid_del(dp, vid); - if (ret == -EOPNOTSUPP) - ret = 0; - - return ret; + /* Do not deprogram the CPU port as it may be shared with other user + * ports which can be members of this VLAN as well. 
+ */ + return dsa_port_vid_del(dp, vid); } static const struct ethtool_ops dsa_slave_ethtool_ops = { @@ -1357,8 +1418,9 @@ int dsa_slave_create(struct dsa_port *port) if (slave_dev == NULL) return -ENOMEM; - slave_dev->features = master->vlan_features | NETIF_F_HW_TC | - NETIF_F_HW_VLAN_CTAG_FILTER; + slave_dev->features = master->vlan_features | NETIF_F_HW_TC; + if (ds->ops->port_vlan_add && ds->ops->port_vlan_del) + slave_dev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; slave_dev->hw_features |= NETIF_F_HW_TC; slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; if (!IS_ERR_OR_NULL(port->mac)) diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 09d9286b27cc..6a9607518823 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -128,57 +128,51 @@ static int dsa_switch_fdb_del(struct dsa_switch *ds, return ds->ops->port_fdb_del(ds, port, info->addr, info->vid); } -static int -dsa_switch_mdb_prepare_bitmap(struct dsa_switch *ds, - const struct switchdev_obj_port_mdb *mdb, - const unsigned long *bitmap) +static bool dsa_switch_mdb_match(struct dsa_switch *ds, int port, + struct dsa_notifier_mdb_info *info) +{ + if (ds->index == info->sw_index && port == info->port) + return true; + + if (dsa_is_dsa_port(ds, port)) + return true; + + return false; +} + +static int dsa_switch_mdb_prepare(struct dsa_switch *ds, + struct dsa_notifier_mdb_info *info) { int port, err; if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add) return -EOPNOTSUPP; - for_each_set_bit(port, bitmap, ds->num_ports) { - err = ds->ops->port_mdb_prepare(ds, port, mdb); - if (err) - return err; + for (port = 0; port < ds->num_ports; port++) { + if (dsa_switch_mdb_match(ds, port, info)) { + err = ds->ops->port_mdb_prepare(ds, port, info->mdb); + if (err) + return err; + } } return 0; } -static void dsa_switch_mdb_add_bitmap(struct dsa_switch *ds, - const struct switchdev_obj_port_mdb *mdb, - const unsigned long *bitmap) -{ - int port; - - if (!ds->ops->port_mdb_add) - return; - - for_each_set_bit(port, bitmap, ds->num_ports) - ds->ops->port_mdb_add(ds, port, mdb); -} - static int dsa_switch_mdb_add(struct dsa_switch *ds, struct dsa_notifier_mdb_info *info) { - const struct switchdev_obj_port_mdb *mdb = info->mdb; - struct switchdev_trans *trans = info->trans; int port; - /* Build a mask of Multicast group members */ - bitmap_zero(ds->bitmap, ds->num_ports); - if (ds->index == info->sw_index) - set_bit(info->port, ds->bitmap); - for (port = 0; port < ds->num_ports; port++) - if (dsa_is_dsa_port(ds, port)) - set_bit(port, ds->bitmap); + if (switchdev_trans_ph_prepare(info->trans)) + return dsa_switch_mdb_prepare(ds, info); - if (switchdev_trans_ph_prepare(trans)) - return dsa_switch_mdb_prepare_bitmap(ds, mdb, ds->bitmap); + if (!ds->ops->port_mdb_add) + return 0; - dsa_switch_mdb_add_bitmap(ds, mdb, ds->bitmap); + for (port = 0; port < ds->num_ports; port++) + if (dsa_switch_mdb_match(ds, port, info)) + ds->ops->port_mdb_add(ds, port, info->mdb); return 0; } @@ -186,13 +180,11 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds, static int dsa_switch_mdb_del(struct dsa_switch *ds, struct dsa_notifier_mdb_info *info) { - const struct switchdev_obj_port_mdb *mdb = info->mdb; - if (!ds->ops->port_mdb_del) return -EOPNOTSUPP; if (ds->index == info->sw_index) - return ds->ops->port_mdb_del(ds, info->port, mdb); + return ds->ops->port_mdb_del(ds, info->port, info->mdb); return 0; } @@ -234,59 +226,55 @@ static int dsa_port_vlan_check(struct dsa_switch *ds, int port, (void *)vlan); } -static int -dsa_switch_vlan_prepare_bitmap(struct 
dsa_switch *ds, - const struct switchdev_obj_port_vlan *vlan, - const unsigned long *bitmap) +static bool dsa_switch_vlan_match(struct dsa_switch *ds, int port, + struct dsa_notifier_vlan_info *info) +{ + if (ds->index == info->sw_index && port == info->port) + return true; + + if (dsa_is_dsa_port(ds, port)) + return true; + + return false; +} + +static int dsa_switch_vlan_prepare(struct dsa_switch *ds, + struct dsa_notifier_vlan_info *info) { int port, err; if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add) return -EOPNOTSUPP; - for_each_set_bit(port, bitmap, ds->num_ports) { - err = dsa_port_vlan_check(ds, port, vlan); - if (err) - return err; + for (port = 0; port < ds->num_ports; port++) { + if (dsa_switch_vlan_match(ds, port, info)) { + err = dsa_port_vlan_check(ds, port, info->vlan); + if (err) + return err; - err = ds->ops->port_vlan_prepare(ds, port, vlan); - if (err) - return err; + err = ds->ops->port_vlan_prepare(ds, port, info->vlan); + if (err) + return err; + } } return 0; } -static void -dsa_switch_vlan_add_bitmap(struct dsa_switch *ds, - const struct switchdev_obj_port_vlan *vlan, - const unsigned long *bitmap) -{ - int port; - - for_each_set_bit(port, bitmap, ds->num_ports) - ds->ops->port_vlan_add(ds, port, vlan); -} - static int dsa_switch_vlan_add(struct dsa_switch *ds, struct dsa_notifier_vlan_info *info) { - const struct switchdev_obj_port_vlan *vlan = info->vlan; - struct switchdev_trans *trans = info->trans; int port; - /* Build a mask of VLAN members */ - bitmap_zero(ds->bitmap, ds->num_ports); - if (ds->index == info->sw_index) - set_bit(info->port, ds->bitmap); - for (port = 0; port < ds->num_ports; port++) - if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) - set_bit(port, ds->bitmap); + if (switchdev_trans_ph_prepare(info->trans)) + return dsa_switch_vlan_prepare(ds, info); - if (switchdev_trans_ph_prepare(trans)) - return dsa_switch_vlan_prepare_bitmap(ds, vlan, ds->bitmap); + if (!ds->ops->port_vlan_add) + return 0; - dsa_switch_vlan_add_bitmap(ds, vlan, ds->bitmap); + for (port = 0; port < ds->num_ports; port++) + if (dsa_switch_vlan_match(ds, port, info)) + ds->ops->port_vlan_add(ds, port, info->vlan); return 0; } @@ -294,14 +282,15 @@ static int dsa_switch_vlan_add(struct dsa_switch *ds, static int dsa_switch_vlan_del(struct dsa_switch *ds, struct dsa_notifier_vlan_info *info) { - const struct switchdev_obj_port_vlan *vlan = info->vlan; - if (!ds->ops->port_vlan_del) return -EOPNOTSUPP; if (ds->index == info->sw_index) - return ds->ops->port_vlan_del(ds, info->port, vlan); + return ds->ops->port_vlan_del(ds, info->port, info->vlan); + /* Do not deprogram the DSA links as they may be used as conduit + * for other VLAN members in the fabric. + */ return 0; } diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index 67a1bc635a7b..9c1cc2482b68 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -93,6 +93,79 @@ int dsa_8021q_rx_source_port(u16 vid) } EXPORT_SYMBOL_GPL(dsa_8021q_rx_source_port); +static int dsa_8021q_restore_pvid(struct dsa_switch *ds, int port) +{ + struct bridge_vlan_info vinfo; + struct net_device *slave; + u16 pvid; + int err; + + if (!dsa_is_user_port(ds, port)) + return 0; + + slave = ds->ports[port].slave; + + err = br_vlan_get_pvid(slave, &pvid); + if (err < 0) + /* There is no pvid on the bridge for this port, which is + * perfectly valid. Nothing to restore, bye-bye! 
+ */ + return 0; + + err = br_vlan_get_info(slave, pvid, &vinfo); + if (err < 0) { + dev_err(ds->dev, "Couldn't determine PVID attributes\n"); + return err; + } + + return dsa_port_vid_add(&ds->ports[port], pvid, vinfo.flags); +} + +/* If @enabled is true, installs @vid with @flags into the switch port's HW + * filter. + * If @enabled is false, deletes @vid (ignores @flags) from the port. Had the + * user explicitly configured this @vid through the bridge core, then the @vid + * is installed again, but this time with the flags from the bridge layer. + */ +static int dsa_8021q_vid_apply(struct dsa_switch *ds, int port, u16 vid, + u16 flags, bool enabled) +{ + struct dsa_port *dp = &ds->ports[port]; + struct bridge_vlan_info vinfo; + int err; + + if (enabled) + return dsa_port_vid_add(dp, vid, flags); + + err = dsa_port_vid_del(dp, vid); + if (err < 0) + return err; + + /* Nothing to restore from the bridge for a non-user port. + * The CPU port VLANs are restored implicitly with the user ports, + * similar to how the bridge does in dsa_slave_vlan_add and + * dsa_slave_vlan_del. + */ + if (!dsa_is_user_port(ds, port)) + return 0; + + err = br_vlan_get_info(dp->slave, vid, &vinfo); + /* Couldn't determine bridge attributes for this vid, + * it means the bridge had not configured it. + */ + if (err < 0) + return 0; + + /* Restore the VID from the bridge */ + err = dsa_port_vid_add(dp, vid, vinfo.flags); + if (err < 0) + return err; + + vinfo.flags &= ~BRIDGE_VLAN_INFO_PVID; + + return dsa_port_vid_add(dp->cpu_dp, vid, vinfo.flags); +} + /* RX VLAN tagging (left) and TX VLAN tagging (right) setup shown for a single * front-panel switch port (here swp0). * @@ -148,8 +221,6 @@ EXPORT_SYMBOL_GPL(dsa_8021q_rx_source_port); int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int port, bool enabled) { int upstream = dsa_upstream_port(ds, port); - struct dsa_port *dp = &ds->ports[port]; - struct dsa_port *upstream_dp = &ds->ports[upstream]; u16 rx_vid = dsa_8021q_rx_vid(ds, port); u16 tx_vid = dsa_8021q_tx_vid(ds, port); int i, err; @@ -166,7 +237,6 @@ int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int port, bool enabled) * restrictions, so there are no concerns about leaking traffic. */ for (i = 0; i < ds->num_ports; i++) { - struct dsa_port *other_dp = &ds->ports[i]; u16 flags; if (i == upstream) @@ -179,10 +249,7 @@ int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int port, bool enabled) /* The RX VID is a regular VLAN on all others */ flags = BRIDGE_VLAN_INFO_UNTAGGED; - if (enabled) - err = dsa_port_vid_add(other_dp, rx_vid, flags); - else - err = dsa_port_vid_del(other_dp, rx_vid); + err = dsa_8021q_vid_apply(ds, i, rx_vid, flags, enabled); if (err) { dev_err(ds->dev, "Failed to apply RX VID %d to port %d: %d\n", rx_vid, port, err); @@ -193,10 +260,7 @@ int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int port, bool enabled) /* CPU port needs to see this port's RX VID * as tagged egress. 
*/ - if (enabled) - err = dsa_port_vid_add(upstream_dp, rx_vid, 0); - else - err = dsa_port_vid_del(upstream_dp, rx_vid); + err = dsa_8021q_vid_apply(ds, upstream, rx_vid, 0, enabled); if (err) { dev_err(ds->dev, "Failed to apply RX VID %d to port %d: %d\n", rx_vid, port, err); @@ -204,26 +268,24 @@ int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int port, bool enabled) } /* Finally apply the TX VID on this port and on the CPU port */ - if (enabled) - err = dsa_port_vid_add(dp, tx_vid, BRIDGE_VLAN_INFO_UNTAGGED); - else - err = dsa_port_vid_del(dp, tx_vid); + err = dsa_8021q_vid_apply(ds, port, tx_vid, BRIDGE_VLAN_INFO_UNTAGGED, + enabled); if (err) { dev_err(ds->dev, "Failed to apply TX VID %d on port %d: %d\n", tx_vid, port, err); return err; } - if (enabled) - err = dsa_port_vid_add(upstream_dp, tx_vid, 0); - else - err = dsa_port_vid_del(upstream_dp, tx_vid); + err = dsa_8021q_vid_apply(ds, upstream, tx_vid, 0, enabled); if (err) { dev_err(ds->dev, "Failed to apply TX VID %d on port %d: %d\n", tx_vid, upstream, err); return err; } - return 0; + if (!enabled) + err = dsa_8021q_restore_pvid(ds, port); + + return err; } EXPORT_SYMBOL_GPL(dsa_port_setup_8021q_tagging); diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index b4872b87d4a6..73605bcbb385 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -70,6 +70,67 @@ static struct sk_buff *ksz_common_rcv(struct sk_buff *skb, } /* + * For Ingress (Host -> KSZ8795), 1 byte is added before FCS. + * --------------------------------------------------------------------------- + * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag(1byte)|FCS(4bytes) + * --------------------------------------------------------------------------- + * tag : each bit represents port (eg, 0x01=port1, 0x02=port2, 0x10=port5) + * + * For Egress (KSZ8795 -> Host), 1 byte is added before FCS. + * --------------------------------------------------------------------------- + * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|FCS(4bytes) + * --------------------------------------------------------------------------- + * tag0 : zero-based value represents port + * (eg, 0x00=port1, 0x02=port3, 0x06=port7) + */ + +#define KSZ8795_INGRESS_TAG_LEN 1 + +#define KSZ8795_TAIL_TAG_OVERRIDE BIT(6) +#define KSZ8795_TAIL_TAG_LOOKUP BIT(7) + +static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct sk_buff *nskb; + u8 *tag; + u8 *addr; + + nskb = ksz_common_xmit(skb, dev, KSZ8795_INGRESS_TAG_LEN); + if (!nskb) + return NULL; + + /* Tag encoding */ + tag = skb_put(nskb, KSZ8795_INGRESS_TAG_LEN); + addr = skb_mac_header(nskb); + + *tag = 1 << dp->index; + if (is_link_local_ether_addr(addr)) + *tag |= KSZ8795_TAIL_TAG_OVERRIDE; + + return nskb; +} + +static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt) +{ + u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; + + return ksz_common_rcv(skb, dev, tag[0] & 7, KSZ_EGRESS_TAG_LEN); +} + +static const struct dsa_device_ops ksz8795_netdev_ops = { + .name = "ksz8795", + .proto = DSA_TAG_PROTO_KSZ8795, + .xmit = ksz8795_xmit, + .rcv = ksz8795_rcv, + .overhead = KSZ8795_INGRESS_TAG_LEN, +}; + +DSA_TAG_DRIVER(ksz8795_netdev_ops); +MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ8795); + +/* * For Ingress (Host -> KSZ9477), 2 bytes are added before FCS. 
* --------------------------------------------------------------------------- * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|tag1(1byte)|FCS(4bytes) @@ -183,6 +244,7 @@ DSA_TAG_DRIVER(ksz9893_netdev_ops); MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ9893); static struct dsa_tag_driver *dsa_tag_driver_array[] = { + &DSA_TAG_DRIVER_NAME(ksz8795_netdev_ops), &DSA_TAG_DRIVER_NAME(ksz9477_netdev_ops), &DSA_TAG_DRIVER_NAME(ksz9893_netdev_ops), }; diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 47ee88163a9d..9c9aff3e52cf 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -89,7 +89,8 @@ static struct sk_buff *sja1105_xmit(struct sk_buff *skb, struct dsa_port *dp = dsa_slave_to_port(netdev); struct dsa_switch *ds = dp->ds; u16 tx_vid = dsa_8021q_tx_vid(ds, dp->index); - u8 pcp = skb->priority; + u16 queue_mapping = skb_get_queue_mapping(skb); + u8 pcp = netdev_txq_to_tc(netdev, queue_mapping); /* Transmitting management traffic does not rely upon switch tagging, * but instead SPI-installed management routes. Part 2 of this diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index ed2301ef872e..70f92aaca411 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1845,13 +1845,8 @@ static __net_init int inet_init_net(struct net *net) return 0; } -static __net_exit void inet_exit_net(struct net *net) -{ -} - static __net_initdata struct pernet_operations af_inet_ops = { .init = inet_init_net, - .exit = inet_exit_net, }; static int __init init_inet_pernet_ops(void) diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 7bd29e694603..9a0fe0c2fa02 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -15,6 +15,7 @@ #include <net/sock.h> #include <net/route.h> #include <net/tcp_states.h> +#include <net/sock_reuseport.h> int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { @@ -69,6 +70,7 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len } inet->inet_daddr = fl4->daddr; inet->inet_dport = usin->sin_port; + reuseport_has_conns(sk, true); sk->sk_state = TCP_ESTABLISHED; sk_set_txhash(sk); inet->inet_id = jiffies; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index cc7ef0d05bbd..5eb73775c3f7 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1266,6 +1266,7 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, cork->length = 0; cork->ttl = ipc->ttl; cork->tos = ipc->tos; + cork->mark = ipc->sockc.mark; cork->priority = ipc->priority; cork->transmit_time = ipc->sockc.transmit_time; cork->tx_flags = 0; @@ -1529,7 +1530,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, } skb->priority = (cork->tos != -1) ? 
cork->priority: sk->sk_priority; - skb->mark = sk->sk_mark; + skb->mark = cork->mark; skb->tstamp = cork->transmit_time; /* * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index c07bc82cbbe9..313470f6bb14 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1134,8 +1134,8 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, if (!found) { /* Create a new entry if allowable */ - if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 || - (c = ipmr_cache_alloc_unres()) == NULL) { + c = ipmr_cache_alloc_unres(); + if (!c) { spin_unlock_bh(&mfc_unres_lock); kfree_skb(skb); diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 69e76d677f9e..f17b402111ce 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -272,7 +272,7 @@ config IP_NF_TARGET_CLUSTERIP The CLUSTERIP target allows you to build load-balancing clusters of network servers without having a dedicated load-balancing router/server/switch. - + To compile it as a module, choose M here. If unsure, say N. config IP_NF_TARGET_ECN @@ -281,7 +281,7 @@ config IP_NF_TARGET_ECN depends on NETFILTER_ADVANCED ---help--- This option adds a `ECN' target, which can be used in the iptables mangle - table. + table. You can use this target to remove the ECN bits from the IPv4 header of an IP packet. This is particularly useful, if you need to work around @@ -306,7 +306,7 @@ config IP_NF_RAW This option adds a `raw' table to iptables. This table is the very first in the netfilter framework and hooks in at the PREROUTING and OUTPUT chains. - + If you want to compile it as a module, say M here and read <file:Documentation/kbuild/modules.rst>. If unsure, say `N'. @@ -318,7 +318,7 @@ config IP_NF_SECURITY help This option adds a `security' table to iptables, for use with Mandatory Access Control (MAC) policy. - + If unsure, say N. 
endif # IP_NF_IPTABLES diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index c50e0ec095d2..7c497c78105f 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -31,7 +31,7 @@ obj-$(CONFIG_NFT_DUP_IPV4) += nft_dup_ipv4.o # flow table support obj-$(CONFIG_NF_FLOW_TABLE_IPV4) += nf_flow_table_ipv4.o -# generic IP tables +# generic IP tables obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o # the three instances of ip_tables diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 0e70f3f65f6f..748dc3ce58d3 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -36,8 +36,8 @@ synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) opts.options |= XT_SYNPROXY_OPT_ECN; opts.options &= info->options; - opts.mss_encode = opts.mss; - opts.mss = info->mss; + opts.mss_encode = opts.mss_option; + opts.mss_option = info->mss; if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy_init_timestamp_cookie(info, &opts); else diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 5fe5a3981d43..fc34fd1668d6 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1151,7 +1151,7 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh, .fc_encap_type = cfg->nh_encap_type, }; u32 tb_id = l3mdev_fib_table(cfg->dev); - int err = -EINVAL; + int err; err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack); if (err) { diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 9d24ef5c5d8f..535427292194 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -781,7 +781,7 @@ static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } else if (!ipc.oif) ipc.oif = inet->uc_index; - flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, + flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), faddr, saddr, 0, 0, sk->sk_uid); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 40a6abbc9cf6..80da5a66d5d7 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -375,7 +375,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb_reserve(skb, hlen); skb->priority = sk->sk_priority; - skb->mark = sk->sk_mark; + skb->mark = sockc->mark; skb->tstamp = sockc->transmit_time; skb_dst_set(skb, &rt->dst); *rtp = NULL; @@ -623,7 +623,7 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } } - flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, + flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos, RT_SCOPE_UNIVERSE, hdrincl ? 
IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk) | diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 0b980e841927..59ded25acd04 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -820,6 +820,15 @@ static struct ctl_table ipv4_net_table[] = { .extra2 = &tcp_min_snd_mss_max, }, { + .procname = "tcp_mtu_probe_floor", + .data = &init_net.ipv4.sysctl_tcp_mtu_probe_floor, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &tcp_min_snd_mss_min, + .extra2 = &tcp_min_snd_mss_max, + }, + { .procname = "tcp_probe_threshold", .data = &init_net.ipv4.sysctl_tcp_probe_threshold, .maxlen = sizeof(int), diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 61082065b26a..79c325a07ba5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1182,7 +1182,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) struct sockcm_cookie sockc; int flags, err, copied = 0; int mss_now = 0, size_goal, copied_syn = 0; - bool process_backlog = false; + int process_backlog = 0; bool zc = false; long timeo; @@ -1274,9 +1274,10 @@ new_segment: if (!sk_stream_memory_free(sk)) goto wait_for_sndbuf; - if (process_backlog && sk_flush_backlog(sk)) { - process_backlog = false; - goto restart; + if (unlikely(process_backlog >= 16)) { + process_backlog = 0; + if (sk_flush_backlog(sk)) + goto restart; } first_skb = tcp_rtx_and_write_queues_empty(sk); skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, @@ -1284,7 +1285,7 @@ new_segment: if (!skb) goto wait_for_memory; - process_backlog = true; + process_backlog++; skb->ip_summed = CHECKSUM_PARTIAL; skb_entail(sk, skb); @@ -1789,19 +1790,21 @@ static int tcp_zerocopy_receive(struct sock *sk, break; frags = skb_shinfo(skb)->frags; while (offset) { - if (frags->size > offset) + if (skb_frag_size(frags) > offset) goto out; - offset -= frags->size; + offset -= skb_frag_size(frags); frags++; } } - if (frags->size != PAGE_SIZE || frags->page_offset) { + if (skb_frag_size(frags) != PAGE_SIZE || skb_frag_off(frags)) { int remaining = zc->recv_skip_hint; + int size = skb_frag_size(frags); - while (remaining && (frags->size != PAGE_SIZE || - frags->page_offset)) { - remaining -= frags->size; + while (remaining && (size != PAGE_SIZE || + skb_frag_off(frags))) { + remaining -= size; frags++; + size = skb_frag_size(frags); } zc->recv_skip_hint -= remaining; break; @@ -2650,6 +2653,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->rx_opt.saw_tstamp = 0; tp->rx_opt.dsack = 0; tp->rx_opt.num_sacks = 0; + tp->rcv_ooopack = 0; /* Clean up fastopen related fields */ @@ -3292,6 +3296,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_bytes_retrans = tp->bytes_retrans; info->tcpi_dsack_dups = tp->dsack_dups; info->tcpi_reord_seen = tp->reord_seen; + info->tcpi_rcv_ooopack = tp->rcv_ooopack; + info->tcpi_snd_wnd = tp->snd_wnd; unlock_sock_fast(sk, slow); } EXPORT_SYMBOL_GPL(tcp_get_info); @@ -3794,8 +3800,8 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, return 1; for (i = 0; i < shi->nr_frags; ++i) { - const struct skb_frag_struct *f = &shi->frags[i]; - unsigned int offset = f->page_offset; + const skb_frag_t *f = &shi->frags[i]; + unsigned int offset = skb_frag_off(f); struct page *page = skb_frag_page(f) + (offset >> PAGE_SHIFT); sg_set_page(&sg, page, skb_frag_size(f), diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 56be7d27f208..95b59540eee1 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -346,7 +346,7 @@ static void 
bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) /* Calculate bdp based on min RTT and the estimated bottleneck bandwidth: * - * bdp = bw * min_rtt * gain + * bdp = ceil(bw * min_rtt * gain) * * The key factor, gain, controls the amount of queue. While a small gain * builds a smaller queue, it becomes more vulnerable to noise in RTT @@ -370,7 +370,9 @@ static u32 bbr_bdp(struct sock *sk, u32 bw, int gain) w = (u64)bw * bbr->min_rtt_us; - /* Apply a gain to the given value, then remove the BW_SCALE shift. */ + /* Apply a gain to the given value, remove the BW_SCALE shift, and + * round the value up to avoid a negative feedback loop. + */ bdp = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT; return bdp; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index a3a386236d93..81a8221d650a 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -81,13 +81,42 @@ static int tcp_diag_put_md5sig(struct sk_buff *skb, } #endif +static int tcp_diag_put_ulp(struct sk_buff *skb, struct sock *sk, + const struct tcp_ulp_ops *ulp_ops) +{ + struct nlattr *nest; + int err; + + nest = nla_nest_start_noflag(skb, INET_DIAG_ULP_INFO); + if (!nest) + return -EMSGSIZE; + + err = nla_put_string(skb, INET_ULP_INFO_NAME, ulp_ops->name); + if (err) + goto nla_failure; + + if (ulp_ops->get_info) + err = ulp_ops->get_info(sk, skb); + if (err) + goto nla_failure; + + nla_nest_end(skb, nest); + return 0; + +nla_failure: + nla_nest_cancel(skb, nest); + return err; +} + static int tcp_diag_get_aux(struct sock *sk, bool net_admin, struct sk_buff *skb) { + struct inet_connection_sock *icsk = inet_csk(sk); + int err = 0; + #ifdef CONFIG_TCP_MD5SIG if (net_admin) { struct tcp_md5sig_info *md5sig; - int err = 0; rcu_read_lock(); md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info); @@ -99,11 +128,21 @@ static int tcp_diag_get_aux(struct sock *sk, bool net_admin, } #endif + if (net_admin) { + const struct tcp_ulp_ops *ulp_ops; + + ulp_ops = icsk->icsk_ulp_ops; + if (ulp_ops) + err = tcp_diag_put_ulp(skb, sk, ulp_ops); + if (err) + return err; + } return 0; } static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin) { + struct inet_connection_sock *icsk = inet_csk(sk); size_t size = 0; #ifdef CONFIG_TCP_MD5SIG @@ -124,6 +163,17 @@ static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin) } #endif + if (net_admin && sk_fullsock(sk)) { + const struct tcp_ulp_ops *ulp_ops; + + ulp_ops = icsk->icsk_ulp_ops; + if (ulp_ops) { + size += nla_total_size(0) + + nla_total_size(TCP_ULP_NAME_MAX); + if (ulp_ops->get_info_size) + size += ulp_ops->get_info_size(sk); + } + } return size; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8a1cd93dbb09..3578357abe30 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3782,6 +3782,49 @@ static void smc_parse_options(const struct tcphdr *th, #endif } +/* Try to parse the MSS option from the TCP header. Return 0 on failure, clamped + * value on success. 
+ */ +static u16 tcp_parse_mss_option(const struct tcphdr *th, u16 user_mss) +{ + const unsigned char *ptr = (const unsigned char *)(th + 1); + int length = (th->doff * 4) - sizeof(struct tcphdr); + u16 mss = 0; + + while (length > 0) { + int opcode = *ptr++; + int opsize; + + switch (opcode) { + case TCPOPT_EOL: + return mss; + case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ + length--; + continue; + default: + if (length < 2) + return mss; + opsize = *ptr++; + if (opsize < 2) /* "silly options" */ + return mss; + if (opsize > length) + return mss; /* fail on partial options */ + if (opcode == TCPOPT_MSS && opsize == TCPOLEN_MSS) { + u16 in_mss = get_unaligned_be16(ptr); + + if (in_mss) { + if (user_mss && user_mss < in_mss) + in_mss = user_mss; + mss = in_mss; + } + } + ptr += opsize - 2; + length -= opsize; + } + } + return mss; +} + /* Look for tcp options. Normally only called on SYN and SYNACK packets. * But, this can also be called on packets in the established flow when * the fast version below fails. @@ -4512,6 +4555,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) tp->pred_flags = 0; inet_csk_schedule_ack(sk); + tp->rcv_ooopack += max_t(u16, 1, skb_shinfo(skb)->gso_segs); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE); seq = TCP_SKB_CB(skb)->seq; end_seq = TCP_SKB_CB(skb)->end_seq; @@ -6422,9 +6466,7 @@ EXPORT_SYMBOL(inet_reqsk_alloc); /* * Return true if a syncookie should be sent */ -static bool tcp_syn_flood_action(const struct sock *sk, - const struct sk_buff *skb, - const char *proto) +static bool tcp_syn_flood_action(const struct sock *sk, const char *proto) { struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; const char *msg = "Dropping request"; @@ -6444,7 +6486,7 @@ static bool tcp_syn_flood_action(const struct sock *sk, net->ipv4.sysctl_tcp_syncookies != 2 && xchg(&queue->synflood_warned, 1) == 0) net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n", - proto, ntohs(tcp_hdr(skb)->dest), msg); + proto, sk->sk_num, msg); return want_cookie; } @@ -6466,6 +6508,36 @@ static void tcp_reqsk_record_syn(const struct sock *sk, } } +/* If a SYN cookie is required and supported, returns a clamped MSS value to be + * used for SYN cookie generation. 
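As an aside on tcp_parse_mss_option() above: TCP options are type/length/value records, with EOL and NOP as the only single-byte kinds, and the length octet counts the kind and length bytes themselves. A minimal user-space sketch of the same walk, assuming nothing from the kernel tree (the demo_* names and constants below are illustrative only, not kernel API):

#include <stdint.h>

#define DEMO_TCPOPT_EOL		0
#define DEMO_TCPOPT_NOP		1
#define DEMO_TCPOPT_MSS		2
#define DEMO_TCPOLEN_MSS	4

/* Walk a raw TCP options buffer and return the advertised MSS, or 0. */
static uint16_t demo_parse_mss(const uint8_t *ptr, int length)
{
	uint16_t mss = 0;

	while (length > 0) {
		int opcode = *ptr++;
		int opsize;

		if (opcode == DEMO_TCPOPT_EOL)
			return mss;
		if (opcode == DEMO_TCPOPT_NOP) {	/* single-byte padding */
			length--;
			continue;
		}
		if (length < 2)
			return mss;
		opsize = *ptr++;
		if (opsize < 2 || opsize > length)
			return mss;	/* malformed or truncated option */
		if (opcode == DEMO_TCPOPT_MSS && opsize == DEMO_TCPOLEN_MSS)
			mss = (uint16_t)((ptr[0] << 8) | ptr[1]);
		ptr += opsize - 2;
		length -= opsize;
	}
	return mss;
}

Decrementing length by the full opsize (which already counts the kind and length octets) keeps the bounds check honest for unknown option kinds as well.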
+ */ +u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, + const struct tcp_request_sock_ops *af_ops, + struct sock *sk, struct tcphdr *th) +{ + struct tcp_sock *tp = tcp_sk(sk); + u16 mss; + + if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 && + !inet_csk_reqsk_queue_is_full(sk)) + return 0; + + if (!tcp_syn_flood_action(sk, rsk_ops->slab_name)) + return 0; + + if (sk_acceptq_is_full(sk)) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); + return 0; + } + + mss = tcp_parse_mss_option(th, tp->rx_opt.user_mss); + if (!mss) + mss = af_ops->mss_clamp; + + return mss; +} +EXPORT_SYMBOL_GPL(tcp_get_syncookie_mss); + int tcp_conn_request(struct request_sock_ops *rsk_ops, const struct tcp_request_sock_ops *af_ops, struct sock *sk, struct sk_buff *skb) @@ -6487,7 +6559,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, */ if ((net->ipv4.sysctl_tcp_syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) { - want_cookie = tcp_syn_flood_action(sk, skb, rsk_ops->slab_name); + want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name); if (!want_cookie) goto drop; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index d57641cb3477..fd394ad179a0 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1515,6 +1515,21 @@ static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb) return sk; } +u16 tcp_v4_get_syncookie(struct sock *sk, struct iphdr *iph, + struct tcphdr *th, u32 *cookie) +{ + u16 mss = 0; +#ifdef CONFIG_SYN_COOKIES + mss = tcp_get_syncookie_mss(&tcp_request_sock_ops, + &tcp_request_sock_ipv4_ops, sk, th); + if (mss) { + *cookie = __cookie_v4_init_sequence(iph, th, &mss); + tcp_synq_overflow(sk); + } +#endif + return mss; +} + /* The socket must have it's spinlock held when we get * here, unless it is a TCP_LISTEN socket. * @@ -2637,6 +2652,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_min_snd_mss = TCP_MIN_SND_MSS; net->ipv4.sysctl_tcp_probe_threshold = TCP_PROBE_THRESHOLD; net->ipv4.sysctl_tcp_probe_interval = TCP_PROBE_INTERVAL; + net->ipv4.sysctl_tcp_mtu_probe_floor = TCP_MIN_SND_MSS; net->ipv4.sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME; net->ipv4.sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8a645f304e6c..fec6d67bfd14 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1050,11 +1050,22 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, tcb = TCP_SKB_CB(skb); memset(&opts, 0, sizeof(opts)); - if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) + if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) { tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5); - else + } else { tcp_options_size = tcp_established_options(sk, skb, &opts, &md5); + /* Force a PSH flag on all (GSO) packets to expedite GRO flush + * at receiver : This slightly improve GRO performance. + * Note that we do not force the PSH flag for non GSO packets, + * because they might be sent under high congestion events, + * and in this case it is better to delay the delivery of 1-MSS + * packets and thus the corresponding ACK packet that would + * release the following packet. 
+ */ + if (tcp_skb_pcount(skb) > 1) + tcb->tcp_flags |= TCPHDR_PSH; + } tcp_header_size = tcp_options_size + sizeof(struct tcphdr); /* if no packet is in qdisc/device queue, then allow XPS to select @@ -1403,7 +1414,7 @@ static int __pskb_trim_head(struct sk_buff *skb, int len) } else { shinfo->frags[k] = shinfo->frags[i]; if (eat) { - shinfo->frags[k].page_offset += eat; + skb_frag_off_add(&shinfo->frags[k], eat); skb_frag_size_sub(&shinfo->frags[k], eat); eat = 0; } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index c801cd37cc2a..dbd9d2d0ee63 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -154,7 +154,7 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) } else { mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; mss = min(net->ipv4.sysctl_tcp_base_mss, mss); - mss = max(mss, 68 - tcp_sk(sk)->tcp_header_len); + mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor); mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss); icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d88821c794fb..cf755156a684 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -423,12 +423,13 @@ static struct sock *udp4_lib_lookup2(struct net *net, score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif); if (score > badness) { - if (sk->sk_reuseport) { + if (sk->sk_reuseport && + sk->sk_state != TCP_ESTABLISHED) { hash = udp_ehashfn(net, daddr, hnum, saddr, sport); result = reuseport_select_sock(sk, hash, skb, sizeof(struct udphdr)); - if (result) + if (result && !reuseport_has_conns(sk, false)) return result; } badness = score; @@ -1130,7 +1131,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) fl4 = &fl4_stack; - flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos, + flowi4_init_output(fl4, ipc.oif, ipc.sockc.mark, tos, RT_SCOPE_UNIVERSE, sk->sk_protocol, flow_flags, faddr, saddr, dport, inet->inet_sport, diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 9ab897ded4df..96f939248d2f 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -27,6 +27,7 @@ #include <net/ip6_route.h> #include <net/tcp_states.h> #include <net/dsfield.h> +#include <net/sock_reuseport.h> #include <linux/errqueue.h> #include <linux/uaccess.h> @@ -254,6 +255,7 @@ ipv4_connected: goto out; } + reuseport_has_conns(sk, true); sk->sk_state = TCP_ESTABLISHED; sk_set_txhash(sk); out: diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index b358f1a4dd08..da46c4284676 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -197,10 +197,8 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, struct ipv6hdr _ip6, *ip6; ip6 = skb_header_pointer(skb, *offset, sizeof(_ip6), &_ip6); - if (!ip6 || (ip6->version != 6)) { - printk(KERN_ERR "IPv6 header not found\n"); + if (!ip6 || (ip6->version != 6)) return -EBADMSG; - } start = *offset + sizeof(struct ipv6hdr); nexthdr = ip6->nexthdr; } diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index dd2d0b963260..d5779d6a6065 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -968,7 +968,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || ip_tunnel_info_af(tun_info) != AF_INET6)) - return -EINVAL; + goto tx_err; key = &tun_info->key; memset(&fl6, 0, sizeof(fl6)); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index fa014d5f1732..d432d0011c16 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -80,8 
+80,10 @@ static void ip6_sublist_rcv_finish(struct list_head *head) { struct sk_buff *skb, *next; - list_for_each_entry_safe(skb, next, head, list) + list_for_each_entry_safe(skb, next, head, list) { + skb_list_del_init(skb); dst_input(skb); + } } static void ip6_list_rcv_finish(struct net *net, struct sock *sk, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 8e49fd62eea9..89a4c7c2e25d 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1294,6 +1294,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, cork->base.fragsize = mtu; cork->base.gso_size = ipc6->gso_size; cork->base.tx_flags = 0; + cork->base.mark = ipc6->sockc.mark; sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags); if (dst_allfrag(xfrm_dst_path(&rt->dst))) @@ -1764,7 +1765,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, hdr->daddr = *final_dst; skb->priority = sk->sk_priority; - skb->mark = sk->sk_mark; + skb->mark = cork->base.mark; skb->tstamp = cork->base.transmit_time; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index e80d36c5073d..857a89ad4d6c 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1148,8 +1148,8 @@ static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi, * Create a new entry if allowable */ - if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 || - (c = ip6mr_cache_alloc_unres()) == NULL) { + c = ip6mr_cache_alloc_unres(); + if (!c) { spin_unlock_bh(&mfc_unres_lock); kfree_skb(skb); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 083cc1c94cd3..53caf59c591e 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -196,6 +196,7 @@ static inline int ndisc_is_useropt(const struct net_device *dev, { return opt->nd_opt_type == ND_OPT_RDNSS || opt->nd_opt_type == ND_OPT_DNSSL || + opt->nd_opt_type == ND_OPT_CAPTIVE_PORTAL || ndisc_ops_is_useropt(dev, opt->nd_opt_type); } diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 61819ed858b1..a9bff556d3b2 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -113,9 +113,9 @@ int __nf_ip6_route(struct net *net, struct dst_entry **dst, EXPORT_SYMBOL_GPL(__nf_ip6_route); int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, - struct nf_ct_bridge_frag_data *data, + struct nf_bridge_frag_data *data, int (*output)(struct net *, struct sock *sk, - const struct nf_ct_bridge_frag_data *data, + const struct nf_bridge_frag_data *data, struct sk_buff *)) { int frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size; diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 5cdb4a69d277..fd1f52a21bf1 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -36,8 +36,8 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) opts.options |= XT_SYNPROXY_OPT_ECN; opts.options &= info->options; - opts.mss_encode = opts.mss; - opts.mss = info->mss; + opts.mss_encode = opts.mss_option; + opts.mss_option = info->mss; if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) synproxy_init_timestamp_cookie(info, &opts); else diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index 0fc6326ef499..c52ff929c93b 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -16,7 +16,7 @@ #include <net/ipv6.h> #include <linux/netfilter/x_tables.h> -#include <linux/netfilter_ipv6/ip6_tables.h> +#include <linux/netfilter_ipv6.h> #include <linux/netfilter_ipv6/ip6t_ipv6header.h> MODULE_LICENSE("GPL"); @@ -42,7 +42,7 @@ 
ipv6header_mt6(const struct sk_buff *skb, struct xt_action_param *par) len = skb->len - ptr; temp = 0; - while (ip6t_ext_hdr(nexthdr)) { + while (nf_ip6_ext_hdr(nexthdr)) { const struct ipv6_opt_hdr *hp; struct ipv6_opt_hdr _hdr; int hdrlen; diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index f53bd8f01219..22b80db6d882 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -18,7 +18,7 @@ #include <net/route.h> #include <linux/netfilter.h> -#include <linux/netfilter_ipv6/ip6_tables.h> +#include <linux/netfilter_ipv6.h> #include <linux/netfilter/xt_LOG.h> #include <net/netfilter/nf_log.h> @@ -70,7 +70,7 @@ static void dump_ipv6_packet(struct net *net, struct nf_log_buf *m, fragment = 0; ptr = ip6hoff + sizeof(struct ipv6hdr); currenthdr = ih->nexthdr; - while (currenthdr != NEXTHDR_NONE && ip6t_ext_hdr(currenthdr)) { + while (currenthdr != NEXTHDR_NONE && nf_ip6_ext_hdr(currenthdr)) { struct ipv6_opt_hdr _hdr; const struct ipv6_opt_hdr *hp; diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c index 437d95545c31..b9df879c48d3 100644 --- a/net/ipv6/netfilter/nf_socket_ipv6.c +++ b/net/ipv6/netfilter/nf_socket_ipv6.c @@ -12,7 +12,6 @@ #include <net/sock.h> #include <net/inet_sock.h> #include <net/inet6_hashtables.h> -#include <net/netfilter/ipv6/nf_defrag_ipv6.h> #include <net/netfilter/nf_socket.h> #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <net/netfilter/nf_conntrack.h> diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 8a6131991e38..6e1888ee4036 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -646,7 +646,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb->protocol = htons(ETH_P_IPV6); skb->priority = sk->sk_priority; - skb->mark = sk->sk_mark; + skb->mark = sockc->mark; skb->tstamp = sockc->transmit_time; skb_put(skb, length); @@ -810,6 +810,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipcm6_init(&ipc6); ipc6.sockc.tsflags = sk->sk_tsflags; + ipc6.sockc.mark = sk->sk_mark; if (sin6) { if (addr_len < SIN6_LEN_RFC2133) @@ -891,6 +892,7 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) opt = ipv6_fixup_options(&opt_space, opt); fl6.flowi6_proto = proto; + fl6.flowi6_mark = ipc6.sockc.mark; if (!hdrincl) { rfv.msg = msg; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 546088e50815..a63ff85fe141 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -227,7 +227,7 @@ static void ip6_confirm_neigh(const struct dst_entry *dst, const void *daddr) struct net_device *dev = dst->dev; struct rt6_info *rt = (struct rt6_info *)dst; - daddr = choose_neigh_daddr(&rt->rt6i_gateway, NULL, daddr); + daddr = choose_neigh_daddr(rt6_nexthop(rt, &in6addr_any), NULL, daddr); if (!daddr) return; if (dev->flags & (IFF_NOARP | IFF_LOOPBACK)) @@ -2725,10 +2725,9 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, rcu_read_lock(); res.f6i = rcu_dereference(rt6->from); - if (!res.f6i) { - rcu_read_unlock(); - return; - } + if (!res.f6i) + goto out_unlock; + res.fib6_flags = res.f6i->fib6_flags; res.fib6_type = res.f6i->fib6_type; @@ -2744,10 +2743,8 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, /* fib6_info uses a nexthop that does not have fib6_nh * using the dst->dev + gw. Should be impossible. 
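The __ip6_rt_update_pmtu() hunk here replaces the duplicated rcu_read_unlock(); return; pairs with a single out_unlock label. A minimal sketch of that single-exit pattern, assuming a kernel build environment; the demo_* names are purely illustrative:

#include <linux/rcupdate.h>
#include <linux/types.h>

struct demo_ctx;
bool demo_lookup(struct demo_ctx *ctx);
void demo_apply(struct demo_ctx *ctx);

static void demo_update(struct demo_ctx *ctx)
{
	rcu_read_lock();

	if (!demo_lookup(ctx))
		goto out_unlock;	/* bail out, but still drop the lock */

	demo_apply(ctx);

out_unlock:
	rcu_read_unlock();
}

Funnelling every early exit through one label keeps the lock/unlock pairing obvious and avoids the easy mistake of adding a new return path that forgets the unlock.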
*/ - if (!arg.match) { - rcu_read_unlock(); - return; - } + if (!arg.match) + goto out_unlock; res.nh = arg.match; } else { @@ -2760,6 +2757,7 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, if (rt6_insert_exception(nrt6, &res)) dst_release_immediate(&nrt6->dst); } +out_unlock: rcu_read_unlock(); } } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5da069e91cac..87f44d3250ee 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1063,6 +1063,21 @@ static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) return sk; } +u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, + struct tcphdr *th, u32 *cookie) +{ + u16 mss = 0; +#ifdef CONFIG_SYN_COOKIES + mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops, + &tcp_request_sock_ipv6_ops, sk, th); + if (mss) { + *cookie = __cookie_v6_init_sequence(iph, th, &mss); + tcp_synq_overflow(sk); + } +#endif + return mss; +} + static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { if (skb->protocol == htons(ETH_P_IP)) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 827fe7385078..aae4938f3dea 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -158,13 +158,14 @@ static struct sock *udp6_lib_lookup2(struct net *net, score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif); if (score > badness) { - if (sk->sk_reuseport) { + if (sk->sk_reuseport && + sk->sk_state != TCP_ESTABLISHED) { hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); result = reuseport_select_sock(sk, hash, skb, sizeof(struct udphdr)); - if (result) + if (result && !reuseport_has_conns(sk, false)) return result; } result = sk; @@ -1230,6 +1231,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ipcm6_init(&ipc6); ipc6.gso_size = up->gso_size; ipc6.sockc.tsflags = sk->sk_tsflags; + ipc6.sockc.mark = sk->sk_mark; /* destination address check */ if (sin6) { @@ -1352,7 +1354,7 @@ do_udp_sendmsg: if (!fl6.flowi6_oif) fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex; - fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_mark = ipc6.sockc.mark; fl6.flowi6_uid = sk->sk_uid; if (msg->msg_controllen) { diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 5dbc0c48f8cb..8f12f5c6ab87 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -379,7 +379,7 @@ static int kcm_parse_func_strparser(struct strparser *strp, struct sk_buff *skb) struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp); struct bpf_prog *prog = psock->bpf_prog; - return (*prog->bpf_func)(skb, prog->insnsi); + return BPF_PROG_RUN(prog, skb); } static int kcm_read_sock_done(struct strparser *strp, int err) @@ -635,15 +635,15 @@ do_frag_list: frag_offset = 0; do_frag: frag = &skb_shinfo(skb)->frags[fragidx]; - if (WARN_ON(!frag->size)) { + if (WARN_ON(!skb_frag_size(frag))) { ret = -EINVAL; goto out; } ret = kernel_sendpage(psock->sk->sk_socket, - frag->page.p, - frag->page_offset + frag_offset, - frag->size - frag_offset, + skb_frag_page(frag), + skb_frag_off(frag) + frag_offset, + skb_frag_size(frag) - frag_offset, MSG_DONTWAIT); if (ret <= 0) { if (ret == -EAGAIN) { @@ -678,7 +678,7 @@ do_frag: sent += ret; frag_offset += ret; KCM_STATS_ADD(psock->stats.tx_bytes, ret); - if (frag_offset < frag->size) { + if (frag_offset < skb_frag_size(frag)) { /* Not finished with this frag */ goto do_frag; } diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 01b0dad24500..4d1c335e06e5 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -178,17 +178,54 @@ static void 
sta_rx_agg_reorder_timer_expired(struct timer_list *t) rcu_read_unlock(); } -static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid, +static void ieee80211_add_addbaext(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, + const struct ieee80211_addba_ext_ie *req) +{ + struct ieee80211_supported_band *sband; + struct ieee80211_addba_ext_ie *resp; + const struct ieee80211_sta_he_cap *he_cap; + u8 frag_level, cap_frag_level; + u8 *pos; + + sband = ieee80211_get_sband(sdata); + if (!sband) + return; + he_cap = ieee80211_get_he_iftype_cap(sband, sdata->vif.type); + if (!he_cap) + return; + + pos = skb_put_zero(skb, 2 + sizeof(struct ieee80211_addba_ext_ie)); + *pos++ = WLAN_EID_ADDBA_EXT; + *pos++ = sizeof(struct ieee80211_addba_ext_ie); + resp = (struct ieee80211_addba_ext_ie *)pos; + resp->data = req->data & IEEE80211_ADDBA_EXT_NO_FRAG; + + frag_level = u32_get_bits(req->data, + IEEE80211_ADDBA_EXT_FRAG_LEVEL_MASK); + cap_frag_level = u32_get_bits(he_cap->he_cap_elem.mac_cap_info[0], + IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_MASK); + if (frag_level > cap_frag_level) + frag_level = cap_frag_level; + resp->data |= u8_encode_bits(frag_level, + IEEE80211_ADDBA_EXT_FRAG_LEVEL_MASK); +} + +static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid, u8 dialog_token, u16 status, u16 policy, - u16 buf_size, u16 timeout) + u16 buf_size, u16 timeout, + const struct ieee80211_addba_ext_ie *addbaext) { + struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; bool amsdu = ieee80211_hw_check(&local->hw, SUPPORTS_AMSDU_IN_AMPDU); u16 capab; - skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); + skb = dev_alloc_skb(sizeof(*mgmt) + + 2 + sizeof(struct ieee80211_addba_ext_ie) + + local->hw.extra_tx_headroom); if (!skb) return; @@ -222,13 +259,17 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout); mgmt->u.action.u.addba_resp.status = cpu_to_le16(status); + if (sta->sta.he_cap.has_he && addbaext) + ieee80211_add_addbaext(sdata, skb, addbaext); + ieee80211_tx_skb(sdata, skb); } void ___ieee80211_start_rx_ba_session(struct sta_info *sta, u8 dialog_token, u16 timeout, u16 start_seq_num, u16 ba_policy, u16 tid, - u16 buf_size, bool tx, bool auto_seq) + u16 buf_size, bool tx, bool auto_seq, + const struct ieee80211_addba_ext_ie *addbaext) { struct ieee80211_local *local = sta->sdata->local; struct tid_ampdu_rx *tid_agg_rx; @@ -410,21 +451,22 @@ end: } if (tx) - ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, tid, + ieee80211_send_addba_resp(sta, sta->sta.addr, tid, dialog_token, status, 1, buf_size, - timeout); + timeout, addbaext); } static void __ieee80211_start_rx_ba_session(struct sta_info *sta, u8 dialog_token, u16 timeout, u16 start_seq_num, u16 ba_policy, u16 tid, u16 buf_size, bool tx, - bool auto_seq) + bool auto_seq, + const struct ieee80211_addba_ext_ie *addbaext) { mutex_lock(&sta->ampdu_mlme.mtx); ___ieee80211_start_rx_ba_session(sta, dialog_token, timeout, start_seq_num, ba_policy, tid, - buf_size, tx, auto_seq); + buf_size, tx, auto_seq, addbaext); mutex_unlock(&sta->ampdu_mlme.mtx); } @@ -434,7 +476,9 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, size_t len) { u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num; + struct ieee802_11_elems elems = { 0 }; u8 dialog_token; + int ies_len; /* extract session parameters 
from addba request frame */ dialog_token = mgmt->u.action.u.addba_req.dialog_token; @@ -447,9 +491,19 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6; + ies_len = len - offsetof(struct ieee80211_mgmt, + u.action.u.addba_req.variable); + if (ies_len) { + ieee802_11_parse_elems(mgmt->u.action.u.addba_req.variable, + ies_len, true, &elems, mgmt->bssid, NULL); + if (elems.parse_error) + return; + } + __ieee80211_start_rx_ba_session(sta, dialog_token, timeout, start_seq_num, ba_policy, tid, - buf_size, true, false); + buf_size, true, false, + elems.addba_ext_ie); } void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 4105c97c7ba1..70739e746c13 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -980,7 +980,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, BSS_CHANGED_SSID | BSS_CHANGED_P2P_PS | BSS_CHANGED_TXPOWER | - BSS_CHANGED_TWT; + BSS_CHANGED_TWT | + BSS_CHANGED_HE_OBSS_PD; int err; int prev_beacon_int; @@ -1051,6 +1052,8 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, sdata->vif.bss_conf.enable_beacon = true; sdata->vif.bss_conf.allow_p2p_go_ps = sdata->vif.p2p; sdata->vif.bss_conf.twt_responder = params->twt_responder; + memcpy(&sdata->vif.bss_conf.he_obss_pd, ¶ms->he_obss_pd, + sizeof(struct ieee80211_he_obss_pd)); sdata->vif.bss_conf.ssid_len = params->ssid_len; if (params->ssid_len) @@ -1542,7 +1545,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (ether_addr_equal(mac, sdata->vif.addr)) return -EINVAL; - if (is_multicast_ether_addr(mac)) + if (!is_valid_ether_addr(mac)) return -EINVAL; if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER) && diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 2e7f75938c51..568b3b276931 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -271,8 +271,7 @@ static const char *hw_flag_names[] = { FLAG(TX_STATUS_NO_AMPDU_LEN), FLAG(SUPPORTS_MULTI_BSSID), FLAG(SUPPORTS_ONLY_HE_MULTI_BSSID), - FLAG(EXT_KEY_ID_NATIVE), - FLAG(NO_AMPDU_KEYBORDER_SUPPORT), + FLAG(AMPDU_KEYBORDER_SUPPORT), #undef FLAG }; diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index c2d8b5451a5e..2c9b3eb8b652 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -692,14 +692,16 @@ static inline int drv_remain_on_channel(struct ieee80211_local *local, return ret; } -static inline int drv_cancel_remain_on_channel(struct ieee80211_local *local) +static inline int +drv_cancel_remain_on_channel(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) { int ret; might_sleep(); - trace_drv_cancel_remain_on_channel(local); - ret = local->ops->cancel_remain_on_channel(&local->hw); + trace_drv_cancel_remain_on_channel(local, sdata); + ret = local->ops->cancel_remain_on_channel(&local->hw, &sdata->vif); trace_drv_return_int(local, ret); return ret; diff --git a/net/mac80211/he.c b/net/mac80211/he.c index 219650591c79..736da0035135 100644 --- a/net/mac80211/he.c +++ b/net/mac80211/he.c @@ -50,3 +50,43 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, he_cap->has_he = true; } + +void +ieee80211_he_op_ie_to_bss_conf(struct ieee80211_vif *vif, + const struct ieee80211_he_operation *he_op_ie_elem) +{ + struct ieee80211_he_operation *he_operation = + &vif->bss_conf.he_operation; + + if 
(!he_op_ie_elem) { + memset(he_operation, 0, sizeof(*he_operation)); + return; + } + + vif->bss_conf.he_operation = *he_op_ie_elem; +} + +void +ieee80211_he_spr_ie_to_bss_conf(struct ieee80211_vif *vif, + const struct ieee80211_he_spr *he_spr_ie_elem) +{ + struct ieee80211_he_obss_pd *he_obss_pd = + &vif->bss_conf.he_obss_pd; + const u8 *data; + + memset(he_obss_pd, 0, sizeof(*he_obss_pd)); + + if (!he_spr_ie_elem) + return; + data = he_spr_ie_elem->optional; + + if (he_spr_ie_elem->he_sr_control & + IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT) + data++; + if (he_spr_ie_elem->he_sr_control & + IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT) { + he_obss_pd->max_offset = *data++; + he_obss_pd->min_offset = *data++; + he_obss_pd->enable = true; + } +} diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index d5a500b2a448..a2e4d6b8fd98 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -359,7 +359,7 @@ void ieee80211_ba_session_work(struct work_struct *work) sta->ampdu_mlme.tid_rx_manage_offl)) ___ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid, IEEE80211_MAX_AMPDU_BUF_HT, - false, true); + false, true, NULL); if (test_and_clear_bit(tid + IEEE80211_NUM_TIDS, sta->ampdu_mlme.tid_rx_manage_offl)) diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index f00dca056295..0a6ff01c68a9 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -1252,6 +1252,7 @@ void ieee80211_ibss_rx_no_sta(struct ieee80211_sub_if_data *sdata, static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata) { + struct ieee80211_if_ibss *ifibss = &sdata->u.ibss; struct ieee80211_local *local = sdata->local; struct sta_info *sta, *tmp; unsigned long exp_time = IEEE80211_IBSS_INACTIVITY_LIMIT; @@ -1268,10 +1269,17 @@ static void ieee80211_ibss_sta_expire(struct ieee80211_sub_if_data *sdata) if (time_is_before_jiffies(last_active + exp_time) || (time_is_before_jiffies(last_active + exp_rsn) && sta->sta_state != IEEE80211_STA_AUTHORIZED)) { + u8 frame_buf[IEEE80211_DEAUTH_FRAME_LEN]; + sta_dbg(sta->sdata, "expiring inactive %sSTA %pM\n", sta->sta_state != IEEE80211_STA_AUTHORIZED ? 
"not authorized " : "", sta->sta.addr); + ieee80211_send_deauth_disassoc(sdata, sta->sta.addr, + ifibss->bssid, + IEEE80211_STYPE_DEAUTH, + WLAN_REASON_DEAUTH_LEAVING, + true, frame_buf); WARN_ON(__sta_info_destroy(sta)); } } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 004e2e3adb88..05406e9c05b3 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1480,6 +1480,7 @@ struct ieee802_11_elems { const struct ieee80211_meshconf_ie *mesh_config; const u8 *he_cap; const struct ieee80211_he_operation *he_operation; + const struct ieee80211_he_spr *he_spr; const struct ieee80211_mu_edca_param_set *mu_edca_param_set; const u8 *uora_element; const u8 *mesh_id; @@ -1506,6 +1507,7 @@ struct ieee802_11_elems { u8 max_bssid_indicator; u8 dtim_count; u8 dtim_period; + const struct ieee80211_addba_ext_ie *addba_ext_ie; /* length of them, respectively */ u8 ext_capab_len; @@ -1767,7 +1769,8 @@ ieee80211_build_data_template(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, u32 info_flags); void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb, struct ieee80211_supported_band *sband, - int retry_count, int shift, bool send_to_cooked); + int retry_count, int shift, bool send_to_cooked, + struct ieee80211_tx_status *status); void ieee80211_check_fast_xmit(struct sta_info *sta); void ieee80211_check_fast_xmit_all(struct ieee80211_local *local); @@ -1804,7 +1807,8 @@ void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, void ___ieee80211_start_rx_ba_session(struct sta_info *sta, u8 dialog_token, u16 timeout, u16 start_seq_num, u16 ba_policy, u16 tid, - u16 buf_size, bool tx, bool auto_seq); + u16 buf_size, bool tx, bool auto_seq, + const struct ieee80211_addba_ext_ie *addbaext); void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, enum ieee80211_agg_stop_reason reason); void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, @@ -1869,6 +1873,13 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, const u8 *he_cap_ie, u8 he_cap_len, struct sta_info *sta); +void +ieee80211_he_spr_ie_to_bss_conf(struct ieee80211_vif *vif, + const struct ieee80211_he_spr *he_spr_ie_elem); + +void +ieee80211_he_op_ie_to_bss_conf(struct ieee80211_vif *vif, + const struct ieee80211_he_operation *he_op_ie_elem); /* Spectrum management */ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, @@ -2088,7 +2099,8 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, const u8 *da, const u8 *key, u8 key_len, u8 key_idx, u32 tx_flags); void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, - const u8 *bssid, u16 stype, u16 reason, + const u8 *da, const u8 *bssid, + u16 stype, u16 reason, bool send_frame, u8 *frame_buf); enum { @@ -2133,9 +2145,11 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, u32 cap); u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, const struct cfg80211_chan_def *chandef); +u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype); u8 *ieee80211_ie_build_he_cap(u8 *pos, const struct ieee80211_sta_he_cap *he_cap, u8 *end); +u8 *ieee80211_ie_build_he_oper(u8 *pos); int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef, const struct ieee80211_supported_band *sband, const u8 *srates, int srates_len, u32 *rates); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 8dc6580e1787..af8b09214786 100644 --- 
a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1876,7 +1876,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, /* MTU range: 256 - 2304 */ ndev->min_mtu = 256; - ndev->max_mtu = IEEE80211_MAX_DATA_LEN; + ndev->max_mtu = local->hw.max_mtu; ret = register_netdevice(ndev); if (ret) { diff --git a/net/mac80211/key.c b/net/mac80211/key.c index dd60f6428049..0f889b919b06 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -6,6 +6,7 @@ * Copyright 2007-2008 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH + * Copyright 2018-2019 Intel Corporation */ #include <linux/if_ether.h> @@ -270,7 +271,7 @@ int ieee80211_set_tx_key(struct ieee80211_key *key) sta->ptk_idx = key->conf.keyidx; - if (ieee80211_hw_check(&local->hw, NO_AMPDU_KEYBORDER_SUPPORT)) + if (!ieee80211_hw_check(&local->hw, AMPDU_KEYBORDER_SUPPORT)) clear_sta_flag(sta, WLAN_STA_BLOCK_BA); ieee80211_check_fast_xmit(sta); @@ -290,15 +291,15 @@ static void ieee80211_pairwise_rekey(struct ieee80211_key *old, /* Extended Key ID key install, initial one or rekey */ if (sta->ptk_idx != INVALID_PTK_KEYIDX && - ieee80211_hw_check(&local->hw, - NO_AMPDU_KEYBORDER_SUPPORT)) { + !ieee80211_hw_check(&local->hw, AMPDU_KEYBORDER_SUPPORT)) { /* Aggregation Sessions with Extended Key ID must not * mix MPDUs with different keyIDs within one A-MPDU. - * Tear down any running Tx aggregation and all new - * Rx/Tx aggregation request during rekey if the driver - * asks us to do so. (Blocking Tx only would be - * sufficient but WLAN_STA_BLOCK_BA gets the job done - * for the few ms we need it.) + * Tear down running Tx aggregation sessions and block + * new Rx/Tx aggregation requests during rekey to + * ensure there are no A-MPDUs when the driver is not + * supporting A-MPDU key borders. (Blocking Tx only + * would be sufficient but WLAN_STA_BLOCK_BA gets the + * job done for the few ms we need it.) */ set_sta_flag(sta, WLAN_STA_BLOCK_BA); mutex_lock(&sta->ampdu_mlme.mtx); @@ -781,9 +782,8 @@ int ieee80211_key_link(struct ieee80211_key *key, /* The rekey code assumes that the old and new key are using * the same cipher. Enforce the assumption for pairwise keys. 
*/ - if (key && - ((alt_key && alt_key->conf.cipher != key->conf.cipher) || - (old_key && old_key->conf.cipher != key->conf.cipher))) + if ((alt_key && alt_key->conf.cipher != key->conf.cipher) || + (old_key && old_key->conf.cipher != key->conf.cipher)) goto out; } else if (sta) { old_key = key_mtx_dereference(sdata->local, sta->gtk[idx]); @@ -793,7 +793,7 @@ int ieee80211_key_link(struct ieee80211_key *key, /* Non-pairwise keys must also not switch the cipher on rekey */ if (!pairwise) { - if (key && old_key && old_key->conf.cipher != key->conf.cipher) + if (old_key && old_key->conf.cipher != key->conf.cipher) goto out; } @@ -843,46 +843,30 @@ void ieee80211_key_free(struct ieee80211_key *key, bool delay_tailroom) ieee80211_key_destroy(key, delay_tailroom); } -void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata) +void ieee80211_reenable_keys(struct ieee80211_sub_if_data *sdata) { struct ieee80211_key *key; struct ieee80211_sub_if_data *vlan; ASSERT_RTNL(); - if (WARN_ON(!ieee80211_sdata_running(sdata))) - return; - - mutex_lock(&sdata->local->key_mtx); - - WARN_ON_ONCE(sdata->crypto_tx_tailroom_needed_cnt || - sdata->crypto_tx_tailroom_pending_dec); - - if (sdata->vif.type == NL80211_IFTYPE_AP) { - list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) - WARN_ON_ONCE(vlan->crypto_tx_tailroom_needed_cnt || - vlan->crypto_tx_tailroom_pending_dec); - } - - list_for_each_entry(key, &sdata->key_list, list) { - increment_tailroom_need_count(sdata); - ieee80211_key_enable_hw_accel(key); - } - - mutex_unlock(&sdata->local->key_mtx); -} - -void ieee80211_reset_crypto_tx_tailroom(struct ieee80211_sub_if_data *sdata) -{ - struct ieee80211_sub_if_data *vlan; - mutex_lock(&sdata->local->key_mtx); sdata->crypto_tx_tailroom_needed_cnt = 0; + sdata->crypto_tx_tailroom_pending_dec = 0; if (sdata->vif.type == NL80211_IFTYPE_AP) { - list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) + list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) { vlan->crypto_tx_tailroom_needed_cnt = 0; + vlan->crypto_tx_tailroom_pending_dec = 0; + } + } + + if (ieee80211_sdata_running(sdata)) { + list_for_each_entry(key, &sdata->key_list, list) { + increment_tailroom_need_count(sdata); + ieee80211_key_enable_hw_accel(key); + } } mutex_unlock(&sdata->local->key_mtx); diff --git a/net/mac80211/key.h b/net/mac80211/key.h index b8b9cd743bf4..d6d6e89cf7dd 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -2,6 +2,7 @@ /* * Copyright 2002-2004, Instant802 Networks, Inc. * Copyright 2005, Devicescape Software, Inc. 
+ * Copyright (C) 2019 Intel Corporation */ #ifndef IEEE80211_KEY_H @@ -156,8 +157,7 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata, bool force_synchronize); void ieee80211_free_sta_keys(struct ieee80211_local *local, struct sta_info *sta); -void ieee80211_enable_keys(struct ieee80211_sub_if_data *sdata); -void ieee80211_reset_crypto_tx_tailroom(struct ieee80211_sub_if_data *sdata); +void ieee80211_reenable_keys(struct ieee80211_sub_if_data *sdata); #define key_mtx_dereference(local, ref) \ rcu_dereference_protected(ref, lockdep_is_held(&((local)->key_mtx))) diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 4c2702f128f3..aba094b4ccfc 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -639,6 +639,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH; local->hw.uapsd_queues = IEEE80211_DEFAULT_UAPSD_QUEUES; local->hw.uapsd_max_sp_len = IEEE80211_DEFAULT_MAX_SP_LEN; + local->hw.max_mtu = IEEE80211_MAX_DATA_LEN; local->user_power_level = IEEE80211_UNSET_POWER_LEVEL; wiphy->ht_capa_mod_mask = &mac80211_ht_capa_mod_mask; wiphy->vht_capa_mod_mask = &mac80211_vht_capa_mod_mask; @@ -1048,21 +1049,15 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) } } - /* Enable Extended Key IDs when driver allowed it, or when it - * supports neither HW crypto nor A-MPDUs + /* Mac80211 and therefore all drivers using SW crypto only + * are able to handle PTK rekeys and Extended Key ID. */ - if ((!local->ops->set_key && - !ieee80211_hw_check(hw, AMPDU_AGGREGATION)) || - ieee80211_hw_check(&local->hw, EXT_KEY_ID_NATIVE)) - wiphy_ext_feature_set(local->hw.wiphy, - NL80211_EXT_FEATURE_EXT_KEY_ID); - - /* Mac80211 and therefore all cards only using SW crypto are able to - * handle PTK rekeys correctly - */ - if (!local->ops->set_key) + if (!local->ops->set_key) { wiphy_ext_feature_set(local->hw.wiphy, NL80211_EXT_FEATURE_CAN_REPLACE_PTK0); + wiphy_ext_feature_set(local->hw.wiphy, + NL80211_EXT_FEATURE_EXT_KEY_ID); + } /* * Calculate scan IE length -- we need this to alloc diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 2e7fa743c892..d09b3c789314 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -532,6 +532,61 @@ int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata, return 0; } +int mesh_add_he_cap_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, u8 ie_len) +{ + const struct ieee80211_sta_he_cap *he_cap; + struct ieee80211_supported_band *sband; + u8 *pos; + + sband = ieee80211_get_sband(sdata); + if (!sband) + return -EINVAL; + + he_cap = ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT); + + if (!he_cap || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10) + return 0; + + if (skb_tailroom(skb) < ie_len) + return -ENOMEM; + + pos = skb_put(skb, ie_len); + ieee80211_ie_build_he_cap(pos, he_cap, pos + ie_len); + + return 0; +} + +int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) +{ + const struct ieee80211_sta_he_cap *he_cap; + struct ieee80211_supported_band *sband; + u8 *pos; + + sband = ieee80211_get_sband(sdata); + if (!sband) + return -EINVAL; + + he_cap = ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_MESH_POINT); + if (!he_cap || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT || + sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 || + 
sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10) + return 0; + + if (skb_tailroom(skb) < 2 + 1 + sizeof(struct ieee80211_he_operation)) + return -ENOMEM; + + pos = skb_put(skb, 2 + 1 + sizeof(struct ieee80211_he_operation)); + ieee80211_ie_build_he_oper(pos); + + return 0; +} + static void ieee80211_mesh_path_timer(struct timer_list *t) { struct ieee80211_sub_if_data *sdata = @@ -677,6 +732,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) struct ieee80211_chanctx_conf *chanctx_conf; struct mesh_csa_settings *csa; enum nl80211_band band; + u8 ie_len_he_cap; u8 *pos; struct ieee80211_sub_if_data *sdata; int hdr_len = offsetofend(struct ieee80211_mgmt, u.beacon); @@ -687,6 +743,8 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) band = chanctx_conf->def.chan->band; rcu_read_unlock(); + ie_len_he_cap = ieee80211_ie_len_he_cap(sdata, + NL80211_IFTYPE_MESH_POINT); head_len = hdr_len + 2 + /* NULL SSID */ /* Channel Switch Announcement */ @@ -706,6 +764,8 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) 2 + sizeof(__le16) + /* awake window */ 2 + sizeof(struct ieee80211_vht_cap) + 2 + sizeof(struct ieee80211_vht_operation) + + ie_len_he_cap + + 2 + 1 + sizeof(struct ieee80211_he_operation) + ifmsh->ie_len; bcn = kzalloc(sizeof(*bcn) + head_len + tail_len, GFP_KERNEL); @@ -823,6 +883,8 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) mesh_add_awake_window_ie(sdata, skb) || mesh_add_vht_cap_ie(sdata, skb) || mesh_add_vht_oper_ie(sdata, skb) || + mesh_add_he_cap_ie(sdata, skb, ie_len_he_cap) || + mesh_add_he_oper_ie(sdata, skb) || mesh_add_vendor_ies(sdata, skb)) goto out_free; diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 94d57cce70da..953f720754e8 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -218,6 +218,10 @@ int mesh_add_vht_cap_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb); +int mesh_add_he_cap_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, u8 ie_len); +int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb); void mesh_rmc_free(struct ieee80211_sub_if_data *sdata); int mesh_rmc_init(struct ieee80211_sub_if_data *sdata); void ieee80211s_init(void); diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index dd3aefd052a9..737c5f4dbf52 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -218,9 +218,12 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, bool include_plid = false; u16 peering_proto = 0; u8 *pos, ie_len = 4; + u8 ie_len_he_cap; int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.self_prot); int err = -ENOMEM; + ie_len_he_cap = ieee80211_ie_len_he_cap(sdata, + NL80211_IFTYPE_MESH_POINT); skb = dev_alloc_skb(local->tx_headroom + hdr_len + 2 + /* capability info */ @@ -233,6 +236,8 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, 2 + sizeof(struct ieee80211_ht_operation) + 2 + sizeof(struct ieee80211_vht_cap) + 2 + sizeof(struct ieee80211_vht_operation) + + ie_len_he_cap + + 2 + 1 + sizeof(struct ieee80211_he_operation) + 2 + 8 + /* peering IE */ sdata->u.mesh.ie_len); if (!skb) @@ -321,7 +326,9 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, if (mesh_add_ht_cap_ie(sdata, skb) || mesh_add_ht_oper_ie(sdata, skb) || mesh_add_vht_cap_ie(sdata, skb) || - mesh_add_vht_oper_ie(sdata, skb)) + mesh_add_vht_oper_ie(sdata, skb) || + 
mesh_add_he_cap_ie(sdata, skb, ie_len_he_cap) || + mesh_add_he_oper_ie(sdata, skb)) goto free; } @@ -433,6 +440,9 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, elems->vht_cap_elem, sta); + ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband, elems->he_cap, + elems->he_cap_len, sta); + if (bw != sta->sta.bandwidth) changed |= IEEE80211_RC_BW_CHANGED; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 4c888dc9bd81..26a2f49208b6 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -158,10 +158,10 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap)); ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap); + memset(chandef, 0, sizeof(struct cfg80211_chan_def)); chandef->chan = channel; chandef->width = NL80211_CHAN_WIDTH_20_NOHT; chandef->center_freq1 = channel->center_freq; - chandef->center_freq2 = 0; if (!ht_oper || !sta_ht_cap.ht_supported) { ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; @@ -2278,8 +2278,9 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, !ifmgd->have_beacon) drv_mgd_prepare_tx(sdata->local, sdata, 0); - ieee80211_send_deauth_disassoc(sdata, ifmgd->bssid, stype, - reason, tx, frame_buf); + ieee80211_send_deauth_disassoc(sdata, ifmgd->bssid, + ifmgd->bssid, stype, reason, + tx, frame_buf); } /* flush out frame - make sure the deauth was actually sent */ @@ -2522,7 +2523,10 @@ static void ieee80211_mgd_probe_ap_send(struct ieee80211_sub_if_data *sdata) if (ieee80211_hw_check(&sdata->local->hw, REPORTS_TX_ACK_STATUS)) { ifmgd->nullfunc_failed = false; - ieee80211_send_nullfunc(sdata->local, sdata, false); + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE)) + ifmgd->probe_send_count--; + else + ieee80211_send_nullfunc(sdata->local, sdata, false); } else { int ssid_len; @@ -3391,6 +3395,8 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, if (elems.uora_element) bss_conf->uora_ocw_range = elems.uora_element[0]; + ieee80211_he_op_ie_to_bss_conf(&sdata->vif, elems.he_operation); + ieee80211_he_spr_ie_to_bss_conf(&sdata->vif, elems.he_spr); /* TODO: OPEN: what happens if BSS color disable is set? */ } @@ -4504,7 +4510,7 @@ void ieee80211_mgd_quiesce(struct ieee80211_sub_if_data *sdata) * cfg80211 won't know and won't actually abort those attempts, * thus we need to do that ourselves. 
*/ - ieee80211_send_deauth_disassoc(sdata, bssid, + ieee80211_send_deauth_disassoc(sdata, bssid, bssid, IEEE80211_STYPE_DEAUTH, WLAN_REASON_DEAUTH_LEAVING, false, frame_buf); @@ -5291,7 +5297,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; ifmgd->flags |= IEEE80211_STA_DISABLE_HE; netdev_info(sdata->dev, - "disabling HE/HT/VHT due to WEP/TKIP use\n"); + "disabling HT/VHT/HE due to WEP/TKIP use\n"); } } @@ -5545,7 +5551,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_get_reason_code_string(req->reason_code)); drv_mgd_prepare_tx(sdata->local, sdata, 0); - ieee80211_send_deauth_disassoc(sdata, req->bssid, + ieee80211_send_deauth_disassoc(sdata, req->bssid, req->bssid, IEEE80211_STYPE_DEAUTH, req->reason_code, tx, frame_buf); @@ -5565,7 +5571,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, ieee80211_get_reason_code_string(req->reason_code)); drv_mgd_prepare_tx(sdata->local, sdata, 0); - ieee80211_send_deauth_disassoc(sdata, req->bssid, + ieee80211_send_deauth_disassoc(sdata, req->bssid, req->bssid, IEEE80211_STYPE_DEAUTH, req->reason_code, tx, frame_buf); diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 60ef8972b254..c710504ccf1a 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -8,6 +8,7 @@ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> + * Copyright (C) 2019 Intel Corporation */ #include <linux/export.h> #include <net/mac80211.h> @@ -732,7 +733,7 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, } if (local->ops->remain_on_channel) { - ret = drv_cancel_remain_on_channel(local); + ret = drv_cancel_remain_on_channel(local, roc->sdata); if (WARN_ON_ONCE(ret)) { mutex_unlock(&local->mtx); return ret; @@ -991,7 +992,7 @@ void ieee80211_roc_purge(struct ieee80211_local *local, if (roc->started) { if (local->ops->remain_on_channel) { /* can race, so ignore return value */ - drv_cancel_remain_on_channel(local); + drv_cancel_remain_on_channel(local, sdata); ieee80211_roc_notify_destroy(roc); } else { roc->abort = true; diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index 5d5348bc41ec..5397c6dad056 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -60,15 +60,6 @@ static inline void rate_control_add_sta_debugfs(struct sta_info *sta) #endif } -static inline void rate_control_remove_sta_debugfs(struct sta_info *sta) -{ -#ifdef CONFIG_MAC80211_DEBUGFS - struct rate_control_ref *ref = sta->rate_ctrl; - if (ref && ref->ops->remove_sta_debugfs) - ref->ops->remove_sta_debugfs(ref->priv, sta->rate_ctrl_priv); -#endif -} - void ieee80211_check_rate_mask(struct ieee80211_sub_if_data *sdata); /* Get a reference to the rate control algorithm. 
If `name' is NULL, get the diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h index 3c96a853adbd..51d8b2c846e7 100644 --- a/net/mac80211/rc80211_minstrel.h +++ b/net/mac80211/rc80211_minstrel.h @@ -95,6 +95,7 @@ struct minstrel_sta_info { struct minstrel_priv { struct ieee80211_hw *hw; bool has_mrr; + u32 sample_switch; unsigned int cw_min; unsigned int cw_max; unsigned int max_retry; diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 5a882da82f0e..0ef2633349b5 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -18,6 +18,8 @@ #define AVG_AMPDU_SIZE 16 #define AVG_PKT_SIZE 1200 +#define SAMPLE_SWITCH_THR 100 + /* Number of bits for an average sized packet */ #define MCS_NBITS ((AVG_PKT_SIZE * AVG_AMPDU_SIZE) << 3) @@ -58,6 +60,7 @@ [GROUP_IDX(_streams, _sgi, _ht40)] = { \ .streams = _streams, \ .shift = _s, \ + .bw = _ht40, \ .flags = \ IEEE80211_TX_RC_MCS | \ (_sgi ? IEEE80211_TX_RC_SHORT_GI : 0) | \ @@ -94,6 +97,7 @@ [VHT_GROUP_IDX(_streams, _sgi, _bw)] = { \ .streams = _streams, \ .shift = _s, \ + .bw = _bw, \ .flags = \ IEEE80211_TX_RC_VHT_MCS | \ (_sgi ? IEEE80211_TX_RC_SHORT_GI : 0) | \ @@ -486,7 +490,7 @@ minstrel_ht_assign_best_tp_rates(struct minstrel_ht_sta *mi, tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_ewma; tmp_mcs_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob); - if (tmp_cck_tp > tmp_mcs_tp) { + if (tmp_cck_tp_rate && tmp_cck_tp > tmp_mcs_tp) { for(i = 0; i < MAX_THR_RATES; i++) { minstrel_ht_sort_best_tp_rates(mi, tmp_cck_tp_rate[i], tmp_mcs_tp_rate); @@ -526,6 +530,133 @@ minstrel_ht_prob_rate_reduce_streams(struct minstrel_ht_sta *mi) } } +static inline int +minstrel_get_duration(int index) +{ + const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; + unsigned int duration = group->duration[index % MCS_GROUP_RATES]; + return duration << group->shift; +} + +static bool +minstrel_ht_probe_group(struct minstrel_ht_sta *mi, const struct mcs_group *tp_group, + int tp_idx, const struct mcs_group *group) +{ + if (group->bw < tp_group->bw) + return false; + + if (group->streams == tp_group->streams) + return true; + + if (tp_idx < 4 && group->streams == tp_group->streams - 1) + return true; + + return group->streams == tp_group->streams + 1; +} + +static void +minstrel_ht_find_probe_rates(struct minstrel_ht_sta *mi, u16 *rates, int *n_rates, + bool faster_rate) +{ + const struct mcs_group *group, *tp_group; + int i, g, max_dur; + int tp_idx; + + tp_group = &minstrel_mcs_groups[mi->max_tp_rate[0] / MCS_GROUP_RATES]; + tp_idx = mi->max_tp_rate[0] % MCS_GROUP_RATES; + + max_dur = minstrel_get_duration(mi->max_tp_rate[0]); + if (faster_rate) + max_dur -= max_dur / 16; + + for (g = 0; g < MINSTREL_GROUPS_NB; g++) { + u16 supported = mi->supported[g]; + + if (!supported) + continue; + + group = &minstrel_mcs_groups[g]; + if (!minstrel_ht_probe_group(mi, tp_group, tp_idx, group)) + continue; + + for (i = 0; supported; supported >>= 1, i++) { + int idx; + + if (!(supported & 1)) + continue; + + if ((group->duration[i] << group->shift) > max_dur) + continue; + + idx = g * MCS_GROUP_RATES + i; + if (idx == mi->max_tp_rate[0]) + continue; + + rates[(*n_rates)++] = idx; + break; + } + } +} + +static void +minstrel_ht_rate_sample_switch(struct minstrel_priv *mp, + struct minstrel_ht_sta *mi) +{ + struct minstrel_rate_stats *mrs; + u16 rates[MINSTREL_GROUPS_NB]; + int n_rates = 0; + int probe_rate = 0; + bool faster_rate; + int i; + u8 
random; + + /* + * Use rate switching instead of probing packets for devices with + * little control over retry fallback behavior + */ + if (mp->hw->max_rates > 1) + return; + + /* + * If the current EWMA prob is >75%, look for a rate that's 6.25% + * faster than the max tp rate. + * If that fails, look again for a rate that is at least as fast + */ + mrs = minstrel_get_ratestats(mi, mi->max_tp_rate[0]); + faster_rate = mrs->prob_ewma > MINSTREL_FRAC(75, 100); + minstrel_ht_find_probe_rates(mi, rates, &n_rates, faster_rate); + if (!n_rates && faster_rate) + minstrel_ht_find_probe_rates(mi, rates, &n_rates, false); + + /* If no suitable rate was found, try to pick the next one in the group */ + if (!n_rates) { + int g_idx = mi->max_tp_rate[0] / MCS_GROUP_RATES; + u16 supported = mi->supported[g_idx]; + + supported >>= mi->max_tp_rate[0] % MCS_GROUP_RATES; + for (i = 0; supported; supported >>= 1, i++) { + if (!(supported & 1)) + continue; + + probe_rate = mi->max_tp_rate[0] + i; + goto out; + } + + return; + } + + i = 0; + if (n_rates > 1) { + random = prandom_u32(); + i = random % n_rates; + } + probe_rate = rates[i]; + +out: + mi->sample_rate = probe_rate; + mi->sample_mode = MINSTREL_SAMPLE_ACTIVE; +} + /* * Update rate statistics and select new primary rates * @@ -536,7 +667,8 @@ minstrel_ht_prob_rate_reduce_streams(struct minstrel_ht_sta *mi) * higher throughput rates, even if the probablity is a bit lower */ static void -minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) +minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, + bool sample) { struct minstrel_mcs_group_data *mg; struct minstrel_rate_stats *mrs; @@ -544,6 +676,18 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) u16 tmp_mcs_tp_rate[MAX_THR_RATES], tmp_group_tp_rate[MAX_THR_RATES]; u16 tmp_cck_tp_rate[MAX_THR_RATES], index; + mi->sample_mode = MINSTREL_SAMPLE_IDLE; + + if (sample) { + mi->total_packets_cur = mi->total_packets - + mi->total_packets_last; + mi->total_packets_last = mi->total_packets; + } + if (!mp->sample_switch) + sample = false; + if (mi->total_packets_cur < SAMPLE_SWITCH_THR && mp->sample_switch != 1) + sample = false; + if (mi->ampdu_packets > 0) { if (!ieee80211_hw_check(mp->hw, TX_STATUS_NO_AMPDU_LEN)) mi->avg_ampdu_len = minstrel_ewma(mi->avg_ampdu_len, @@ -558,11 +702,19 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) mi->sample_slow = 0; mi->sample_count = 0; - /* Initialize global rate indexes */ - for(j = 0; j < MAX_THR_RATES; j++){ - tmp_mcs_tp_rate[j] = 0; - tmp_cck_tp_rate[j] = 0; - } + memset(tmp_mcs_tp_rate, 0, sizeof(tmp_mcs_tp_rate)); + memset(tmp_cck_tp_rate, 0, sizeof(tmp_cck_tp_rate)); + if (mi->supported[MINSTREL_CCK_GROUP]) + for (j = 0; j < ARRAY_SIZE(tmp_cck_tp_rate); j++) + tmp_cck_tp_rate[j] = MINSTREL_CCK_GROUP * MCS_GROUP_RATES; + + if (mi->supported[MINSTREL_VHT_GROUP_0]) + index = MINSTREL_VHT_GROUP_0 * MCS_GROUP_RATES; + else + index = MINSTREL_HT_GROUP_0 * MCS_GROUP_RATES; + + for (j = 0; j < ARRAY_SIZE(tmp_mcs_tp_rate); j++) + tmp_mcs_tp_rate[j] = index; /* Find best rate sets within all MCS groups*/ for (group = 0; group < ARRAY_SIZE(minstrel_mcs_groups); group++) { @@ -575,7 +727,7 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) /* (re)Initialize group rate indexes */ for(j = 0; j < MAX_THR_RATES; j++) - tmp_group_tp_rate[j] = group; + tmp_group_tp_rate[j] = MCS_GROUP_RATES * group; for (i = 0; i < MCS_GROUP_RATES; i++) { 
if (!(mi->supported[group] & BIT(i))) @@ -622,12 +774,16 @@ minstrel_ht_update_stats(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) /* try to sample all available rates during each interval */ mi->sample_count *= 8; + if (sample) + minstrel_ht_rate_sample_switch(mp, mi); + #ifdef CONFIG_MAC80211_DEBUGFS /* use fixed index if set */ if (mp->fixed_rate_idx != -1) { for (i = 0; i < 4; i++) mi->max_tp_rate[i] = mp->fixed_rate_idx; mi->max_prob_rate = mp->fixed_rate_idx; + mi->sample_mode = MINSTREL_SAMPLE_IDLE; } #endif @@ -731,15 +887,17 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, struct minstrel_ht_sta_priv *msp = priv_sta; struct minstrel_ht_sta *mi = &msp->ht; struct ieee80211_tx_rate *ar = info->status.rates; - struct minstrel_rate_stats *rate, *rate2; + struct minstrel_rate_stats *rate, *rate2, *rate_sample = NULL; struct minstrel_priv *mp = priv; bool last, update = false; + bool sample_status = false; int i; if (!msp->is_ht) return mac80211_minstrel.tx_status_ext(priv, sband, &msp->legacy, st); + /* This packet was aggregated but doesn't carry status info */ if ((info->flags & IEEE80211_TX_CTL_AMPDU) && !(info->flags & IEEE80211_TX_STAT_AMPDU)) @@ -765,12 +923,17 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) mi->sample_packets += info->status.ampdu_len; + if (mi->sample_mode != MINSTREL_SAMPLE_IDLE) + rate_sample = minstrel_get_ratestats(mi, mi->sample_rate); + last = !minstrel_ht_txstat_valid(mp, &ar[0]); for (i = 0; !last; i++) { last = (i == IEEE80211_TX_MAX_RATES - 1) || !minstrel_ht_txstat_valid(mp, &ar[i + 1]); rate = minstrel_ht_get_stats(mp, mi, &ar[i]); + if (rate == rate_sample) + sample_status = true; if (last) rate->success += info->status.ampdu_ack_len; @@ -778,44 +941,60 @@ minstrel_ht_tx_status(void *priv, struct ieee80211_supported_band *sband, rate->attempts += ar[i].count * info->status.ampdu_len; } - /* - * check for sudden death of spatial multiplexing, - * downgrade to a lower number of streams if necessary. - */ - rate = minstrel_get_ratestats(mi, mi->max_tp_rate[0]); - if (rate->attempts > 30 && - MINSTREL_FRAC(rate->success, rate->attempts) < - MINSTREL_FRAC(20, 100)) { - minstrel_downgrade_rate(mi, &mi->max_tp_rate[0], true); + switch (mi->sample_mode) { + case MINSTREL_SAMPLE_IDLE: + break; + + case MINSTREL_SAMPLE_ACTIVE: + if (!sample_status) + break; + + mi->sample_mode = MINSTREL_SAMPLE_PENDING; update = true; - } + break; + + case MINSTREL_SAMPLE_PENDING: + if (sample_status) + break; - rate2 = minstrel_get_ratestats(mi, mi->max_tp_rate[1]); - if (rate2->attempts > 30 && - MINSTREL_FRAC(rate2->success, rate2->attempts) < - MINSTREL_FRAC(20, 100)) { - minstrel_downgrade_rate(mi, &mi->max_tp_rate[1], false); update = true; + minstrel_ht_update_stats(mp, mi, false); + break; + } + + + if (mp->hw->max_rates > 1) { + /* + * check for sudden death of spatial multiplexing, + * downgrade to a lower number of streams if necessary. 
+ */ + rate = minstrel_get_ratestats(mi, mi->max_tp_rate[0]); + if (rate->attempts > 30 && + MINSTREL_FRAC(rate->success, rate->attempts) < + MINSTREL_FRAC(20, 100)) { + minstrel_downgrade_rate(mi, &mi->max_tp_rate[0], true); + update = true; + } + + rate2 = minstrel_get_ratestats(mi, mi->max_tp_rate[1]); + if (rate2->attempts > 30 && + MINSTREL_FRAC(rate2->success, rate2->attempts) < + MINSTREL_FRAC(20, 100)) { + minstrel_downgrade_rate(mi, &mi->max_tp_rate[1], false); + update = true; + } } if (time_after(jiffies, mi->last_stats_update + (mp->update_interval / 2 * HZ) / 1000)) { update = true; - minstrel_ht_update_stats(mp, mi); + minstrel_ht_update_stats(mp, mi, true); } if (update) minstrel_ht_update_rates(mp, mi); } -static inline int -minstrel_get_duration(int index) -{ - const struct mcs_group *group = &minstrel_mcs_groups[index / MCS_GROUP_RATES]; - unsigned int duration = group->duration[index % MCS_GROUP_RATES]; - return duration << group->shift; -} - static void minstrel_calc_retransmit(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, int index) @@ -980,14 +1159,18 @@ static void minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) { struct ieee80211_sta_rates *rates; + u16 first_rate = mi->max_tp_rate[0]; int i = 0; + if (mi->sample_mode == MINSTREL_SAMPLE_ACTIVE) + first_rate = mi->sample_rate; + rates = kzalloc(sizeof(*rates), GFP_ATOMIC); if (!rates) return; /* Start with max_tp_rate[0] */ - minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_tp_rate[0]); + minstrel_ht_set_rate(mp, mi, rates, i++, first_rate); if (mp->hw->max_rates >= 3) { /* At least 3 tx rates supported, use max_tp_rate[1] next */ @@ -1012,6 +1195,11 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) int tp_rate1, tp_rate2; int sample_idx = 0; + if (mp->hw->max_rates == 1 && mp->sample_switch && + (mi->total_packets_cur >= SAMPLE_SWITCH_THR || + mp->sample_switch == 1)) + return -1; + if (mi->sample_wait > 0) { mi->sample_wait--; return -1; @@ -1059,6 +1247,21 @@ minstrel_get_sample_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) minstrel_get_duration(mi->max_prob_rate) * 3 < sample_dur) return -1; + + /* + * For devices with no configurable multi-rate retry, skip sampling + * below the per-group max throughput rate, and only use one sampling + * attempt per rate + */ + if (mp->hw->max_rates == 1 && + (minstrel_get_duration(mg->max_group_tp_rate[0]) < sample_dur || + mrs->attempts)) + return -1; + + /* Skip already sampled slow rates */ + if (sample_dur >= minstrel_get_duration(tp_rate1) && mrs->attempts) + return -1; + /* * Make sure that lower rates get sampled only occasionally, * if the link is working perfectly. @@ -1318,7 +1521,7 @@ minstrel_ht_update_caps(void *priv, struct ieee80211_supported_band *sband, mi->supported[MINSTREL_CCK_GROUP] |= mi->cck_supported_short << 4; /* create an initial rate table with the lowest supported rates */ - minstrel_ht_update_stats(mp, mi); + minstrel_ht_update_stats(mp, mi, true); minstrel_ht_update_rates(mp, mi); return; @@ -1436,6 +1639,8 @@ minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) if (!mp) return NULL; + mp->sample_switch = -1; + /* contention window settings * Just an approximation. 
Using the per-queue values would complicate * the calculations and is probably unnecessary */ @@ -1467,6 +1672,8 @@ minstrel_ht_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) mp->fixed_rate_idx = (u32) -1; debugfs_create_u32("fixed_rate_idx", S_IRUGO | S_IWUGO, debugfsdir, &mp->fixed_rate_idx); + debugfs_create_u32("sample_switch", S_IRUGO | S_IWUSR, debugfsdir, + &mp->sample_switch); #endif minstrel_ht_init_cck_rates(mp); diff --git a/net/mac80211/rc80211_minstrel_ht.h b/net/mac80211/rc80211_minstrel_ht.h index 80296268c778..f938701e7ab7 100644 --- a/net/mac80211/rc80211_minstrel_ht.h +++ b/net/mac80211/rc80211_minstrel_ht.h @@ -33,6 +33,7 @@ struct mcs_group { u16 flags; u8 streams; u8 shift; + u8 bw; u16 duration[MCS_GROUP_RATES]; }; @@ -50,6 +51,12 @@ struct minstrel_mcs_group_data { struct minstrel_rate_stats rates[MCS_GROUP_RATES]; }; +enum minstrel_sample_mode { + MINSTREL_SAMPLE_IDLE, + MINSTREL_SAMPLE_ACTIVE, + MINSTREL_SAMPLE_PENDING, +}; + struct minstrel_ht_sta { struct ieee80211_sta *sta; @@ -71,6 +78,8 @@ struct minstrel_ht_sta { unsigned int overhead; unsigned int overhead_rtscts; + unsigned int total_packets_last; + unsigned int total_packets_cur; unsigned int total_packets; unsigned int sample_packets; @@ -82,6 +91,9 @@ struct minstrel_ht_sta { u8 sample_count; u8 sample_slow; + enum minstrel_sample_mode sample_mode; + u16 sample_rate; + /* current MCS group to be sampled */ u8 sample_group; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 5fb368cc2633..bd11fef2139f 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1065,7 +1065,6 @@ static void __sta_info_destroy_part2(struct sta_info *sta) cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL); kfree(sinfo); - rate_control_remove_sta_debugfs(sta); ieee80211_sta_debugfs_remove(sta); cleanup_single_sta(sta); @@ -1962,6 +1961,7 @@ int sta_info_move_state(struct sta_info *sta, case IEEE80211_STA_ASSOC: if (sta->sta_state == IEEE80211_STA_AUTH) { set_bit(WLAN_STA_ASSOC, &sta->_flags); + sta->assoc_at = ktime_get_boottime_ns(); ieee80211_recalc_min_chandef(sta->sdata); if (!sta->sta.support_p2p_ps) ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); @@ -2195,6 +2195,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, BIT_ULL(NL80211_STA_INFO_STA_FLAGS) | BIT_ULL(NL80211_STA_INFO_BSS_PARAM) | BIT_ULL(NL80211_STA_INFO_CONNECTED_TIME) | + BIT_ULL(NL80211_STA_INFO_ASSOC_AT_BOOTTIME) | BIT_ULL(NL80211_STA_INFO_RX_DROP_MISC); if (sdata->vif.type == NL80211_IFTYPE_STATION) { @@ -2203,6 +2204,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo, } sinfo->connected_time = ktime_get_seconds() - sta->last_connected; + sinfo->assoc_at = sta->assoc_at; sinfo->inactive_time = jiffies_to_msecs(jiffies - ieee80211_sta_last_active(sta)); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 3260d4234920..369c2dddce52 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -466,6 +466,7 @@ struct ieee80211_sta_rx_stats { * the station when it leaves powersave or polls for frames * @driver_buffered_tids: bitmap of TIDs the driver has data buffered on * @txq_buffered_tids: bitmap of TIDs that mac80211 has txq data buffered on + * @assoc_at: clock boottime (in ns) of last association * @last_connected: time (in seconds) when a station got connected * @last_seq_ctrl: last received seq/frag number from this STA (per TID * plus one for non-QoS frames) @@ -562,6 +563,7 @@ struct sta_info { unsigned long 
driver_buffered_tids; unsigned long txq_buffered_tids; + u64 assoc_at; long last_connected; /* Updated from RX path only, no locking requirements */ diff --git a/net/mac80211/status.c b/net/mac80211/status.c index a88e3bf17e9d..ab8ba5835ca0 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -254,14 +254,22 @@ static void ieee80211_set_bar_pending(struct sta_info *sta, u8 tid, u16 ssn) tid_tx->bar_pending = true; } -static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info) +static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info, + struct ieee80211_tx_status *status) { int len = sizeof(struct ieee80211_radiotap_header); /* IEEE80211_RADIOTAP_RATE rate */ - if (info->status.rates[0].idx >= 0 && - !(info->status.rates[0].flags & (IEEE80211_TX_RC_MCS | - IEEE80211_TX_RC_VHT_MCS))) + if (status && status->rate && !(status->rate->flags & + (RATE_INFO_FLAGS_MCS | + RATE_INFO_FLAGS_DMG | + RATE_INFO_FLAGS_EDMG | + RATE_INFO_FLAGS_VHT_MCS | + RATE_INFO_FLAGS_HE_MCS))) + len += 2; + else if (info->status.rates[0].idx >= 0 && + !(info->status.rates[0].flags & + (IEEE80211_TX_RC_MCS | IEEE80211_TX_RC_VHT_MCS))) len += 2; /* IEEE80211_RADIOTAP_TX_FLAGS */ @@ -272,7 +280,14 @@ static int ieee80211_tx_radiotap_len(struct ieee80211_tx_info *info) /* IEEE80211_RADIOTAP_MCS * IEEE80211_RADIOTAP_VHT */ - if (info->status.rates[0].idx >= 0) { + if (status && status->rate) { + if (status->rate->flags & RATE_INFO_FLAGS_MCS) + len += 3; + else if (status->rate->flags & RATE_INFO_FLAGS_VHT_MCS) + len = ALIGN(len, 2) + 12; + else if (status->rate->flags & RATE_INFO_FLAGS_HE_MCS) + len = ALIGN(len, 2) + 12; + } else if (info->status.rates[0].idx >= 0) { if (info->status.rates[0].flags & IEEE80211_TX_RC_MCS) len += 3; else if (info->status.rates[0].flags & IEEE80211_TX_RC_VHT_MCS) @@ -286,12 +301,14 @@ static void ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, struct ieee80211_supported_band *sband, struct sk_buff *skb, int retry_count, - int rtap_len, int shift) + int rtap_len, int shift, + struct ieee80211_tx_status *status) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct ieee80211_radiotap_header *rthdr; unsigned char *pos; + u16 legacy_rate = 0; u16 txflags; rthdr = skb_push(skb, rtap_len); @@ -310,14 +327,23 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, */ /* IEEE80211_RADIOTAP_RATE */ - if (info->status.rates[0].idx >= 0 && - !(info->status.rates[0].flags & (IEEE80211_TX_RC_MCS | - IEEE80211_TX_RC_VHT_MCS))) { - u16 rate; + if (status && status->rate) { + if (!(status->rate->flags & (RATE_INFO_FLAGS_MCS | + RATE_INFO_FLAGS_DMG | + RATE_INFO_FLAGS_EDMG | + RATE_INFO_FLAGS_VHT_MCS | + RATE_INFO_FLAGS_HE_MCS))) + legacy_rate = status->rate->legacy; + } else if (info->status.rates[0].idx >= 0 && + !(info->status.rates[0].flags & (IEEE80211_TX_RC_MCS | + IEEE80211_TX_RC_VHT_MCS))) + legacy_rate = + sband->bitrates[info->status.rates[0].idx].bitrate; + + if (legacy_rate) { rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_RATE); - rate = sband->bitrates[info->status.rates[0].idx].bitrate; - *pos = DIV_ROUND_UP(rate, 5 * (1 << shift)); + *pos = DIV_ROUND_UP(legacy_rate, 5 * (1 << shift)); /* padding for tx flags */ pos += 2; } @@ -341,7 +367,140 @@ ieee80211_add_tx_radiotap_header(struct ieee80211_local *local, *pos = retry_count; pos++; - if (info->status.rates[0].idx < 0) + if (status && status->rate && + (status->rate->flags & RATE_INFO_FLAGS_MCS)) { + 
rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_MCS); + pos[0] = IEEE80211_RADIOTAP_MCS_HAVE_MCS | + IEEE80211_RADIOTAP_MCS_HAVE_GI | + IEEE80211_RADIOTAP_MCS_HAVE_BW; + if (status->rate->flags & RATE_INFO_FLAGS_SHORT_GI) + pos[1] |= IEEE80211_RADIOTAP_MCS_SGI; + if (status->rate->bw == RATE_INFO_BW_40) + pos[1] |= IEEE80211_RADIOTAP_MCS_BW_40; + pos[2] = status->rate->mcs; + pos += 3; + } else if (status && status->rate && + (status->rate->flags & RATE_INFO_FLAGS_VHT_MCS)) { + u16 known = local->hw.radiotap_vht_details & + (IEEE80211_RADIOTAP_VHT_KNOWN_GI | + IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH); + + rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_VHT); + + /* required alignment from rthdr */ + pos = (u8 *)rthdr + ALIGN(pos - (u8 *)rthdr, 2); + + /* u16 known - IEEE80211_RADIOTAP_VHT_KNOWN_* */ + put_unaligned_le16(known, pos); + pos += 2; + + /* u8 flags - IEEE80211_RADIOTAP_VHT_FLAG_* */ + if (status->rate->flags & RATE_INFO_FLAGS_SHORT_GI) + *pos |= IEEE80211_RADIOTAP_VHT_FLAG_SGI; + pos++; + + /* u8 bandwidth */ + switch (status->rate->bw) { + case RATE_INFO_BW_160: + *pos = 11; + break; + case RATE_INFO_BW_80: + *pos = 4; + break; + case RATE_INFO_BW_40: + *pos = 1; + break; + default: + *pos = 0; + break; + } + pos++; + + /* u8 mcs_nss[4] */ + *pos = (status->rate->mcs << 4) | status->rate->nss; + pos += 4; + + /* u8 coding */ + pos++; + /* u8 group_id */ + pos++; + /* u16 partial_aid */ + pos += 2; + } else if (status && status->rate && + (status->rate->flags & RATE_INFO_FLAGS_HE_MCS)) { + struct ieee80211_radiotap_he *he; + + rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_HE); + + /* required alignment from rthdr */ + pos = (u8 *)rthdr + ALIGN(pos - (u8 *)rthdr, 2); + he = (struct ieee80211_radiotap_he *)pos; + + he->data1 = cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA1_FORMAT_SU | + IEEE80211_RADIOTAP_HE_DATA1_DATA_MCS_KNOWN | + IEEE80211_RADIOTAP_HE_DATA1_DATA_DCM_KNOWN | + IEEE80211_RADIOTAP_HE_DATA1_BW_RU_ALLOC_KNOWN); + + he->data2 = cpu_to_le16(IEEE80211_RADIOTAP_HE_DATA2_GI_KNOWN); + +#define HE_PREP(f, val) le16_encode_bits(val, IEEE80211_RADIOTAP_HE_##f) + + he->data6 |= HE_PREP(DATA6_NSTS, status->rate->nss); + +#define CHECK_GI(s) \ + BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_DATA5_GI_##s != \ + (int)NL80211_RATE_INFO_HE_GI_##s) + + CHECK_GI(0_8); + CHECK_GI(1_6); + CHECK_GI(3_2); + + he->data3 |= HE_PREP(DATA3_DATA_MCS, status->rate->mcs); + he->data3 |= HE_PREP(DATA3_DATA_DCM, status->rate->he_dcm); + + he->data5 |= HE_PREP(DATA5_GI, status->rate->he_gi); + + switch (status->rate->bw) { + case RATE_INFO_BW_20: + he->data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC, + IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_20MHZ); + break; + case RATE_INFO_BW_40: + he->data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC, + IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_40MHZ); + break; + case RATE_INFO_BW_80: + he->data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC, + IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_80MHZ); + break; + case RATE_INFO_BW_160: + he->data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC, + IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_160MHZ); + break; + case RATE_INFO_BW_HE_RU: +#define CHECK_RU_ALLOC(s) \ + BUILD_BUG_ON(IEEE80211_RADIOTAP_HE_DATA5_DATA_BW_RU_ALLOC_##s##T != \ + NL80211_RATE_INFO_HE_RU_ALLOC_##s + 4) + + CHECK_RU_ALLOC(26); + CHECK_RU_ALLOC(52); + CHECK_RU_ALLOC(106); + CHECK_RU_ALLOC(242); + CHECK_RU_ALLOC(484); + CHECK_RU_ALLOC(996); + CHECK_RU_ALLOC(2x996); + + he->data5 |= HE_PREP(DATA5_DATA_BW_RU_ALLOC, + status->rate->he_ru_alloc + 4); + break; + default: + 
WARN_ONCE(1, "Invalid SU BW %d\n", status->rate->bw); + } + + pos += sizeof(struct ieee80211_radiotap_he); + } + + if ((status && status->rate) || info->status.rates[0].idx < 0) return; /* IEEE80211_RADIOTAP_MCS @@ -645,7 +804,8 @@ static int ieee80211_tx_get_rates(struct ieee80211_hw *hw, void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb, struct ieee80211_supported_band *sband, - int retry_count, int shift, bool send_to_cooked) + int retry_count, int shift, bool send_to_cooked, + struct ieee80211_tx_status *status) { struct sk_buff *skb2; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); @@ -654,14 +814,14 @@ void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb, int rtap_len; /* send frame to monitor interfaces now */ - rtap_len = ieee80211_tx_radiotap_len(info); + rtap_len = ieee80211_tx_radiotap_len(info, status); if (WARN_ON_ONCE(skb_headroom(skb) < rtap_len)) { pr_err("ieee80211_tx_status: headroom too small\n"); dev_kfree_skb(skb); return; } ieee80211_add_tx_radiotap_header(local, sband, skb, retry_count, - rtap_len, shift); + rtap_len, shift, status); /* XXX: is this sufficient for BPF? */ skb_reset_mac_header(skb); @@ -901,7 +1061,8 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw, } /* send to monitor interfaces */ - ieee80211_tx_monitor(local, skb, sband, retry_count, shift, send_to_cooked); + ieee80211_tx_monitor(local, skb, sband, retry_count, shift, + send_to_cooked, status); } void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 3bb4459b52c7..4768322dc202 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -1242,9 +1242,10 @@ TRACE_EVENT(drv_remain_on_channel, ) ); -DEFINE_EVENT(local_only_evt, drv_cancel_remain_on_channel, - TP_PROTO(struct ieee80211_local *local), - TP_ARGS(local) +DEFINE_EVENT(local_sdata_evt, drv_cancel_remain_on_channel, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata), + TP_ARGS(local, sdata) ); TRACE_EVENT(drv_set_ringparam, diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index f13eb2f61ccf..1fa422782905 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -162,6 +162,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, break; } case NL80211_BAND_5GHZ: + case NL80211_BAND_6GHZ: if (r->flags & IEEE80211_RATE_MANDATORY_A) mrate = r->bitrate; break; @@ -3546,6 +3547,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, ieee80211_tx_result r; struct ieee80211_vif *vif = txq->vif; + WARN_ON_ONCE(softirq_count() == 0); + begin: spin_lock_bh(&fq->lock); @@ -4647,7 +4650,8 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, if (!sband) return bcn; - ieee80211_tx_monitor(hw_to_local(hw), copy, sband, 1, shift, false); + ieee80211_tx_monitor(hw_to_local(hw), copy, sband, 1, shift, false, + NULL); return bcn; } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index ad1e58184c4e..051a02ddcb85 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1200,6 +1200,13 @@ _ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, elems->cisco_dtpc_elem = pos; break; + case WLAN_EID_ADDBA_EXT: + if (elen != sizeof(struct ieee80211_addba_ext_ie)) { + elem_parse_failed = true; + break; + } + elems->addba_ext_ie = (void *)pos; + break; case WLAN_EID_TIMEOUT_INTERVAL: if (elen >= sizeof(struct ieee80211_timeout_interval_ie)) elems->timeout_int = (void *)pos; @@ -1233,6 +1240,10 @@ 
_ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION && elen == 3) { elems->mbssid_config_ie = (void *)&pos[1]; + } else if (pos[0] == WLAN_EID_EXT_HE_SPR && + elen >= sizeof(*elems->he_spr) && + elen >= ieee80211_he_spr_size(&pos[1])) { + elems->he_spr = (void *)&pos[1]; } break; default: @@ -1572,7 +1583,8 @@ void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, } void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, - const u8 *bssid, u16 stype, u16 reason, + const u8 *da, const u8 *bssid, + u16 stype, u16 reason, bool send_frame, u8 *frame_buf) { struct ieee80211_local *local = sdata->local; @@ -1583,7 +1595,7 @@ void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | stype); mgmt->duration = 0; /* initialize only */ mgmt->seq_ctrl = 0; /* initialize only */ - memcpy(mgmt->da, bssid, ETH_ALEN); + memcpy(mgmt->da, da, ETH_ALEN); memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); memcpy(mgmt->bssid, bssid, ETH_ALEN); /* u.deauth.reason_code == u.disassoc.reason_code */ @@ -2409,11 +2421,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) /* add back keys */ list_for_each_entry(sdata, &local->interfaces, list) - ieee80211_reset_crypto_tx_tailroom(sdata); - - list_for_each_entry(sdata, &local->interfaces, list) - if (ieee80211_sdata_running(sdata)) - ieee80211_enable_keys(sdata); + ieee80211_reenable_keys(sdata); /* Reconfigure sched scan if it was interrupted by FW restart */ mutex_lock(&local->mtx); @@ -2702,6 +2710,27 @@ u8 *ieee80211_ie_build_vht_cap(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, return pos; } +u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype) +{ + const struct ieee80211_sta_he_cap *he_cap; + struct ieee80211_supported_band *sband; + u8 n; + + sband = ieee80211_get_sband(sdata); + if (!sband) + return 0; + + he_cap = ieee80211_get_he_iftype_cap(sband, iftype); + if (!he_cap) + return 0; + + n = ieee80211_he_mcs_nss_size(&he_cap->he_cap_elem); + return 2 + 1 + + sizeof(he_cap->he_cap_elem) + n + + ieee80211_he_ppe_size(he_cap->ppe_thres[0], + he_cap->he_cap_elem.phy_cap_info); +} + u8 *ieee80211_ie_build_he_cap(u8 *pos, const struct ieee80211_sta_he_cap *he_cap, u8 *end) @@ -2891,6 +2920,34 @@ u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap, return pos + sizeof(struct ieee80211_vht_operation); } +u8 *ieee80211_ie_build_he_oper(u8 *pos) +{ + struct ieee80211_he_operation *he_oper; + u32 he_oper_params; + + *pos++ = WLAN_EID_EXTENSION; + *pos++ = 1 + sizeof(struct ieee80211_he_operation); + *pos++ = WLAN_EID_EXT_HE_OPERATION; + + he_oper_params = 0; + he_oper_params |= u32_encode_bits(1023, /* disabled */ + IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK); + he_oper_params |= u32_encode_bits(1, + IEEE80211_HE_OPERATION_ER_SU_DISABLE); + he_oper_params |= u32_encode_bits(1, + IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED); + + he_oper = (struct ieee80211_he_operation *)pos; + he_oper->he_oper_params = cpu_to_le32(he_oper_params); + + /* don't require special HE peer rates */ + he_oper->he_mcs_nss_set = cpu_to_le16(0xffff); + + /* TODO add VHT operational and 6GHz operational subelement? 
*/ + + return pos + sizeof(struct ieee80211_vht_operation); +} + bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper, struct cfg80211_chan_def *chandef) { diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index b20ff28d9f30..ccdcb9ad9ac7 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -4,7 +4,7 @@ * * Portions of this file * Copyright(c) 2015 - 2016 Intel Deutschland GmbH - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018 - 2019 Intel Corporation */ #include <linux/ieee80211.h> @@ -349,6 +349,14 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cap_rx_bw(struct sta_info *sta) cap_width == IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ) return IEEE80211_STA_RX_BW_160; + /* + * If this is non-zero, then it does support 160 MHz after all, + * in one form or the other. We don't distinguish here (or even + * above) between 160 and 80+80 yet. + */ + if (vht_cap->cap & IEEE80211_VHT_CAP_EXT_NSS_BW_MASK) + return IEEE80211_STA_RX_BW_160; + return IEEE80211_STA_RX_BW_80; } diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index ee72779729e5..91bf32af55e9 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -946,7 +946,8 @@ ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx) info = IEEE80211_SKB_CB(skb); - if (info->control.hw_key) + if (info->control.hw_key && + !(key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIE)) return TX_CONTINUE; if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie))) @@ -962,6 +963,9 @@ ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx) bip_ipn_set64(mmie->sequence_number, pn64); + if (info->control.hw_key) + return TX_CONTINUE; + bip_aad(skb, aad); /* diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h index a8e9def593f2..80938b338fee 100644 --- a/net/ncsi/ncsi-pkt.h +++ b/net/ncsi/ncsi-pkt.h @@ -387,6 +387,9 @@ struct ncsi_aen_hncdsc_pkt { #define NCSI_PKT_CMD_OEM 0x50 /* OEM */ #define NCSI_PKT_CMD_PLDM 0x51 /* PLDM request over NCSI over RBT */ #define NCSI_PKT_CMD_GPUUID 0x52 /* Get package UUID */ +#define NCSI_PKT_CMD_QPNPR 0x56 /* Query Pending NC PLDM request */ +#define NCSI_PKT_CMD_SNPR 0x57 /* Send NC PLDM Reply */ + /* NCSI packet responses */ #define NCSI_PKT_RSP_CIS (NCSI_PKT_CMD_CIS + 0x80) @@ -419,6 +422,8 @@ struct ncsi_aen_hncdsc_pkt { #define NCSI_PKT_RSP_OEM (NCSI_PKT_CMD_OEM + 0x80) #define NCSI_PKT_RSP_PLDM (NCSI_PKT_CMD_PLDM + 0x80) #define NCSI_PKT_RSP_GPUUID (NCSI_PKT_CMD_GPUUID + 0x80) +#define NCSI_PKT_RSP_QPNPR (NCSI_PKT_CMD_QPNPR + 0x80) +#define NCSI_PKT_RSP_SNPR (NCSI_PKT_CMD_SNPR + 0x80) /* NCSI response code/reason */ #define NCSI_PKT_RSP_C_COMPLETED 0x0000 /* Command Completed */ diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index d876bd55f356..d5611f04926d 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -1038,6 +1038,11 @@ static int ncsi_rsp_handler_gpuuid(struct ncsi_request *nr) return 0; } +static int ncsi_rsp_handler_pldm(struct ncsi_request *nr) +{ + return 0; +} + static int ncsi_rsp_handler_netlink(struct ncsi_request *nr) { struct ncsi_dev_priv *ndp = nr->ndp; @@ -1086,13 +1091,15 @@ static struct ncsi_rsp_handler { { NCSI_PKT_RSP_GVI, 40, ncsi_rsp_handler_gvi }, { NCSI_PKT_RSP_GC, 32, ncsi_rsp_handler_gc }, { NCSI_PKT_RSP_GP, -1, ncsi_rsp_handler_gp }, - { NCSI_PKT_RSP_GCPS, 172, ncsi_rsp_handler_gcps }, - { NCSI_PKT_RSP_GNS, 172, ncsi_rsp_handler_gns }, - { NCSI_PKT_RSP_GNPTS, 172, ncsi_rsp_handler_gnpts }, + { NCSI_PKT_RSP_GCPS, 204, ncsi_rsp_handler_gcps }, + { NCSI_PKT_RSP_GNS, 32, ncsi_rsp_handler_gns }, + { 
NCSI_PKT_RSP_GNPTS, 48, ncsi_rsp_handler_gnpts }, { NCSI_PKT_RSP_GPS, 8, ncsi_rsp_handler_gps }, { NCSI_PKT_RSP_OEM, -1, ncsi_rsp_handler_oem }, - { NCSI_PKT_RSP_PLDM, 0, NULL }, - { NCSI_PKT_RSP_GPUUID, 20, ncsi_rsp_handler_gpuuid } + { NCSI_PKT_RSP_PLDM, -1, ncsi_rsp_handler_pldm }, + { NCSI_PKT_RSP_GPUUID, 20, ncsi_rsp_handler_gpuuid }, + { NCSI_PKT_RSP_QPNPR, -1, ncsi_rsp_handler_pldm }, + { NCSI_PKT_RSP_SNPR, -1, ncsi_rsp_handler_pldm } }; int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev, diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 0d65f4d39494..34ec7afec116 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -20,7 +20,7 @@ config NETFILTER_FAMILY_ARP bool config NETFILTER_NETLINK_ACCT -tristate "Netfilter NFACCT over NFNETLINK interface" + tristate "Netfilter NFACCT over NFNETLINK interface" depends on NETFILTER_ADVANCED select NETFILTER_NETLINK help @@ -34,7 +34,7 @@ config NETFILTER_NETLINK_QUEUE help If this option is enabled, the kernel will include support for queueing packets via NFNETLINK. - + config NETFILTER_NETLINK_LOG tristate "Netfilter LOG over NFNETLINK interface" default m if NETFILTER_ADVANCED=n @@ -1502,7 +1502,7 @@ config NETFILTER_XT_MATCH_REALM This option adds a `realm' match, which allows you to use the realm key from the routing subsystem inside iptables. - This match pretty much resembles the CONFIG_NET_CLS_ROUTE4 option + This match pretty much resembles the CONFIG_NET_CLS_ROUTE4 option in tc world. If you want to compile it as a module, say M here and read @@ -1523,7 +1523,7 @@ config NETFILTER_XT_MATCH_SCTP depends on NETFILTER_ADVANCED default IP_SCTP help - With this option enabled, you will be able to use the + With this option enabled, you will be able to use the `sctp' match in order to match on SCTP source/destination ports and SCTP chunk types. 
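The response handler table above maps each NC-SI response type to an expected payload size and a handler, with the new PLDM entries using -1 to mark a variable-size payload. As a rough sketch of how such a table is typically consulted — a standalone model, not the kernel's ncsi_rcv_rsp(), and the names rsp_entry/dispatch_rsp are this sketch's own — the lookup matches the type and only enforces the length when it is fixed:

#include <stddef.h>

typedef int (*rsp_handler_t)(const unsigned char *payload, size_t len);

struct rsp_entry {
	unsigned char type;   /* response type (command code + 0x80) */
	int payload;          /* expected payload size, or -1 if variable */
	rsp_handler_t handler;
};

static int handle_pldm(const unsigned char *payload, size_t len)
{
	(void)payload; (void)len;
	return 0;             /* PLDM-over-NC-SI replies are accepted and otherwise ignored */
}

static const struct rsp_entry rsp_table[] = {
	{ 0x51 + 0x80, -1, handle_pldm },  /* PLDM */
	{ 0x56 + 0x80, -1, handle_pldm },  /* Query Pending NC PLDM request */
	{ 0x57 + 0x80, -1, handle_pldm },  /* Send NC PLDM Reply */
};

static int dispatch_rsp(unsigned char type, const unsigned char *payload, size_t len)
{
	size_t i;

	for (i = 0; i < sizeof(rsp_table) / sizeof(rsp_table[0]); i++) {
		if (rsp_table[i].type != type)
			continue;
		/* fixed-size responses must match the expected payload length exactly */
		if (rsp_table[i].payload >= 0 && len != (size_t)rsp_table[i].payload)
			return -1;
		return rsp_table[i].handler(payload, len);
	}
	return -1;            /* no handler registered for this response type */
}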
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 9270a7fae484..4fc075b612fe 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -124,7 +124,7 @@ nf_flow_table-objs := nf_flow_table_core.o nf_flow_table_ip.o obj-$(CONFIG_NF_FLOW_TABLE_INET) += nf_flow_table_inet.o -# generic X tables +# generic X tables obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o # combos diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 0feb77fa9edc..d098d87bc331 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -7,7 +7,7 @@ #include <linux/rcupdate.h> #include <linux/jhash.h> #include <linux/types.h> -#include <linux/netfilter/ipset/ip_set_timeout.h> +#include <linux/netfilter/ipset/ip_set.h> #define __ipset_dereference_protected(p, c) rcu_dereference_protected(p, c) #define ipset_dereference_protected(p, set) \ @@ -953,7 +953,7 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0])); #endif key = HKEY(d, h->initval, t->htable_bits); - n = rcu_dereference_bh(hbucket(t, key)); + n = rcu_dereference_bh(hbucket(t, key)); if (!n) continue; for (i = 0; i < n->pos; i++) { diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 6f9ead6319e0..67ac50104e6f 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -288,7 +288,7 @@ list_set_uadd(struct ip_set *set, void *value, const struct ip_set_ext *ext, if (n && !(SET_WITH_TIMEOUT(set) && ip_set_timeout_expired(ext_timeout(n, set)))) - n = NULL; + n = NULL; e = kzalloc(set->dsize, GFP_ATOMIC); if (!e) diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 46f06f92ab8f..8b80ab794a92 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -617,7 +617,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && iph->protocol == IPPROTO_UDP) ? 
IP_VS_CONN_F_ONE_PACKET : 0; - union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; + union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; /* create a new connection entry */ IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 060565e7d227..8b48e7ce1c2c 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -262,7 +262,7 @@ static inline unsigned int ip_vs_svc_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto, const union nf_inet_addr *addr, __be16 port) { - register unsigned int porth = ntohs(port); + unsigned int porth = ntohs(port); __be32 addr_fold = addr->ip; __u32 ahash; @@ -493,7 +493,7 @@ static inline unsigned int ip_vs_rs_hashkey(int af, const union nf_inet_addr *addr, __be16 port) { - register unsigned int porth = ntohs(port); + unsigned int porth = ntohs(port); __be32 addr_fold = addr->ip; #ifdef CONFIG_IP_VS_IPV6 @@ -1737,12 +1737,18 @@ proc_do_defense_mode(struct ctl_table *table, int write, int val = *valp; int rc; - rc = proc_dointvec(table, write, buffer, lenp, ppos); + struct ctl_table tmp = { + .data = &val, + .maxlen = sizeof(int), + .mode = table->mode, + }; + + rc = proc_dointvec(&tmp, write, buffer, lenp, ppos); if (write && (*valp != val)) { - if ((*valp < 0) || (*valp > 3)) { - /* Restore the correct value */ - *valp = val; + if (val < 0 || val > 3) { + rc = -EINVAL; } else { + *valp = val; update_defense_level(ipvs); } } @@ -1756,33 +1762,20 @@ proc_do_sync_threshold(struct ctl_table *table, int write, int *valp = table->data; int val[2]; int rc; + struct ctl_table tmp = { + .data = &val, + .maxlen = table->maxlen, + .mode = table->mode, + }; - /* backup the value first */ memcpy(val, valp, sizeof(val)); - - rc = proc_dointvec(table, write, buffer, lenp, ppos); - if (write && (valp[0] < 0 || valp[1] < 0 || - (valp[0] >= valp[1] && valp[1]))) { - /* Restore the correct value */ - memcpy(valp, val, sizeof(val)); - } - return rc; -} - -static int -proc_do_sync_mode(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - int *valp = table->data; - int val = *valp; - int rc; - - rc = proc_dointvec(table, write, buffer, lenp, ppos); - if (write && (*valp != val)) { - if ((*valp < 0) || (*valp > 1)) { - /* Restore the correct value */ - *valp = val; - } + rc = proc_dointvec(&tmp, write, buffer, lenp, ppos); + if (write) { + if (val[0] < 0 || val[1] < 0 || + (val[0] >= val[1] && val[1])) + rc = -EINVAL; + else + memcpy(valp, val, sizeof(val)); } return rc; } @@ -1795,12 +1788,18 @@ proc_do_sync_ports(struct ctl_table *table, int write, int val = *valp; int rc; - rc = proc_dointvec(table, write, buffer, lenp, ppos); + struct ctl_table tmp = { + .data = &val, + .maxlen = sizeof(int), + .mode = table->mode, + }; + + rc = proc_dointvec(&tmp, write, buffer, lenp, ppos); if (write && (*valp != val)) { - if (*valp < 1 || !is_power_of_2(*valp)) { - /* Restore the correct value */ + if (val < 1 || !is_power_of_2(val)) + rc = -EINVAL; + else *valp = val; - } } return rc; } @@ -1860,7 +1859,9 @@ static struct ctl_table vs_vars[] = { .procname = "sync_version", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_do_sync_mode, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, { .procname = "sync_ports", diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index c8b5a504476c..77c323c36a88 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ 
b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -160,7 +160,7 @@ static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set) /* get weighted least-connection node in the destination set */ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) { - register struct ip_vs_dest_set_elem *e; + struct ip_vs_dest_set_elem *e; struct ip_vs_dest *dest, *least; int loh, doh; @@ -209,7 +209,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) /* get weighted most-connection node in the destination set */ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) { - register struct ip_vs_dest_set_elem *e; + struct ip_vs_dest_set_elem *e; struct ip_vs_dest *dest, *most; int moh, doh; diff --git a/net/netfilter/ipvs/ip_vs_mh.c b/net/netfilter/ipvs/ip_vs_mh.c index 94d9d349ebb0..da0280cec506 100644 --- a/net/netfilter/ipvs/ip_vs_mh.c +++ b/net/netfilter/ipvs/ip_vs_mh.c @@ -174,8 +174,8 @@ static int ip_vs_mh_populate(struct ip_vs_mh_state *s, return 0; } - table = kcalloc(BITS_TO_LONGS(IP_VS_MH_TAB_SIZE), - sizeof(unsigned long), GFP_KERNEL); + table = kcalloc(BITS_TO_LONGS(IP_VS_MH_TAB_SIZE), + sizeof(unsigned long), GFP_KERNEL); if (!table) return -ENOMEM; diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 000d961b97e4..32b028853a7c 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -710,7 +710,7 @@ static int __ip_vs_tcp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd sizeof(tcp_timeouts)); if (!pd->timeout_table) return -ENOMEM; - pd->tcp_state_table = tcp_states; + pd->tcp_state_table = tcp_states; return 0; } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 81a8ef42b88d..0c63120b2db2 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -73,8 +73,7 @@ struct conntrack_gc_work { }; static __read_mostly struct kmem_cache *nf_conntrack_cachep; -static __read_mostly spinlock_t nf_conntrack_locks_all_lock; -static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); +static DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); static __read_mostly bool nf_conntrack_locks_all; /* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */ diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 5e2812ee2149..6fba74b5aaf7 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -24,6 +24,7 @@ #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_extend.h> static DEFINE_MUTEX(nf_ct_ecache_mutex); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 65364de915d1..42557d2b6a90 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -25,8 +25,10 @@ #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_expect.h> #include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_tuple.h> #include <net/netfilter/nf_conntrack_zones.h> diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 8d96738b7dfd..9eca90414bb7 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ 
b/net/netfilter/nf_conntrack_ftp.c @@ -162,7 +162,7 @@ static int try_rfc959(const char *data, size_t dlen, if (length == 0) return 0; - cmd->u3.ip = htonl((array[0] << 24) | (array[1] << 16) | + cmd->u3.ip = htonl((array[0] << 24) | (array[1] << 16) | (array[2] << 8) | array[3]); cmd->u.tcp.port = htons((array[4] << 8) | array[5]); return length; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 8d729e7c36ff..118f415928ae 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -21,10 +21,11 @@ #include <linux/rtnetlink.h> #include <net/netfilter/nf_conntrack.h> -#include <net/netfilter/nf_conntrack_l4proto.h> -#include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_extend.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_log.h> static DEFINE_MUTEX(nf_ct_helper_mutex); diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c index 74b8113f7aeb..522792556632 100644 --- a/net/netfilter/nf_conntrack_labels.c +++ b/net/netfilter/nf_conntrack_labels.c @@ -11,7 +11,7 @@ #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_labels.h> -static spinlock_t nf_connlabels_lock; +static DEFINE_SPINLOCK(nf_connlabels_lock); static int replace_u32(u32 *address, u32 mask, u32 new) { @@ -89,7 +89,6 @@ int nf_conntrack_labels_init(void) { BUILD_BUG_ON(NF_CT_LABELS_MAX_SIZE / sizeof(long) >= U8_MAX); - spin_lock_init(&nf_connlabels_lock); return nf_ct_extend_register(&labels_extend); } diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c index 7e317e6698ba..6f9144e1f1c1 100644 --- a/net/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/netfilter/nf_conntrack_proto_icmpv6.c @@ -22,7 +22,6 @@ #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_timeout.h> #include <net/netfilter/nf_conntrack_zones.h> -#include <net/netfilter/ipv6/nf_conntrack_icmpv6.h> #include <net/netfilter/nf_log.h> static const unsigned int nf_ct_icmpv6_timeout = 30*HZ; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 85c1f8c213b0..1926fd56df56 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1227,7 +1227,7 @@ static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = { [CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 }, [CTA_PROTOINFO_TCP_WSCALE_REPLY] = { .type = NLA_U8 }, [CTA_PROTOINFO_TCP_FLAGS_ORIGINAL] = { .len = sizeof(struct nf_ct_tcp_flags) }, - [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) }, + [CTA_PROTOINFO_TCP_FLAGS_REPLY] = { .len = sizeof(struct nf_ct_tcp_flags) }, }; #define TCP_NLATTR_SIZE ( \ diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 0006503d2da9..410809c669e1 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -511,8 +511,6 @@ static void nf_conntrack_standalone_fini_proc(struct net *net) /* Log invalid packets of a given protocol */ static int log_invalid_proto_min __read_mostly; static int log_invalid_proto_max __read_mostly = 255; -static int zero; -static int one = 1; /* size the user *wants to set */ static unsigned int nf_conntrack_htable_size_user 
__read_mostly; @@ -629,8 +627,8 @@ static struct ctl_table nf_ct_sysctl_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, [NF_SYSCTL_CT_LOG_INVALID] = { .procname = "nf_conntrack_log_invalid", @@ -654,8 +652,8 @@ static struct ctl_table nf_ct_sysctl_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, [NF_SYSCTL_CT_HELPER] = { .procname = "nf_conntrack_helper", @@ -663,8 +661,8 @@ static struct ctl_table nf_ct_sysctl_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #ifdef CONFIG_NF_CONNTRACK_EVENTS [NF_SYSCTL_CT_EVENTS] = { @@ -673,8 +671,8 @@ static struct ctl_table nf_ct_sysctl_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #endif #ifdef CONFIG_NF_CONNTRACK_TIMESTAMP @@ -684,8 +682,8 @@ static struct ctl_table nf_ct_sysctl_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #endif [NF_SYSCTL_CT_PROTO_TIMEOUT_GENERIC] = { @@ -759,16 +757,16 @@ static struct ctl_table nf_ct_sysctl_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, [NF_SYSCTL_CT_PROTO_TCP_LIBERAL] = { .procname = "nf_conntrack_tcp_be_liberal", .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, [NF_SYSCTL_CT_PROTO_TCP_MAX_RETRANS] = { .procname = "nf_conntrack_tcp_max_retrans", @@ -904,8 +902,8 @@ static struct ctl_table nf_ct_sysctl_table[] = { .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, }, #endif #ifdef CONFIG_NF_CT_PROTO_GRE @@ -1169,7 +1167,6 @@ static int __init nf_conntrack_standalone_init(void) if (ret < 0) goto out_start; - BUILD_BUG_ON(SKB_NFCT_PTRMASK != NFCT_PTRMASK); BUILD_BUG_ON(NFCT_INFOMASK <= IP_CT_NUMBER); #ifdef CONFIG_SYSCTL diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c index 13d0f4a92647..14387e0b8008 100644 --- a/net/netfilter/nf_conntrack_timeout.c +++ b/net/netfilter/nf_conntrack_timeout.c @@ -19,6 +19,7 @@ #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_extend.h> +#include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_timeout.h> struct nf_ct_timeout * diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c index 5a35ef08c3cb..f108a76925dd 100644 --- a/net/netfilter/nf_dup_netdev.c +++ b/net/netfilter/nf_dup_netdev.c @@ -10,6 +10,7 @@ #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_offload.h> #include <net/netfilter/nf_dup_netdev.h> static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev) @@ -50,5 +51,25 @@ void nf_dup_netdev_egress(const struct nft_pktinfo *pkt, 
int oif) } EXPORT_SYMBOL_GPL(nf_dup_netdev_egress); +int nft_fwd_dup_netdev_offload(struct nft_offload_ctx *ctx, + struct nft_flow_rule *flow, + enum flow_action_id id, int oif) +{ + struct flow_action_entry *entry; + struct net_device *dev; + + /* nft_flow_rule_destroy() releases the reference on this device. */ + dev = dev_get_by_index(ctx->net, oif); + if (!dev) + return -EOPNOTSUPP; + + entry = &flow->rule->action.entries[ctx->num_actions++]; + entry->id = id; + entry->dev = dev; + + return 0; +} +EXPORT_SYMBOL_GPL(nft_fwd_dup_netdev_offload); + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index a0b4bf654de2..132f5228b431 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -11,6 +11,7 @@ #include <net/netfilter/nf_flow_table.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_tuple.h> struct flow_offload_entry { diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 3f6023ed4966..bfc555fcbc72 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -18,12 +18,12 @@ #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> -#include <net/netfilter/nf_nat.h> -#include <net/netfilter/nf_nat_helper.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netfilter/nf_conntrack_zones.h> -#include <linux/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_helper.h> +#include <uapi/linux/netfilter/nf_nat.h> #include "nf_internals.h" diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c index 7ac733ebd060..0a59c14b5177 100644 --- a/net/netfilter/nf_nat_proto.c +++ b/net/netfilter/nf_nat_proto.c @@ -722,7 +722,7 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb, return ret; } -const struct nf_hook_ops nf_nat_ipv4_ops[] = { +static const struct nf_hook_ops nf_nat_ipv4_ops[] = { /* Before packet filtering, change destination */ { .hook = nf_nat_ipv4_in, @@ -961,7 +961,7 @@ nf_nat_ipv6_local_fn(void *priv, struct sk_buff *skb, return ret; } -const struct nf_hook_ops nf_nat_ipv6_ops[] = { +static const struct nf_hook_ops nf_nat_ipv6_ops[] = { /* Before packet filtering, change destination */ { .hook = nf_nat_ipv6_in, diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c index c769462a839e..b0930d4aba22 100644 --- a/net/netfilter/nf_synproxy_core.c +++ b/net/netfilter/nf_synproxy_core.c @@ -56,7 +56,7 @@ synproxy_parse_options(const struct sk_buff *skb, unsigned int doff, switch (opcode) { case TCPOPT_MSS: if (opsize == TCPOLEN_MSS) { - opts->mss = get_unaligned_be16(ptr); + opts->mss_option = get_unaligned_be16(ptr); opts->options |= NF_SYNPROXY_OPT_MSS; } break; @@ -115,7 +115,7 @@ synproxy_build_options(struct tcphdr *th, const struct synproxy_options *opts) if (options & NF_SYNPROXY_OPT_MSS) *ptr++ = htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | - opts->mss); + opts->mss_option); if (options & NF_SYNPROXY_OPT_TIMESTAMP) { if (options & NF_SYNPROXY_OPT_SACK_PERM) @@ -642,7 +642,7 @@ synproxy_recv_client_ack(struct net *net, } this_cpu_inc(snet->stats->cookie_valid); - opts->mss = mss; + opts->mss_option = mss; opts->options |= NF_SYNPROXY_OPT_MSS; if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP) @@ -1060,7 
+1060,7 @@ synproxy_recv_client_ack_ipv6(struct net *net, } this_cpu_inc(snet->stats->cookie_valid); - opts->mss = mss; + opts->mss_option = mss; opts->options |= NF_SYNPROXY_OPT_MSS; if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index d47469f824a1..e4a68dc42694 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2853,7 +2853,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, return nft_table_validate(net, table); if (chain->flags & NFT_CHAIN_HW_OFFLOAD) { - flow = nft_flow_rule_create(rule); + flow = nft_flow_rule_create(net, rule); if (IS_ERR(flow)) return PTR_ERR(flow); @@ -5131,6 +5131,41 @@ nft_obj_type_get(struct net *net, u32 objtype) return ERR_PTR(-ENOENT); } +static int nf_tables_updobj(const struct nft_ctx *ctx, + const struct nft_object_type *type, + const struct nlattr *attr, + struct nft_object *obj) +{ + struct nft_object *newobj; + struct nft_trans *trans; + int err; + + if (!obj->ops->update) + return -EOPNOTSUPP; + + trans = nft_trans_alloc(ctx, NFT_MSG_NEWOBJ, + sizeof(struct nft_trans_obj)); + if (!trans) + return -ENOMEM; + + newobj = nft_obj_init(ctx, type, attr); + if (IS_ERR(newobj)) { + err = PTR_ERR(newobj); + goto err_free_trans; + } + + nft_trans_obj(trans) = obj; + nft_trans_obj_update(trans) = true; + nft_trans_obj_newobj(trans) = newobj; + list_add_tail(&trans->list, &ctx->net->nft.commit_list); + + return 0; + +err_free_trans: + kfree(trans); + return err; +} + static int nf_tables_newobj(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[], @@ -5170,7 +5205,13 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]); return -EEXIST; } - return 0; + if (nlh->nlmsg_flags & NLM_F_REPLACE) + return -EOPNOTSUPP; + + type = nft_obj_type_get(net, objtype); + nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); + + return nf_tables_updobj(&ctx, type, nla[NFTA_OBJ_DATA], obj); } nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla); @@ -6431,6 +6472,19 @@ static void nft_chain_commit_update(struct nft_trans *trans) } } +static void nft_obj_commit_update(struct nft_trans *trans) +{ + struct nft_object *newobj; + struct nft_object *obj; + + obj = nft_trans_obj(trans); + newobj = nft_trans_obj_newobj(trans); + + obj->ops->update(obj, newobj); + + kfree(newobj); +} + static void nft_commit_release(struct nft_trans *trans) { switch (trans->msg_type) { @@ -6795,10 +6849,18 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) te->set->ndeact--; break; case NFT_MSG_NEWOBJ: - nft_clear(net, nft_trans_obj(trans)); - nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans), - NFT_MSG_NEWOBJ); - nft_trans_destroy(trans); + if (nft_trans_obj_update(trans)) { + nft_obj_commit_update(trans); + nf_tables_obj_notify(&trans->ctx, + nft_trans_obj(trans), + NFT_MSG_NEWOBJ); + } else { + nft_clear(net, nft_trans_obj(trans)); + nf_tables_obj_notify(&trans->ctx, + nft_trans_obj(trans), + NFT_MSG_NEWOBJ); + nft_trans_destroy(trans); + } break; case NFT_MSG_DELOBJ: nft_obj_del(nft_trans_obj(trans)); @@ -6945,8 +7007,13 @@ static int __nf_tables_abort(struct net *net) nft_trans_destroy(trans); break; case NFT_MSG_NEWOBJ: - trans->ctx.table->use--; - nft_obj_del(nft_trans_obj(trans)); + if (nft_trans_obj_update(trans)) { + kfree(nft_trans_obj_newobj(trans)); + nft_trans_destroy(trans); + } else { + trans->ctx.table->use--; 
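The object-update path above stages the replacement object in a NEWOBJ transaction: commit calls the object's ->update() callback and frees the staged copy, while abort only frees it. A minimal, hedged model of that staged-update pattern in plain C (the struct and function names here are illustrative, not the nf_tables API):

#include <stdlib.h>

/* live object with an in-place update callback, loosely mirroring the ->update() hook */
struct obj {
	unsigned long setting;
	void (*update)(struct obj *live, const struct obj *staged);
};

/* one pending change, queued until commit or abort */
struct trans {
	struct obj *obj;      /* the published object */
	struct obj *newobj;   /* staged replacement owned by the transaction */
	int update;           /* true when this NEWOBJ replaces an existing object */
};

static void trans_commit(struct trans *t)
{
	if (t->update) {
		t->obj->update(t->obj, t->newobj); /* copy the new settings into the live object */
		free(t->newobj);                   /* the staging copy has served its purpose */
	}
}

static void trans_abort(struct trans *t)
{
	if (t->update)
		free(t->newobj);                   /* nothing was published; drop the staged copy */
}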
+ nft_obj_del(nft_trans_obj(trans)); + } break; case NFT_MSG_DELOBJ: trans->ctx.table->use++; @@ -7627,13 +7694,20 @@ static int __init nf_tables_module_init(void) if (err < 0) goto err4; + err = nft_offload_init(); + if (err < 0) + goto err5; + /* must be last */ err = nfnetlink_subsys_register(&nf_tables_subsys); if (err < 0) - goto err5; + goto err6; nft_chain_route_init(); + return err; +err6: + nft_offload_exit(); err5: rhltable_destroy(&nft_objname_ht); err4: @@ -7650,6 +7724,7 @@ err1: static void __exit nf_tables_module_exit(void) { nfnetlink_subsys_unregister(&nf_tables_subsys); + nft_offload_exit(); unregister_netdevice_notifier(&nf_tables_flowtable_notifier); nft_chain_filter_fini(); nft_chain_route_fini(); diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index c0d18c1d77ac..21bb772cb4b7 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -28,13 +28,10 @@ static struct nft_flow_rule *nft_flow_rule_alloc(int num_actions) return flow; } -struct nft_flow_rule *nft_flow_rule_create(const struct nft_rule *rule) +struct nft_flow_rule *nft_flow_rule_create(struct net *net, + const struct nft_rule *rule) { - struct nft_offload_ctx ctx = { - .dep = { - .type = NFT_OFFLOAD_DEP_UNSPEC, - }, - }; + struct nft_offload_ctx *ctx; struct nft_flow_rule *flow; int num_actions = 0, err; struct nft_expr *expr; @@ -52,21 +49,32 @@ struct nft_flow_rule *nft_flow_rule_create(const struct nft_rule *rule) return ERR_PTR(-ENOMEM); expr = nft_expr_first(rule); + + ctx = kzalloc(sizeof(struct nft_offload_ctx), GFP_KERNEL); + if (!ctx) { + err = -ENOMEM; + goto err_out; + } + ctx->net = net; + ctx->dep.type = NFT_OFFLOAD_DEP_UNSPEC; + while (expr->ops && expr != nft_expr_last(rule)) { if (!expr->ops->offload) { err = -EOPNOTSUPP; goto err_out; } - err = expr->ops->offload(&ctx, flow, expr); + err = expr->ops->offload(ctx, flow, expr); if (err < 0) goto err_out; expr = nft_expr_next(expr); } - flow->proto = ctx.dep.l3num; + flow->proto = ctx->dep.l3num; + kfree(ctx); return flow; err_out: + kfree(ctx); nft_flow_rule_destroy(flow); return ERR_PTR(err); @@ -74,6 +82,19 @@ err_out: void nft_flow_rule_destroy(struct nft_flow_rule *flow) { + struct flow_action_entry *entry; + int i; + + flow_action_for_each(i, entry, &flow->rule->action) { + switch (entry->id) { + case FLOW_ACTION_REDIRECT: + case FLOW_ACTION_MIRRED: + dev_put(entry->dev); + break; + default: + break; + } + } kfree(flow->rule); kfree(flow); } @@ -134,20 +155,20 @@ int nft_chain_offload_priority(struct nft_base_chain *basechain) return 0; } -static int nft_flow_offload_rule(struct nft_trans *trans, +static int nft_flow_offload_rule(struct nft_chain *chain, + struct nft_rule *rule, + struct nft_flow_rule *flow, enum flow_cls_command command) { - struct nft_flow_rule *flow = nft_trans_flow_rule(trans); - struct nft_rule *rule = nft_trans_rule(trans); struct flow_cls_offload cls_flow = {}; struct nft_base_chain *basechain; struct netlink_ext_ack extack; __be16 proto = ETH_P_ALL; - if (!nft_is_base_chain(trans->ctx.chain)) + if (!nft_is_base_chain(chain)) return -EOPNOTSUPP; - basechain = nft_base_chain(trans->ctx.chain); + basechain = nft_base_chain(chain); if (flow) proto = flow->proto; @@ -182,58 +203,130 @@ static int nft_flow_offload_unbind(struct flow_block_offload *bo, return 0; } -#define FLOW_SETUP_BLOCK TC_SETUP_BLOCK +static int nft_block_setup(struct nft_base_chain *basechain, + struct flow_block_offload *bo, + enum flow_block_command cmd) +{ + int err; -static int 
nft_flow_offload_chain(struct nft_trans *trans, - enum flow_block_command cmd) + switch (cmd) { + case FLOW_BLOCK_BIND: + err = nft_flow_offload_bind(bo, basechain); + break; + case FLOW_BLOCK_UNBIND: + err = nft_flow_offload_unbind(bo, basechain); + break; + default: + WARN_ON_ONCE(1); + err = -EOPNOTSUPP; + } + + return err; +} + +static int nft_block_offload_cmd(struct nft_base_chain *chain, + struct net_device *dev, + enum flow_block_command cmd) +{ + struct netlink_ext_ack extack = {}; + struct flow_block_offload bo = {}; + int err; + + bo.net = dev_net(dev); + bo.block = &chain->flow_block; + bo.command = cmd; + bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; + bo.extack = &extack; + INIT_LIST_HEAD(&bo.cb_list); + + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo); + if (err < 0) + return err; + + return nft_block_setup(chain, &bo, cmd); +} + +static void nft_indr_block_ing_cmd(struct net_device *dev, + struct nft_base_chain *chain, + flow_indr_block_bind_cb_t *cb, + void *cb_priv, + enum flow_block_command cmd) { - struct nft_chain *chain = trans->ctx.chain; struct netlink_ext_ack extack = {}; struct flow_block_offload bo = {}; + + if (!chain) + return; + + bo.net = dev_net(dev); + bo.block = &chain->flow_block; + bo.command = cmd; + bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; + bo.extack = &extack; + INIT_LIST_HEAD(&bo.cb_list); + + cb(dev, cb_priv, TC_SETUP_BLOCK, &bo); + + nft_block_setup(chain, &bo, cmd); +} + +static int nft_indr_block_offload_cmd(struct nft_base_chain *chain, + struct net_device *dev, + enum flow_block_command cmd) +{ + struct flow_block_offload bo = {}; + struct netlink_ext_ack extack = {}; + + bo.net = dev_net(dev); + bo.block = &chain->flow_block; + bo.command = cmd; + bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; + bo.extack = &extack; + INIT_LIST_HEAD(&bo.cb_list); + + flow_indr_block_call(dev, &bo, cmd); + + if (list_empty(&bo.cb_list)) + return -EOPNOTSUPP; + + return nft_block_setup(chain, &bo, cmd); +} + +#define FLOW_SETUP_BLOCK TC_SETUP_BLOCK + +static int nft_flow_offload_chain(struct nft_chain *chain, + u8 *ppolicy, + enum flow_block_command cmd) +{ struct nft_base_chain *basechain; struct net_device *dev; - int err; + u8 policy; if (!nft_is_base_chain(chain)) return -EOPNOTSUPP; basechain = nft_base_chain(chain); dev = basechain->ops.dev; - if (!dev || !dev->netdev_ops->ndo_setup_tc) + if (!dev) return -EOPNOTSUPP; + policy = ppolicy ? *ppolicy : basechain->policy; + /* Only default policy to accept is supported for now. 
*/ - if (cmd == FLOW_BLOCK_BIND && - nft_trans_chain_policy(trans) != -1 && - nft_trans_chain_policy(trans) != NF_ACCEPT) + if (cmd == FLOW_BLOCK_BIND && policy != -1 && policy != NF_ACCEPT) return -EOPNOTSUPP; - bo.command = cmd; - bo.block = &basechain->flow_block; - bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS; - bo.extack = &extack; - INIT_LIST_HEAD(&bo.cb_list); - - err = dev->netdev_ops->ndo_setup_tc(dev, FLOW_SETUP_BLOCK, &bo); - if (err < 0) - return err; - - switch (cmd) { - case FLOW_BLOCK_BIND: - err = nft_flow_offload_bind(&bo, basechain); - break; - case FLOW_BLOCK_UNBIND: - err = nft_flow_offload_unbind(&bo, basechain); - break; - } - - return err; + if (dev->netdev_ops->ndo_setup_tc) + return nft_block_offload_cmd(basechain, dev, cmd); + else + return nft_indr_block_offload_cmd(basechain, dev, cmd); } int nft_flow_rule_offload_commit(struct net *net) { struct nft_trans *trans; int err = 0; + u8 policy; list_for_each_entry(trans, &net->nft.commit_list, list) { if (trans->ctx.family != NFPROTO_NETDEV) @@ -244,13 +337,17 @@ int nft_flow_rule_offload_commit(struct net *net) if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) continue; - err = nft_flow_offload_chain(trans, FLOW_BLOCK_BIND); + policy = nft_trans_chain_policy(trans); + err = nft_flow_offload_chain(trans->ctx.chain, &policy, + FLOW_BLOCK_BIND); break; case NFT_MSG_DELCHAIN: if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) continue; - err = nft_flow_offload_chain(trans, FLOW_BLOCK_UNBIND); + policy = nft_trans_chain_policy(trans); + err = nft_flow_offload_chain(trans->ctx.chain, &policy, + FLOW_BLOCK_BIND); break; case NFT_MSG_NEWRULE: if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) @@ -260,14 +357,20 @@ int nft_flow_rule_offload_commit(struct net *net) !(trans->ctx.flags & NLM_F_APPEND)) return -EOPNOTSUPP; - err = nft_flow_offload_rule(trans, FLOW_CLS_REPLACE); + err = nft_flow_offload_rule(trans->ctx.chain, + nft_trans_rule(trans), + nft_trans_flow_rule(trans), + FLOW_CLS_REPLACE); nft_flow_rule_destroy(nft_trans_flow_rule(trans)); break; case NFT_MSG_DELRULE: if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) continue; - err = nft_flow_offload_rule(trans, FLOW_CLS_DESTROY); + err = nft_flow_offload_rule(trans->ctx.chain, + nft_trans_rule(trans), + nft_trans_flow_rule(trans), + FLOW_CLS_DESTROY); break; } @@ -277,3 +380,104 @@ int nft_flow_rule_offload_commit(struct net *net) return err; } + +static struct nft_chain *__nft_offload_get_chain(struct net_device *dev) +{ + struct nft_base_chain *basechain; + struct net *net = dev_net(dev); + const struct nft_table *table; + struct nft_chain *chain; + + list_for_each_entry(table, &net->nft.tables, list) { + if (table->family != NFPROTO_NETDEV) + continue; + + list_for_each_entry(chain, &table->chains, list) { + if (!nft_is_base_chain(chain) || + !(chain->flags & NFT_CHAIN_HW_OFFLOAD)) + continue; + + basechain = nft_base_chain(chain); + if (strncmp(basechain->dev_name, dev->name, IFNAMSIZ)) + continue; + + return chain; + } + } + + return NULL; +} + +static void nft_indr_block_cb(struct net_device *dev, + flow_indr_block_bind_cb_t *cb, void *cb_priv, + enum flow_block_command cmd) +{ + struct net *net = dev_net(dev); + struct nft_chain *chain; + + mutex_lock(&net->nft.commit_mutex); + chain = __nft_offload_get_chain(dev); + if (chain) { + struct nft_base_chain *basechain; + + basechain = nft_base_chain(chain); + nft_indr_block_ing_cmd(dev, basechain, cb, cb_priv, cmd); + } + mutex_unlock(&net->nft.commit_mutex); +} + +static void 
nft_offload_chain_clean(struct nft_chain *chain) +{ + struct nft_rule *rule; + + list_for_each_entry(rule, &chain->rules, list) { + nft_flow_offload_rule(chain, rule, + NULL, FLOW_CLS_DESTROY); + } + + nft_flow_offload_chain(chain, NULL, FLOW_BLOCK_UNBIND); +} + +static int nft_offload_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct net *net = dev_net(dev); + struct nft_chain *chain; + + mutex_lock(&net->nft.commit_mutex); + chain = __nft_offload_get_chain(dev); + if (chain) + nft_offload_chain_clean(chain); + mutex_unlock(&net->nft.commit_mutex); + + return NOTIFY_DONE; +} + +static struct flow_indr_block_ing_entry block_ing_entry = { + .cb = nft_indr_block_cb, + .list = LIST_HEAD_INIT(block_ing_entry.list), +}; + +static struct notifier_block nft_offload_netdev_notifier = { + .notifier_call = nft_offload_netdev_event, +}; + +int nft_offload_init(void) +{ + int err; + + err = register_netdevice_notifier(&nft_offload_netdev_notifier); + if (err < 0) + return err; + + flow_indr_add_block_ing_cb(&block_ing_entry); + + return 0; +} + +void nft_offload_exit(void) +{ + flow_indr_del_block_ing_cb(&block_ing_entry); + unregister_netdevice_notifier(&nft_offload_netdev_notifier); +} diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 6dee4f9a944c..0ba020ca38e6 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -385,6 +385,57 @@ nfulnl_timer(struct timer_list *t) instance_put(inst); } +static u32 nfulnl_get_bridge_size(const struct sk_buff *skb) +{ + u32 size = 0; + + if (!skb_mac_header_was_set(skb)) + return 0; + + if (skb_vlan_tag_present(skb)) { + size += nla_total_size(0); /* nested */ + size += nla_total_size(sizeof(u16)); /* id */ + size += nla_total_size(sizeof(u16)); /* tag */ + } + + if (skb->network_header > skb->mac_header) + size += nla_total_size(skb->network_header - skb->mac_header); + + return size; +} + +static int nfulnl_put_bridge(struct nfulnl_instance *inst, const struct sk_buff *skb) +{ + if (!skb_mac_header_was_set(skb)) + return 0; + + if (skb_vlan_tag_present(skb)) { + struct nlattr *nest; + + nest = nla_nest_start(inst->skb, NFULA_VLAN); + if (!nest) + goto nla_put_failure; + + if (nla_put_be16(inst->skb, NFULA_VLAN_TCI, htons(skb->vlan_tci)) || + nla_put_be16(inst->skb, NFULA_VLAN_PROTO, skb->vlan_proto)) + goto nla_put_failure; + + nla_nest_end(inst->skb, nest); + } + + if (skb->mac_header < skb->network_header) { + int len = (int)(skb->network_header - skb->mac_header); + + if (nla_put(inst->skb, NFULA_L2HDR, len, skb_mac_header(skb))) + goto nla_put_failure; + } + + return 0; + +nla_put_failure: + return -1; +} + /* This is an inline function, we don't really care about a long * list of arguments */ static inline int @@ -580,6 +631,10 @@ __build_packet_message(struct nfnl_log_net *log, NFULA_CT, NFULA_CT_INFO) < 0) goto nla_put_failure; + if ((pf == NFPROTO_NETDEV || pf == NFPROTO_BRIDGE) && + nfulnl_put_bridge(inst, skb) < 0) + goto nla_put_failure; + if (data_len) { struct nlattr *nla; int size = nla_attr_size(data_len); @@ -651,7 +706,7 @@ nfulnl_log_packet(struct net *net, /* FIXME: do we want to make the size calculation conditional based on * what is actually present? way more branches and checks, but more * memory efficient... 
*/ - size = nlmsg_total_size(sizeof(struct nfgenmsg)) + size = nlmsg_total_size(sizeof(struct nfgenmsg)) + nla_total_size(sizeof(struct nfulnl_msg_packet_hdr)) + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + nla_total_size(sizeof(u_int32_t)) /* ifindex */ @@ -668,7 +723,7 @@ nfulnl_log_packet(struct net *net, + nla_total_size(sizeof(struct nfgenmsg)); /* NLMSG_DONE */ if (in && skb_mac_header_was_set(skb)) { - size += nla_total_size(skb->dev->hard_header_len) + size += nla_total_size(skb->dev->hard_header_len) + nla_total_size(sizeof(u_int16_t)) /* hwtype */ + nla_total_size(sizeof(u_int16_t)); /* hwlen */ } @@ -687,6 +742,8 @@ nfulnl_log_packet(struct net *net, size += nfnl_ct->build_size(ct); } } + if (pf == NFPROTO_NETDEV || pf == NFPROTO_BRIDGE) + size += nfulnl_get_bridge_size(skb); qthreshold = inst->qthreshold; /* per-rule qthreshold overrides per-instance */ diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index b6a7ce622c72..feabdfb22920 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -394,7 +394,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, char *secdata = NULL; u32 seclen = 0; - size = nlmsg_total_size(sizeof(struct nfgenmsg)) + size = nlmsg_total_size(sizeof(struct nfgenmsg)) + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr)) + nla_total_size(sizeof(u_int32_t)) /* ifindex */ + nla_total_size(sizeof(u_int32_t)) /* ifindex */ @@ -453,7 +453,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, } if (queue->flags & NFQA_CFG_F_UID_GID) { - size += (nla_total_size(sizeof(u_int32_t)) /* uid */ + size += (nla_total_size(sizeof(u_int32_t)) /* uid */ + nla_total_size(sizeof(u_int32_t))); /* gid */ } diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index b310b637b550..974300178fa9 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -13,6 +13,7 @@ #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_offload.h> struct nft_bitwise { enum nft_registers sreg:8; @@ -126,12 +127,30 @@ nla_put_failure: return -1; } +static struct nft_data zero; + +static int nft_bitwise_offload(struct nft_offload_ctx *ctx, + struct nft_flow_rule *flow, + const struct nft_expr *expr) +{ + const struct nft_bitwise *priv = nft_expr_priv(expr); + + if (memcmp(&priv->xor, &zero, sizeof(priv->xor)) || + priv->sreg != priv->dreg) + return -EOPNOTSUPP; + + memcpy(&ctx->regs[priv->dreg].mask, &priv->mask, sizeof(priv->mask)); + + return 0; +} + static const struct nft_expr_ops nft_bitwise_ops = { .type = &nft_bitwise_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise)), .eval = nft_bitwise_eval, .init = nft_bitwise_init, .dump = nft_bitwise_dump, + .offload = nft_bitwise_offload, }; struct nft_expr_type nft_bitwise_type __read_mostly = { diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index e06318428ea0..12bed3f7bbc6 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -43,14 +43,15 @@ void nft_byteorder_eval(const struct nft_expr *expr, switch (priv->op) { case NFT_BYTEORDER_NTOH: for (i = 0; i < priv->len / 8; i++) { - src64 = get_unaligned((u64 *)&src[i]); - put_unaligned_be64(src64, &dst[i]); + src64 = nft_reg_load64(&src[i]); + nft_reg_store64(&dst[i], be64_to_cpu(src64)); } break; case NFT_BYTEORDER_HTON: for (i = 0; i < priv->len / 8; i++) { - src64 = get_unaligned_be64(&src[i]); 
- put_unaligned(src64, (u64 *)&dst[i]); + src64 = (__force __u64) + cpu_to_be64(nft_reg_load64(&src[i])); + nft_reg_store64(&dst[i], src64); } break; } diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c index c6052fdd2c40..c2e78c160fd7 100644 --- a/net/netfilter/nft_dup_netdev.c +++ b/net/netfilter/nft_dup_netdev.c @@ -10,6 +10,7 @@ #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_offload.h> #include <net/netfilter/nf_dup_netdev.h> struct nft_dup_netdev { @@ -56,6 +57,16 @@ nla_put_failure: return -1; } +static int nft_dup_netdev_offload(struct nft_offload_ctx *ctx, + struct nft_flow_rule *flow, + const struct nft_expr *expr) +{ + const struct nft_dup_netdev *priv = nft_expr_priv(expr); + int oif = ctx->regs[priv->sreg_dev].data.data[0]; + + return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_MIRRED, oif); +} + static struct nft_expr_type nft_dup_netdev_type; static const struct nft_expr_ops nft_dup_netdev_ops = { .type = &nft_dup_netdev_type, @@ -63,6 +74,7 @@ static const struct nft_expr_ops nft_dup_netdev_ops = { .eval = nft_dup_netdev_eval, .init = nft_dup_netdev_init, .dump = nft_dup_netdev_dump, + .offload = nft_dup_netdev_offload, }; static struct nft_expr_type nft_dup_netdev_type __read_mostly = { diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 33833a0cb989..8887295414dc 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -84,6 +84,11 @@ void nft_dynset_eval(const struct nft_expr *expr, const struct nft_expr *sexpr; u64 timeout; + if (priv->op == NFT_DYNSET_OP_DELETE) { + set->ops->delete(set, ®s->data[priv->sreg_key]); + return; + } + if (set->ops->update(set, ®s->data[priv->sreg_key], nft_dynset_new, expr, regs, &ext)) { sexpr = NULL; @@ -161,6 +166,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx, priv->op = ntohl(nla_get_be32(tb[NFTA_DYNSET_OP])); switch (priv->op) { case NFT_DYNSET_OP_ADD: + case NFT_DYNSET_OP_DELETE: break; case NFT_DYNSET_OP_UPDATE: if (!(set->flags & NFT_SET_TIMEOUT)) diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 01705ad74a9a..22cf236eb5d5 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -6,12 +6,13 @@ #include <linux/netfilter.h> #include <linux/workqueue.h> #include <linux/spinlock.h> +#include <linux/netfilter/nf_conntrack_common.h> #include <linux/netfilter/nf_tables.h> #include <net/ip.h> /* for ipv4 options. 
*/ #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_conntrack_core.h> -#include <linux/netfilter/nf_conntrack_common.h> +#include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_flow_table.h> struct nft_flow_offload { diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index 61b7f93ac681..aba11c2333f3 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -12,6 +12,7 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_offload.h> #include <net/netfilter/nf_dup_netdev.h> #include <net/neighbour.h> #include <net/ip.h> @@ -63,6 +64,16 @@ nla_put_failure: return -1; } +static int nft_fwd_netdev_offload(struct nft_offload_ctx *ctx, + struct nft_flow_rule *flow, + const struct nft_expr *expr) +{ + const struct nft_fwd_netdev *priv = nft_expr_priv(expr); + int oif = ctx->regs[priv->sreg_dev].data.data[0]; + + return nft_fwd_dup_netdev_offload(ctx, flow, FLOW_ACTION_REDIRECT, oif); +} + struct nft_fwd_neigh { enum nft_registers sreg_dev:8; enum nft_registers sreg_addr:8; @@ -194,6 +205,7 @@ static const struct nft_expr_ops nft_fwd_netdev_ops = { .eval = nft_fwd_netdev_eval, .init = nft_fwd_netdev_init, .dump = nft_fwd_netdev_dump, + .offload = nft_fwd_netdev_offload, }; static const struct nft_expr_ops * diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index ca2ae4b95a8d..c7f0ef73d939 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -125,17 +125,13 @@ static int nft_immediate_validate(const struct nft_ctx *ctx, return 0; } -static int nft_immediate_offload(struct nft_offload_ctx *ctx, - struct nft_flow_rule *flow, - const struct nft_expr *expr) +static int nft_immediate_offload_verdict(struct nft_offload_ctx *ctx, + struct nft_flow_rule *flow, + const struct nft_immediate_expr *priv) { - const struct nft_immediate_expr *priv = nft_expr_priv(expr); struct flow_action_entry *entry; const struct nft_data *data; - if (priv->dreg != NFT_REG_VERDICT) - return -EOPNOTSUPP; - entry = &flow->rule->action.entries[ctx->num_actions++]; data = &priv->data; @@ -153,6 +149,20 @@ static int nft_immediate_offload(struct nft_offload_ctx *ctx, return 0; } +static int nft_immediate_offload(struct nft_offload_ctx *ctx, + struct nft_flow_rule *flow, + const struct nft_expr *expr) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + + if (priv->dreg == NFT_REG_VERDICT) + return nft_immediate_offload_verdict(ctx, flow, priv); + + memcpy(&ctx->regs[priv->dreg].data, &priv->data, sizeof(priv->data)); + + return 0; +} + static const struct nft_expr_ops nft_imm_ops = { .type = &nft_imm_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)), diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index f69afb9ff3cb..317e3a9e8c5b 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -26,8 +26,36 @@ #include <uapi/linux/netfilter_bridge.h> /* NF_BR_PRE_ROUTING */ +#define NFT_META_SECS_PER_MINUTE 60 +#define NFT_META_SECS_PER_HOUR 3600 +#define NFT_META_SECS_PER_DAY 86400 +#define NFT_META_DAYS_PER_WEEK 7 + static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state); +static u8 nft_meta_weekday(unsigned long secs) +{ + unsigned int dse; + u8 wday; + + secs -= NFT_META_SECS_PER_MINUTE * sys_tz.tz_minuteswest; + dse = secs / NFT_META_SECS_PER_DAY; + wday = (4 + dse) % NFT_META_DAYS_PER_WEEK; + + return wday; +} + +static u32 
nft_meta_hour(unsigned long secs) +{ + struct tm tm; + + time64_to_tm(secs, 0, &tm); + + return tm.tm_hour * NFT_META_SECS_PER_HOUR + + tm.tm_min * NFT_META_SECS_PER_MINUTE + + tm.tm_sec; +} + void nft_meta_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -218,6 +246,15 @@ void nft_meta_get_eval(const struct nft_expr *expr, goto err; strncpy((char *)dest, out->rtnl_link_ops->kind, IFNAMSIZ); break; + case NFT_META_TIME_NS: + nft_reg_store64(dest, ktime_get_real_ns()); + break; + case NFT_META_TIME_DAY: + nft_reg_store8(dest, nft_meta_weekday(get_seconds())); + break; + case NFT_META_TIME_HOUR: + *dest = nft_meta_hour(get_seconds()); + break; default: WARN_ON(1); goto err; @@ -330,6 +367,15 @@ int nft_meta_get_init(const struct nft_ctx *ctx, len = sizeof(u8); break; #endif + case NFT_META_TIME_NS: + len = sizeof(u64); + break; + case NFT_META_TIME_DAY: + len = sizeof(u8); + break; + case NFT_META_TIME_HOUR: + len = sizeof(u32); + break; default: return -EOPNOTSUPP; } diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index c8745d454bf8..4413690591f2 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -13,7 +13,7 @@ #include <net/netfilter/nf_tables.h> struct nft_quota { - u64 quota; + atomic64_t quota; unsigned long flags; atomic64_t consumed; }; @@ -21,7 +21,8 @@ struct nft_quota { static inline bool nft_overquota(struct nft_quota *priv, const struct sk_buff *skb) { - return atomic64_add_return(skb->len, &priv->consumed) >= priv->quota; + return atomic64_add_return(skb->len, &priv->consumed) >= + atomic64_read(&priv->quota); } static inline bool nft_quota_invert(struct nft_quota *priv) @@ -89,7 +90,7 @@ static int nft_quota_do_init(const struct nlattr * const tb[], return -EOPNOTSUPP; } - priv->quota = quota; + atomic64_set(&priv->quota, quota); priv->flags = flags; atomic64_set(&priv->consumed, consumed); @@ -105,10 +106,22 @@ static int nft_quota_obj_init(const struct nft_ctx *ctx, return nft_quota_do_init(tb, priv); } +static void nft_quota_obj_update(struct nft_object *obj, + struct nft_object *newobj) +{ + struct nft_quota *newpriv = nft_obj_data(newobj); + struct nft_quota *priv = nft_obj_data(obj); + u64 newquota; + + newquota = atomic64_read(&newpriv->quota); + atomic64_set(&priv->quota, newquota); + priv->flags = newpriv->flags; +} + static int nft_quota_do_dump(struct sk_buff *skb, struct nft_quota *priv, bool reset) { - u64 consumed, consumed_cap; + u64 consumed, consumed_cap, quota; u32 flags = priv->flags; /* Since we inconditionally increment consumed quota for each packet @@ -116,14 +129,15 @@ static int nft_quota_do_dump(struct sk_buff *skb, struct nft_quota *priv, * userspace. 
*/ consumed = atomic64_read(&priv->consumed); - if (consumed >= priv->quota) { - consumed_cap = priv->quota; + quota = atomic64_read(&priv->quota); + if (consumed >= quota) { + consumed_cap = quota; flags |= NFT_QUOTA_F_DEPLETED; } else { consumed_cap = consumed; } - if (nla_put_be64(skb, NFTA_QUOTA_BYTES, cpu_to_be64(priv->quota), + if (nla_put_be64(skb, NFTA_QUOTA_BYTES, cpu_to_be64(quota), NFTA_QUOTA_PAD) || nla_put_be64(skb, NFTA_QUOTA_CONSUMED, cpu_to_be64(consumed_cap), NFTA_QUOTA_PAD) || @@ -155,6 +169,7 @@ static const struct nft_object_ops nft_quota_obj_ops = { .init = nft_quota_obj_init, .eval = nft_quota_obj_eval, .dump = nft_quota_obj_dump, + .update = nft_quota_obj_update, }; static struct nft_object_type nft_quota_obj_type __read_mostly = { diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index b5aeccdddb22..087a056e34d1 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -10,7 +10,7 @@ #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> struct nft_bitmap_elem { struct list_head head; diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 6e8d20c03e3d..b331a3c9a3a8 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -16,7 +16,7 @@ #include <linux/rhashtable.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> /* We target a hash table size of 4, element hint is 75% of final size */ #define NFT_RHASH_ELEMENT_HINT 3 @@ -234,6 +234,24 @@ static void nft_rhash_remove(const struct net *net, rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params); } +static bool nft_rhash_delete(const struct nft_set *set, + const u32 *key) +{ + struct nft_rhash *priv = nft_set_priv(set); + struct nft_rhash_cmp_arg arg = { + .genmask = NFT_GENMASK_ANY, + .set = set, + .key = key, + }; + struct nft_rhash_elem *he; + + he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params); + if (he == NULL) + return false; + + return rhashtable_remove_fast(&priv->ht, &he->node, nft_rhash_params) == 0; +} + static void nft_rhash_walk(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_iter *iter) { @@ -662,6 +680,7 @@ struct nft_set_type nft_set_rhash_type __read_mostly = { .remove = nft_rhash_remove, .lookup = nft_rhash_lookup, .update = nft_rhash_update, + .delete = nft_rhash_delete, .walk = nft_rhash_walk, .get = nft_rhash_get, }, diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 419d58ef802b..57123259452f 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -13,7 +13,7 @@ #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> -#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> struct nft_rbtree { struct rb_root root; diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c index 928e661d1517..e2c1fc608841 100644 --- a/net/netfilter/nft_synproxy.c +++ b/net/netfilter/nft_synproxy.c @@ -24,15 +24,15 @@ static void nft_synproxy_tcp_options(struct synproxy_options *opts, const struct tcphdr *tcp, struct synproxy_net *snet, struct nf_synproxy_info *info, - struct nft_synproxy *priv) + const struct nft_synproxy *priv) { this_cpu_inc(snet->stats->syn_received); if (tcp->ece && tcp->cwr) opts->options |= NF_SYNPROXY_OPT_ECN; 
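The quota change above keeps the byte limit in an atomic64_t so a runtime update can swap in a new limit while the consumed counter keeps counting. A minimal sketch of the same pattern using C11 atomics instead of the kernel's atomic64_t helpers (names are illustrative):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

struct quota {
	_Atomic uint64_t limit;     /* runtime-updatable byte limit */
	_Atomic uint64_t consumed;  /* monotonically increasing usage */
};

/* charge one packet and report whether the quota is now exceeded */
static bool quota_charge(struct quota *q, uint64_t bytes)
{
	uint64_t used = atomic_fetch_add(&q->consumed, bytes) + bytes;

	return used >= atomic_load(&q->limit);
}

/* replace the limit in place; consumed is deliberately left untouched */
static void quota_update(struct quota *q, uint64_t new_limit)
{
	atomic_store(&q->limit, new_limit);
}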
opts->options &= priv->info.options; - opts->mss_encode = opts->mss; - opts->mss = info->mss; + opts->mss_encode = opts->mss_option; + opts->mss_option = info->mss; if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP) synproxy_init_timestamp_cookie(info, opts); else @@ -41,14 +41,13 @@ static void nft_synproxy_tcp_options(struct synproxy_options *opts, NF_SYNPROXY_OPT_ECN); } -static void nft_synproxy_eval_v4(const struct nft_expr *expr, +static void nft_synproxy_eval_v4(const struct nft_synproxy *priv, struct nft_regs *regs, const struct nft_pktinfo *pkt, const struct tcphdr *tcp, struct tcphdr *_tcph, struct synproxy_options *opts) { - struct nft_synproxy *priv = nft_expr_priv(expr); struct nf_synproxy_info info = priv->info; struct net *net = nft_net(pkt); struct synproxy_net *snet = synproxy_pernet(net); @@ -73,14 +72,13 @@ static void nft_synproxy_eval_v4(const struct nft_expr *expr, } #if IS_ENABLED(CONFIG_NF_TABLES_IPV6) -static void nft_synproxy_eval_v6(const struct nft_expr *expr, +static void nft_synproxy_eval_v6(const struct nft_synproxy *priv, struct nft_regs *regs, const struct nft_pktinfo *pkt, const struct tcphdr *tcp, struct tcphdr *_tcph, struct synproxy_options *opts) { - struct nft_synproxy *priv = nft_expr_priv(expr); struct nf_synproxy_info info = priv->info; struct net *net = nft_net(pkt); struct synproxy_net *snet = synproxy_pernet(net); @@ -105,9 +103,9 @@ static void nft_synproxy_eval_v6(const struct nft_expr *expr, } #endif /* CONFIG_NF_TABLES_IPV6*/ -static void nft_synproxy_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +static void nft_synproxy_do_eval(const struct nft_synproxy *priv, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { struct synproxy_options opts = {}; struct sk_buff *skb = pkt->skb; @@ -140,23 +138,22 @@ static void nft_synproxy_eval(const struct nft_expr *expr, switch (skb->protocol) { case htons(ETH_P_IP): - nft_synproxy_eval_v4(expr, regs, pkt, tcp, &_tcph, &opts); + nft_synproxy_eval_v4(priv, regs, pkt, tcp, &_tcph, &opts); return; #if IS_ENABLED(CONFIG_NF_TABLES_IPV6) case htons(ETH_P_IPV6): - nft_synproxy_eval_v6(expr, regs, pkt, tcp, &_tcph, &opts); + nft_synproxy_eval_v6(priv, regs, pkt, tcp, &_tcph, &opts); return; #endif } regs->verdict.code = NFT_BREAK; } -static int nft_synproxy_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) +static int nft_synproxy_do_init(const struct nft_ctx *ctx, + const struct nlattr * const tb[], + struct nft_synproxy *priv) { struct synproxy_net *snet = synproxy_pernet(ctx->net); - struct nft_synproxy *priv = nft_expr_priv(expr); u32 flags; int err; @@ -206,8 +203,7 @@ nf_ct_failure: return err; } -static void nft_synproxy_destroy(const struct nft_ctx *ctx, - const struct nft_expr *expr) +static void nft_synproxy_do_destroy(const struct nft_ctx *ctx) { struct synproxy_net *snet = synproxy_pernet(ctx->net); @@ -229,10 +225,8 @@ static void nft_synproxy_destroy(const struct nft_ctx *ctx, nf_ct_netns_put(ctx->net, ctx->family); } -static int nft_synproxy_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_synproxy_do_dump(struct sk_buff *skb, struct nft_synproxy *priv) { - const struct nft_synproxy *priv = nft_expr_priv(expr); - if (nla_put_be16(skb, NFTA_SYNPROXY_MSS, htons(priv->info.mss)) || nla_put_u8(skb, NFTA_SYNPROXY_WSCALE, priv->info.wscale) || nla_put_be32(skb, NFTA_SYNPROXY_FLAGS, htonl(priv->info.options))) @@ -244,6 +238,15 @@ nla_put_failure: return -1; } +static void 
nft_synproxy_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_synproxy *priv = nft_expr_priv(expr); + + nft_synproxy_do_eval(priv, regs, pkt); +} + static int nft_synproxy_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) @@ -252,6 +255,28 @@ static int nft_synproxy_validate(const struct nft_ctx *ctx, (1 << NF_INET_FORWARD)); } +static int nft_synproxy_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_synproxy *priv = nft_expr_priv(expr); + + return nft_synproxy_do_init(ctx, tb, priv); +} + +static void nft_synproxy_destroy(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + nft_synproxy_do_destroy(ctx); +} + +static int nft_synproxy_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + struct nft_synproxy *priv = nft_expr_priv(expr); + + return nft_synproxy_do_dump(skb, priv); +} + static struct nft_expr_type nft_synproxy_type; static const struct nft_expr_ops nft_synproxy_ops = { .eval = nft_synproxy_eval, @@ -271,14 +296,89 @@ static struct nft_expr_type nft_synproxy_type __read_mostly = { .maxattr = NFTA_SYNPROXY_MAX, }; +static int nft_synproxy_obj_init(const struct nft_ctx *ctx, + const struct nlattr * const tb[], + struct nft_object *obj) +{ + struct nft_synproxy *priv = nft_obj_data(obj); + + return nft_synproxy_do_init(ctx, tb, priv); +} + +static void nft_synproxy_obj_destroy(const struct nft_ctx *ctx, + struct nft_object *obj) +{ + nft_synproxy_do_destroy(ctx); +} + +static int nft_synproxy_obj_dump(struct sk_buff *skb, + struct nft_object *obj, bool reset) +{ + struct nft_synproxy *priv = nft_obj_data(obj); + + return nft_synproxy_do_dump(skb, priv); +} + +static void nft_synproxy_obj_eval(struct nft_object *obj, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_synproxy *priv = nft_obj_data(obj); + + nft_synproxy_do_eval(priv, regs, pkt); +} + +static void nft_synproxy_obj_update(struct nft_object *obj, + struct nft_object *newobj) +{ + struct nft_synproxy *newpriv = nft_obj_data(newobj); + struct nft_synproxy *priv = nft_obj_data(obj); + + priv->info = newpriv->info; +} + +static struct nft_object_type nft_synproxy_obj_type; +static const struct nft_object_ops nft_synproxy_obj_ops = { + .type = &nft_synproxy_obj_type, + .size = sizeof(struct nft_synproxy), + .init = nft_synproxy_obj_init, + .destroy = nft_synproxy_obj_destroy, + .dump = nft_synproxy_obj_dump, + .eval = nft_synproxy_obj_eval, + .update = nft_synproxy_obj_update, +}; + +static struct nft_object_type nft_synproxy_obj_type __read_mostly = { + .type = NFT_OBJECT_SYNPROXY, + .ops = &nft_synproxy_obj_ops, + .maxattr = NFTA_SYNPROXY_MAX, + .policy = nft_synproxy_policy, + .owner = THIS_MODULE, +}; + static int __init nft_synproxy_module_init(void) { - return nft_register_expr(&nft_synproxy_type); + int err; + + err = nft_register_obj(&nft_synproxy_obj_type); + if (err < 0) + return err; + + err = nft_register_expr(&nft_synproxy_type); + if (err < 0) + goto err; + + return 0; + +err: + nft_unregister_obj(&nft_synproxy_obj_type); + return err; } static void __exit nft_synproxy_module_exit(void) { - return nft_unregister_expr(&nft_synproxy_type); + nft_unregister_expr(&nft_synproxy_type); + nft_unregister_obj(&nft_synproxy_obj_type); } module_init(nft_synproxy_module_init); @@ -287,3 +387,4 @@ module_exit(nft_synproxy_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Fernando Fernandez 
<ffmancera@riseup.net>"); MODULE_ALIAS_NFT_EXPR("synproxy"); +MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_SYNPROXY); diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index 9cec9eae556a..f56d3ed93e56 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -283,7 +283,7 @@ static int __init idletimer_tg_init(void) idletimer_tg_kobj = &idletimer_tg_device->kobj; - err = xt_register_target(&idletimer_tg); + err = xt_register_target(&idletimer_tg); if (err < 0) { pr_debug("couldn't register xt target\n"); goto out_dev; diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index bc6c8ab0fa62..46fcac75f726 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -13,6 +13,8 @@ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/ip.h> +#include <linux/ipv6.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netfilter/x_tables.h> diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 2d2691dd51e0..ced3fc8fad7c 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -34,9 +34,14 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> -#include <linux/netfilter/xt_hashlimit.h> #include <linux/mutex.h> #include <linux/kernel.h> +#include <uapi/linux/netfilter/xt_hashlimit.h> + +#define XT_HASHLIMIT_ALL (XT_HASHLIMIT_HASH_DIP | XT_HASHLIMIT_HASH_DPT | \ + XT_HASHLIMIT_HASH_SIP | XT_HASHLIMIT_HASH_SPT | \ + XT_HASHLIMIT_INVERT | XT_HASHLIMIT_BYTES |\ + XT_HASHLIMIT_RATE_MATCH) MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index b92b22ce8abd..ec6ed6fda96c 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -5,12 +5,13 @@ /* (C) 2001-2003 Bart De Schuymer <bdschuym@pandora.be> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/if.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netfilter_bridge.h> -#include <linux/netfilter/xt_physdev.h> #include <linux/netfilter/x_tables.h> -#include <net/netfilter/br_netfilter.h> +#include <uapi/linux/netfilter/xt_physdev.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index ecbfa291fb70..731bc2cafae4 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -14,7 +14,6 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter/ipset/ip_set.h> -#include <linux/netfilter/ipset/ip_set_timeout.h> #include <uapi/linux/netfilter/xt_set.h> MODULE_LICENSE("GPL"); diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 2b0ef55cf89e..409a3ae47ce2 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -607,7 +607,7 @@ catmap_getnode_alloc: */ int netlbl_catmap_walk(struct netlbl_lsm_catmap *catmap, u32 offset) { - struct netlbl_lsm_catmap *iter = catmap; + struct netlbl_lsm_catmap *iter; u32 idx; u32 bit; NETLBL_CATMAP_MAPTYPE bitmap; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index ea64c90b14e8..17e6ca62f1be 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -970,7 +970,8 @@ static int nfc_genl_dep_link_down(struct sk_buff *skb, struct genl_info *info) int rc; u32 idx; - if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) + if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || + !info->attrs[NFC_ATTR_TARGET_INDEX]) return -EINVAL; idx = 
nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); @@ -1018,7 +1019,8 @@ static int nfc_genl_llc_get_params(struct sk_buff *skb, struct genl_info *info) struct sk_buff *msg = NULL; u32 idx; - if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) + if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || + !info->attrs[NFC_ATTR_FIRMWARE_NAME]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index d01410e52097..dde9d762edee 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -222,6 +222,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) struct dp_stats_percpu *stats; u64 *stats_counter; u32 n_mask_hit; + int error; stats = this_cpu_ptr(dp->stats_percpu); @@ -229,7 +230,6 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit); if (unlikely(!flow)) { struct dp_upcall_info upcall; - int error; memset(&upcall, 0, sizeof(upcall)); upcall.cmd = OVS_PACKET_CMD_MISS; @@ -246,7 +246,10 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) ovs_flow_stats_update(flow, key->tp.flags, skb); sf_acts = rcu_dereference(flow->sf_acts); - ovs_execute_actions(dp, skb, sf_acts, key); + error = ovs_execute_actions(dp, skb, sf_acts, key); + if (unlikely(error)) + net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n", + ovs_dp_name(dp), error); stats_counter = &stats->n_hit; @@ -1542,10 +1545,34 @@ static void ovs_dp_reset_user_features(struct sk_buff *skb, struct genl_info *in dp->user_features = 0; } -static void ovs_dp_change(struct datapath *dp, struct nlattr *a[]) +DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support); + +static int ovs_dp_change(struct datapath *dp, struct nlattr *a[]) { - if (a[OVS_DP_ATTR_USER_FEATURES]) - dp->user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]); + u32 user_features = 0; + + if (a[OVS_DP_ATTR_USER_FEATURES]) { + user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]); + + if (user_features & ~(OVS_DP_F_VPORT_PIDS | + OVS_DP_F_UNALIGNED | + OVS_DP_F_TC_RECIRC_SHARING)) + return -EOPNOTSUPP; + +#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + if (user_features & OVS_DP_F_TC_RECIRC_SHARING) + return -EOPNOTSUPP; +#endif + } + + dp->user_features = user_features; + + if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) + static_branch_enable(&tc_recirc_sharing_support); + else + static_branch_disable(&tc_recirc_sharing_support); + + return 0; } static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) @@ -1607,7 +1634,9 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) parms.port_no = OVSP_LOCAL; parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID]; - ovs_dp_change(dp, a); + err = ovs_dp_change(dp, a); + if (err) + goto err_destroy_meters; /* So far only local changes have been made, now need the lock. 
*/ ovs_lock(); @@ -1733,7 +1762,9 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(dp)) goto err_unlock_free; - ovs_dp_change(dp, info->attrs); + err = ovs_dp_change(dp, info->attrs); + if (err) + goto err_unlock_free; err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, info->snd_seq, 0, OVS_DP_CMD_SET); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 751d34accdf9..81e85dde8217 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -218,6 +218,8 @@ static inline struct datapath *get_dp(struct net *net, int dp_ifindex) extern struct notifier_block ovs_dp_device_notifier; extern struct genl_family dp_vport_genl_family; +DECLARE_STATIC_KEY_FALSE(tc_recirc_sharing_support); + void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key); void ovs_dp_detach_port(struct vport *); int ovs_dp_upcall(struct datapath *, struct sk_buff *, diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 9d81d2c7bf82..38147e6a20f5 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -842,6 +842,9 @@ static int key_extract_mac_proto(struct sk_buff *skb) int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, struct sk_buff *skb, struct sw_flow_key *key) { +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + struct tc_skb_ext *tc_ext; +#endif int res, err; /* Extract metadata from packet. */ @@ -874,7 +877,17 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info, if (res < 0) return res; key->mac_proto = res; + +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + if (static_branch_unlikely(&tc_recirc_sharing_support)) { + tc_ext = skb_ext_find(skb, TC_SKB_EXT); + key->recirc_id = tc_ext ? tc_ext->chain : 0; + } else { + key->recirc_id = 0; + } +#else key->recirc_id = 0; +#endif err = key_extract(skb, key); if (!err) diff --git a/net/psample/psample.c b/net/psample/psample.c index 66e4b61a350d..a6ceb0533b5b 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -73,7 +73,7 @@ static int psample_nl_cmd_get_group_dumpit(struct sk_buff *msg, int idx = 0; int err; - spin_lock(&psample_groups_lock); + spin_lock_bh(&psample_groups_lock); list_for_each_entry(group, &psample_groups_list, list) { if (!net_eq(group->net, sock_net(msg->sk))) continue; @@ -89,7 +89,7 @@ static int psample_nl_cmd_get_group_dumpit(struct sk_buff *msg, idx++; } - spin_unlock(&psample_groups_lock); + spin_unlock_bh(&psample_groups_lock); cb->args[0] = idx; return msg->len; } @@ -172,7 +172,7 @@ struct psample_group *psample_group_get(struct net *net, u32 group_num) { struct psample_group *group; - spin_lock(&psample_groups_lock); + spin_lock_bh(&psample_groups_lock); group = psample_group_lookup(net, group_num); if (!group) { @@ -183,19 +183,27 @@ struct psample_group *psample_group_get(struct net *net, u32 group_num) group->refcount++; out: - spin_unlock(&psample_groups_lock); + spin_unlock_bh(&psample_groups_lock); return group; } EXPORT_SYMBOL_GPL(psample_group_get); +void psample_group_take(struct psample_group *group) +{ + spin_lock_bh(&psample_groups_lock); + group->refcount++; + spin_unlock_bh(&psample_groups_lock); +} +EXPORT_SYMBOL_GPL(psample_group_take); + void psample_group_put(struct psample_group *group) { - spin_lock(&psample_groups_lock); + spin_lock_bh(&psample_groups_lock); if (--group->refcount == 0) psample_group_destroy(group); - spin_unlock(&psample_groups_lock); + spin_unlock_bh(&psample_groups_lock); } EXPORT_SYMBOL_GPL(psample_group_put); diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 
2b969f99ef13..1a5bf3fa4578 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -559,7 +559,7 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr, ret = -EDESTADDRREQ; break; } - if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) || + if (ipv4_is_multicast(sin->sin_addr.s_addr) || sin->sin_addr.s_addr == htonl(INADDR_BROADCAST)) { ret = -EINVAL; break; @@ -593,7 +593,7 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr, addr4 = sin6->sin6_addr.s6_addr32[3]; if (addr4 == htonl(INADDR_ANY) || addr4 == htonl(INADDR_BROADCAST) || - IN_MULTICAST(ntohl(addr4))) { + ipv4_is_multicast(addr4)) { ret = -EPROTOTYPE; break; } @@ -705,7 +705,7 @@ static int rds_create(struct net *net, struct socket *sock, int protocol, if (sock->type != SOCK_SEQPACKET || protocol) return -ESOCKTNOSUPPORT; - sk = sk_alloc(net, AF_RDS, GFP_ATOMIC, &rds_proto, kern); + sk = sk_alloc(net, AF_RDS, GFP_KERNEL, &rds_proto, kern); if (!sk) return -ENOMEM; @@ -741,6 +741,10 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len, spin_lock_bh(&rds_sock_lock); list_for_each_entry(rs, &rds_sock_list, rs_item) { + /* This option only supports IPv4 sockets. */ + if (!ipv6_addr_v4mapped(&rs->rs_bound_addr)) + continue; + read_lock(&rs->rs_recv_lock); /* XXX too lazy to maintain counts.. */ @@ -762,21 +766,60 @@ static void rds_sock_inc_info(struct socket *sock, unsigned int len, lens->each = sizeof(struct rds_info_message); } +#if IS_ENABLED(CONFIG_IPV6) +static void rds6_sock_inc_info(struct socket *sock, unsigned int len, + struct rds_info_iterator *iter, + struct rds_info_lengths *lens) +{ + struct rds_incoming *inc; + unsigned int total = 0; + struct rds_sock *rs; + + len /= sizeof(struct rds6_info_message); + + spin_lock_bh(&rds_sock_lock); + + list_for_each_entry(rs, &rds_sock_list, rs_item) { + read_lock(&rs->rs_recv_lock); + + list_for_each_entry(inc, &rs->rs_recv_queue, i_item) { + total++; + if (total <= len) + rds6_inc_info_copy(inc, iter, &inc->i_saddr, + &rs->rs_bound_addr, 1); + } + + read_unlock(&rs->rs_recv_lock); + } + + spin_unlock_bh(&rds_sock_lock); + + lens->nr = total; + lens->each = sizeof(struct rds6_info_message); +} +#endif + static void rds_sock_info(struct socket *sock, unsigned int len, struct rds_info_iterator *iter, struct rds_info_lengths *lens) { struct rds_info_socket sinfo; + unsigned int cnt = 0; struct rds_sock *rs; len /= sizeof(struct rds_info_socket); spin_lock_bh(&rds_sock_lock); - if (len < rds_sock_count) + if (len < rds_sock_count) { + cnt = rds_sock_count; goto out; + } list_for_each_entry(rs, &rds_sock_list, rs_item) { + /* This option only supports IPv4 sockets. 
*/ + if (!ipv6_addr_v4mapped(&rs->rs_bound_addr)) + continue; sinfo.sndbuf = rds_sk_sndbuf(rs); sinfo.rcvbuf = rds_sk_rcvbuf(rs); sinfo.bound_addr = rs->rs_bound_addr_v4; @@ -786,15 +829,51 @@ static void rds_sock_info(struct socket *sock, unsigned int len, sinfo.inum = sock_i_ino(rds_rs_to_sk(rs)); rds_info_copy(iter, &sinfo, sizeof(sinfo)); + cnt++; } out: - lens->nr = rds_sock_count; + lens->nr = cnt; lens->each = sizeof(struct rds_info_socket); spin_unlock_bh(&rds_sock_lock); } +#if IS_ENABLED(CONFIG_IPV6) +static void rds6_sock_info(struct socket *sock, unsigned int len, + struct rds_info_iterator *iter, + struct rds_info_lengths *lens) +{ + struct rds6_info_socket sinfo6; + struct rds_sock *rs; + + len /= sizeof(struct rds6_info_socket); + + spin_lock_bh(&rds_sock_lock); + + if (len < rds_sock_count) + goto out; + + list_for_each_entry(rs, &rds_sock_list, rs_item) { + sinfo6.sndbuf = rds_sk_sndbuf(rs); + sinfo6.rcvbuf = rds_sk_rcvbuf(rs); + sinfo6.bound_addr = rs->rs_bound_addr; + sinfo6.connected_addr = rs->rs_conn_addr; + sinfo6.bound_port = rs->rs_bound_port; + sinfo6.connected_port = rs->rs_conn_port; + sinfo6.inum = sock_i_ino(rds_rs_to_sk(rs)); + + rds_info_copy(iter, &sinfo6, sizeof(sinfo6)); + } + + out: + lens->nr = rds_sock_count; + lens->each = sizeof(struct rds6_info_socket); + + spin_unlock_bh(&rds_sock_lock); +} +#endif + static void rds_exit(void) { sock_unregister(rds_family_ops.family); @@ -808,6 +887,10 @@ static void rds_exit(void) rds_bind_lock_destroy(); rds_info_deregister_func(RDS_INFO_SOCKETS, rds_sock_info); rds_info_deregister_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info); +#if IS_ENABLED(CONFIG_IPV6) + rds_info_deregister_func(RDS6_INFO_SOCKETS, rds6_sock_info); + rds_info_deregister_func(RDS6_INFO_RECV_MESSAGES, rds6_sock_inc_info); +#endif } module_exit(rds_exit); @@ -845,6 +928,10 @@ static int rds_init(void) rds_info_register_func(RDS_INFO_SOCKETS, rds_sock_info); rds_info_register_func(RDS_INFO_RECV_MESSAGES, rds_sock_inc_info); +#if IS_ENABLED(CONFIG_IPV6) + rds_info_register_func(RDS6_INFO_SOCKETS, rds6_sock_info); + rds_info_register_func(RDS6_INFO_RECV_MESSAGES, rds6_sock_inc_info); +#endif goto out; diff --git a/net/rds/bind.c b/net/rds/bind.c index 05464fd7c17a..20c156a73e73 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -181,7 +181,7 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_len < sizeof(struct sockaddr_in) || sin->sin_addr.s_addr == htonl(INADDR_ANY) || sin->sin_addr.s_addr == htonl(INADDR_BROADCAST) || - IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) + ipv4_is_multicast(sin->sin_addr.s_addr)) return -EINVAL; ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &v6addr); binding_addr = &v6addr; @@ -206,7 +206,7 @@ int rds_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) addr4 = sin6->sin6_addr.s6_addr32[3]; if (addr4 == htonl(INADDR_ANY) || addr4 == htonl(INADDR_BROADCAST) || - IN_MULTICAST(ntohl(addr4))) + ipv4_is_multicast(addr4)) return -EINVAL; } /* The scope ID must be specified for link local address. 
*/ diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 3cae88cbdaa0..a0f99bbf362c 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -385,6 +385,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp) unsigned int posted = 0; int ret = 0; bool can_wait = !!(gfp & __GFP_DIRECT_RECLAIM); + bool must_wake = false; u32 pos; /* the goal here is to just make sure that someone, somewhere @@ -405,6 +406,7 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp) recv = &ic->i_recvs[pos]; ret = rds_ib_recv_refill_one(conn, recv, gfp); if (ret) { + must_wake = true; break; } @@ -423,6 +425,11 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp) } posted++; + + if ((posted > 128 && need_resched()) || posted > 8192) { + must_wake = true; + break; + } } /* We're doing flow control - update the window. */ @@ -445,10 +452,13 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp) * if we should requeue. */ if (rds_conn_up(conn) && - ((can_wait && rds_ib_ring_low(&ic->i_recv_ring)) || + (must_wake || + (can_wait && rds_ib_ring_low(&ic->i_recv_ring)) || rds_ib_ring_empty(&ic->i_recv_ring))) { queue_delayed_work(rds_wq, &conn->c_recv_w, 1); } + if (can_wait) + cond_resched(); } /* @@ -1038,9 +1048,14 @@ int rds_ib_recv_init(void) si_meminfo(&si); rds_ib_sysctl_max_recv_allocation = si.totalram / 3 * PAGE_SIZE / RDS_FRAG_SIZE; - rds_ib_incoming_slab = kmem_cache_create("rds_ib_incoming", - sizeof(struct rds_ib_incoming), - 0, SLAB_HWCACHE_ALIGN, NULL); + rds_ib_incoming_slab = + kmem_cache_create_usercopy("rds_ib_incoming", + sizeof(struct rds_ib_incoming), + 0, SLAB_HWCACHE_ALIGN, + offsetof(struct rds_ib_incoming, + ii_inc.i_usercopy), + sizeof(struct rds_inc_usercopy), + NULL); if (!rds_ib_incoming_slab) goto out; diff --git a/net/rds/ib_stats.c b/net/rds/ib_stats.c index 9252ad126335..ac46d8961b61 100644 --- a/net/rds/ib_stats.c +++ b/net/rds/ib_stats.c @@ -42,7 +42,7 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_ib_statistics, rds_ib_stats); static const char *const rds_ib_stat_names[] = { "ib_connect_raced", "ib_listen_closed_stale", - "s_ib_evt_handler_call", + "ib_evt_handler_call", "ib_tasklet_call", "ib_tx_cq_event", "ib_tx_ring_full", diff --git a/net/rds/rds.h b/net/rds/rds.h index f0066d168499..53e86911773a 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -271,6 +271,12 @@ struct rds_ext_header_rdma_dest { #define RDS_MSG_RX_END 2 #define RDS_MSG_RX_CMSG 3 +/* The following values are whitelisted for usercopy */ +struct rds_inc_usercopy { + rds_rdma_cookie_t rdma_cookie; + ktime_t rx_tstamp; +}; + struct rds_incoming { refcount_t i_refcount; struct list_head i_item; @@ -280,8 +286,7 @@ struct rds_incoming { unsigned long i_rx_jiffies; struct in6_addr i_saddr; - rds_rdma_cookie_t i_rdma_cookie; - ktime_t i_rx_tstamp; + struct rds_inc_usercopy i_usercopy; u64 i_rx_lat_trace[RDS_RX_MAX_TRACES]; }; @@ -717,7 +722,7 @@ struct rds_statistics { uint64_t s_cong_send_blocked; uint64_t s_recv_bytes_added_to_socket; uint64_t s_recv_bytes_removed_from_socket; - + uint64_t s_send_stuck_rm; }; /* af_rds.c */ diff --git a/net/rds/recv.c b/net/rds/recv.c index a42ba7fa06d5..c8404971d5ab 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -47,8 +47,8 @@ void rds_inc_init(struct rds_incoming *inc, struct rds_connection *conn, INIT_LIST_HEAD(&inc->i_item); inc->i_conn = conn; inc->i_saddr = *saddr; - inc->i_rdma_cookie = 0; - inc->i_rx_tstamp = ktime_set(0, 0); + inc->i_usercopy.rdma_cookie = 0; + 
inc->i_usercopy.rx_tstamp = ktime_set(0, 0); memset(inc->i_rx_lat_trace, 0, sizeof(inc->i_rx_lat_trace)); } @@ -62,8 +62,8 @@ void rds_inc_path_init(struct rds_incoming *inc, struct rds_conn_path *cp, inc->i_conn = cp->cp_conn; inc->i_conn_path = cp; inc->i_saddr = *saddr; - inc->i_rdma_cookie = 0; - inc->i_rx_tstamp = ktime_set(0, 0); + inc->i_usercopy.rdma_cookie = 0; + inc->i_usercopy.rx_tstamp = ktime_set(0, 0); } EXPORT_SYMBOL_GPL(rds_inc_path_init); @@ -186,7 +186,7 @@ static void rds_recv_incoming_exthdrs(struct rds_incoming *inc, struct rds_sock case RDS_EXTHDR_RDMA_DEST: /* We ignore the size for now. We could stash it * somewhere and use it for error checking. */ - inc->i_rdma_cookie = rds_rdma_make_cookie( + inc->i_usercopy.rdma_cookie = rds_rdma_make_cookie( be32_to_cpu(buffer.rdma_dest.h_rdma_rkey), be32_to_cpu(buffer.rdma_dest.h_rdma_offset)); @@ -380,7 +380,7 @@ void rds_recv_incoming(struct rds_connection *conn, struct in6_addr *saddr, be32_to_cpu(inc->i_hdr.h_len), inc->i_hdr.h_dport); if (sock_flag(sk, SOCK_RCVTSTAMP)) - inc->i_rx_tstamp = ktime_get_real(); + inc->i_usercopy.rx_tstamp = ktime_get_real(); rds_inc_addref(inc); inc->i_rx_lat_trace[RDS_MSG_RX_END] = local_clock(); list_add_tail(&inc->i_item, &rs->rs_recv_queue); @@ -540,16 +540,18 @@ static int rds_cmsg_recv(struct rds_incoming *inc, struct msghdr *msg, { int ret = 0; - if (inc->i_rdma_cookie) { + if (inc->i_usercopy.rdma_cookie) { ret = put_cmsg(msg, SOL_RDS, RDS_CMSG_RDMA_DEST, - sizeof(inc->i_rdma_cookie), &inc->i_rdma_cookie); + sizeof(inc->i_usercopy.rdma_cookie), + &inc->i_usercopy.rdma_cookie); if (ret) goto out; } - if ((inc->i_rx_tstamp != 0) && + if ((inc->i_usercopy.rx_tstamp != 0) && sock_flag(rds_rs_to_sk(rs), SOCK_RCVTSTAMP)) { - struct __kernel_old_timeval tv = ns_to_kernel_old_timeval(inc->i_rx_tstamp); + struct __kernel_old_timeval tv = + ns_to_kernel_old_timeval(inc->i_usercopy.rx_tstamp); if (!sock_flag(rds_rs_to_sk(rs), SOCK_TSTAMP_NEW)) { ret = put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD, diff --git a/net/rds/send.c b/net/rds/send.c index 031b1e97a466..82dcd8b84fe7 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -145,6 +145,7 @@ int rds_send_xmit(struct rds_conn_path *cp) LIST_HEAD(to_be_dropped); int batch_count; unsigned long send_gen = 0; + int same_rm = 0; restart: batch_count = 0; @@ -200,6 +201,17 @@ restart: rm = cp->cp_xmit_rm; + if (!rm) { + same_rm = 0; + } else { + same_rm++; + if (same_rm >= 4096) { + rds_stats_inc(s_send_stuck_rm); + ret = -EAGAIN; + break; + } + } + /* * If between sending messages, we can send a pending congestion * map update. 
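/*
 * A minimal sketch (struct and cache names are illustrative, not from the
 * patch) of the hardened-usercopy pattern used for the rds_ib_incoming slab
 * above: only the struct rds_inc_usercopy region may be copied to user
 * space, so the cache is created via kmem_cache_create_usercopy() with an
 * explicit whitelisted offset/size instead of plain kmem_cache_create().
 * Assumes <linux/slab.h>, <linux/list.h> and <linux/stddef.h>.
 */
struct example_incoming {
	struct list_head	item;		/* kernel-only state */
	struct rds_inc_usercopy	usercopy;	/* exposed to userspace via cmsg */
};

static struct kmem_cache *example_slab;

static int example_slab_init(void)
{
	example_slab = kmem_cache_create_usercopy("example_incoming",
						  sizeof(struct example_incoming),
						  0, SLAB_HWCACHE_ALIGN,
						  offsetof(struct example_incoming, usercopy),
						  sizeof(struct rds_inc_usercopy),
						  NULL);
	return example_slab ? 0 : -ENOMEM;
}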
@@ -1132,7 +1144,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) case AF_INET: if (usin->sin_addr.s_addr == htonl(INADDR_ANY) || usin->sin_addr.s_addr == htonl(INADDR_BROADCAST) || - IN_MULTICAST(ntohl(usin->sin_addr.s_addr))) { + ipv4_is_multicast(usin->sin_addr.s_addr)) { ret = -EINVAL; goto out; } @@ -1163,7 +1175,7 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len) addr4 = sin6->sin6_addr.s6_addr32[3]; if (addr4 == htonl(INADDR_ANY) || addr4 == htonl(INADDR_BROADCAST) || - IN_MULTICAST(ntohl(addr4))) { + ipv4_is_multicast(addr4)) { ret = -EINVAL; goto out; } diff --git a/net/rds/stats.c b/net/rds/stats.c index 73be187d389e..9e87da43c004 100644 --- a/net/rds/stats.c +++ b/net/rds/stats.c @@ -76,6 +76,9 @@ static const char *const rds_stat_names[] = { "cong_update_received", "cong_send_error", "cong_send_blocked", + "recv_bytes_added_to_sock", + "recv_bytes_freed_fromsock", + "send_stuck_rm", }; void rds_stats_info_copy(struct rds_info_iterator *iter, diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 8051dfdcf26d..1091bf35a199 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -232,6 +232,9 @@ struct rxrpc_security { int (*verify_packet)(struct rxrpc_call *, struct sk_buff *, unsigned int, unsigned int, rxrpc_seq_t, u16); + /* Free crypto request on a call */ + void (*free_call_crypto)(struct rxrpc_call *); + /* Locate the data in a received packet that has been verified. */ void (*locate_data)(struct rxrpc_call *, struct sk_buff *, unsigned int *, unsigned int *); @@ -564,6 +567,7 @@ struct rxrpc_call { unsigned long expect_term_by; /* When we expect call termination by */ u32 next_rx_timo; /* Timeout for next Rx packet (jif) */ u32 next_req_timo; /* Timeout for next Rx request packet (jif) */ + struct skcipher_request *cipher_req; /* Packet cipher request buffer */ struct timer_list timer; /* Combined event timer */ struct work_struct processor; /* Event processor */ rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 014548c259ce..32d8dc677142 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -488,8 +488,10 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn); - if (conn) + if (conn) { rxrpc_disconnect_call(call); + conn->security->free_call_crypto(call); + } rxrpc_cleanup_ring(call); _leave(""); diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index a29d26c273b5..f6c59f5fae9d 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -33,6 +33,10 @@ static int none_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, return 0; } +static void none_free_call_crypto(struct rxrpc_call *call) +{ +} + static void none_locate_data(struct rxrpc_call *call, struct sk_buff *skb, unsigned int *_offset, unsigned int *_len) { @@ -83,6 +87,7 @@ const struct rxrpc_security rxrpc_no_security = { .exit = none_exit, .init_connection_security = none_init_connection_security, .prime_packet_security = none_prime_packet_security, + .free_call_crypto = none_free_call_crypto, .secure_packet = none_secure_packet, .verify_packet = none_verify_packet, .locate_data = none_locate_data, diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index c60c520fde7c..8d8aa3c230b5 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -43,6 +43,7 @@ struct rxkad_level2_hdr { * packets */ static struct 
crypto_sync_skcipher *rxkad_ci; +static struct skcipher_request *rxkad_ci_req; static DEFINE_MUTEX(rxkad_ci_mutex); /* @@ -99,8 +100,8 @@ error: */ static int rxkad_prime_packet_security(struct rxrpc_connection *conn) { + struct skcipher_request *req; struct rxrpc_key_token *token; - SYNC_SKCIPHER_REQUEST_ON_STACK(req, conn->cipher); struct scatterlist sg; struct rxrpc_crypt iv; __be32 *tmpbuf; @@ -115,6 +116,12 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn) if (!tmpbuf) return -ENOMEM; + req = skcipher_request_alloc(&conn->cipher->base, GFP_NOFS); + if (!req) { + kfree(tmpbuf); + return -ENOMEM; + } + token = conn->params.key->payload.data[0]; memcpy(&iv, token->kad->session_key, sizeof(iv)); @@ -128,7 +135,7 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn) skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, &sg, &sg, tmpsize, iv.x); crypto_skcipher_encrypt(req); - skcipher_request_zero(req); + skcipher_request_free(req); memcpy(&conn->csum_iv, tmpbuf + 2, sizeof(conn->csum_iv)); kfree(tmpbuf); @@ -137,6 +144,35 @@ static int rxkad_prime_packet_security(struct rxrpc_connection *conn) } /* + * Allocate and prepare the crypto request on a call. For any particular call, + * this is called serially for the packets, so no lock should be necessary. + */ +static struct skcipher_request *rxkad_get_call_crypto(struct rxrpc_call *call) +{ + struct crypto_skcipher *tfm = &call->conn->cipher->base; + struct skcipher_request *cipher_req = call->cipher_req; + + if (!cipher_req) { + cipher_req = skcipher_request_alloc(tfm, GFP_NOFS); + if (!cipher_req) + return NULL; + call->cipher_req = cipher_req; + } + + return cipher_req; +} + +/* + * Clean up the crypto on a call. + */ +static void rxkad_free_call_crypto(struct rxrpc_call *call) +{ + if (call->cipher_req) + skcipher_request_free(call->cipher_req); + call->cipher_req = NULL; +} + +/* * partially encrypt a packet (level 1 security) */ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, @@ -243,7 +279,7 @@ static int rxkad_secure_packet(struct rxrpc_call *call, void *sechdr) { struct rxrpc_skb_priv *sp; - SYNC_SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); + struct skcipher_request *req; struct rxrpc_crypt iv; struct scatterlist sg; u32 x, y; @@ -262,6 +298,10 @@ static int rxkad_secure_packet(struct rxrpc_call *call, if (ret < 0) return ret; + req = rxkad_get_call_crypto(call); + if (!req) + return -ENOMEM; + /* continue encrypting from where we left off */ memcpy(&iv, call->conn->csum_iv.x, sizeof(iv)); @@ -488,7 +528,7 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, unsigned int offset, unsigned int len, rxrpc_seq_t seq, u16 expected_cksum) { - SYNC_SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); + struct skcipher_request *req; struct rxrpc_crypt iv; struct scatterlist sg; bool aborted; @@ -501,6 +541,10 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, if (!call->conn->cipher) return 0; + req = rxkad_get_call_crypto(call); + if (!req) + return -ENOMEM; + /* continue encrypting from where we left off */ memcpy(&iv, call->conn->csum_iv.x, sizeof(iv)); @@ -733,14 +777,18 @@ static void rxkad_calc_response_checksum(struct rxkad_response *response) /* * encrypt the response packet */ -static void rxkad_encrypt_response(struct rxrpc_connection *conn, - struct rxkad_response *resp, - const struct rxkad_key *s2) +static int rxkad_encrypt_response(struct rxrpc_connection *conn, + struct 
rxkad_response *resp, + const struct rxkad_key *s2) { - SYNC_SKCIPHER_REQUEST_ON_STACK(req, conn->cipher); + struct skcipher_request *req; struct rxrpc_crypt iv; struct scatterlist sg[1]; + req = skcipher_request_alloc(&conn->cipher->base, GFP_NOFS); + if (!req) + return -ENOMEM; + /* continue encrypting from where we left off */ memcpy(&iv, s2->session_key, sizeof(iv)); @@ -750,7 +798,8 @@ static void rxkad_encrypt_response(struct rxrpc_connection *conn, skcipher_request_set_callback(req, 0, NULL, NULL); skcipher_request_set_crypt(req, sg, sg, sizeof(resp->encrypted), iv.x); crypto_skcipher_encrypt(req); - skcipher_request_zero(req); + skcipher_request_free(req); + return 0; } /* @@ -825,8 +874,9 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, /* calculate the response checksum and then do the encryption */ rxkad_calc_response_checksum(resp); - rxkad_encrypt_response(conn, resp, token->kad); - ret = rxkad_send_response(conn, &sp->hdr, resp, token->kad); + ret = rxkad_encrypt_response(conn, resp, token->kad); + if (ret == 0) + ret = rxkad_send_response(conn, &sp->hdr, resp, token->kad); kfree(resp); return ret; @@ -1003,18 +1053,16 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn, struct rxkad_response *resp, const struct rxrpc_crypt *session_key) { - SYNC_SKCIPHER_REQUEST_ON_STACK(req, rxkad_ci); + struct skcipher_request *req = rxkad_ci_req; struct scatterlist sg[1]; struct rxrpc_crypt iv; _enter(",,%08x%08x", ntohl(session_key->n[0]), ntohl(session_key->n[1])); - ASSERT(rxkad_ci != NULL); - mutex_lock(&rxkad_ci_mutex); if (crypto_sync_skcipher_setkey(rxkad_ci, session_key->x, - sizeof(*session_key)) < 0) + sizeof(*session_key)) < 0) BUG(); memcpy(&iv, session_key, sizeof(iv)); @@ -1208,10 +1256,26 @@ static void rxkad_clear(struct rxrpc_connection *conn) */ static int rxkad_init(void) { + struct crypto_sync_skcipher *tfm; + struct skcipher_request *req; + /* pin the cipher we need so that the crypto layer doesn't invoke * keventd to go get it */ - rxkad_ci = crypto_alloc_sync_skcipher("pcbc(fcrypt)", 0, 0); - return PTR_ERR_OR_ZERO(rxkad_ci); + tfm = crypto_alloc_sync_skcipher("pcbc(fcrypt)", 0, 0); + if (IS_ERR(tfm)) + return PTR_ERR(tfm); + + req = skcipher_request_alloc(&tfm->base, GFP_KERNEL); + if (!req) + goto nomem_tfm; + + rxkad_ci_req = req; + rxkad_ci = tfm; + return 0; + +nomem_tfm: + crypto_free_sync_skcipher(tfm); + return -ENOMEM; } /* @@ -1219,8 +1283,8 @@ static int rxkad_init(void) */ static void rxkad_exit(void) { - if (rxkad_ci) - crypto_free_sync_skcipher(rxkad_ci); + crypto_free_sync_skcipher(rxkad_ci); + skcipher_request_free(rxkad_ci_req); } /* @@ -1235,6 +1299,7 @@ const struct rxrpc_security rxkad = { .prime_packet_security = rxkad_prime_packet_security, .secure_packet = rxkad_secure_packet, .verify_packet = rxkad_verify_packet, + .free_call_crypto = rxkad_free_call_crypto, .locate_data = rxkad_locate_data, .issue_challenge = rxkad_issue_challenge, .respond_to_challenge = rxkad_respond_to_challenge, diff --git a/net/sched/Kconfig b/net/sched/Kconfig index afd2ba157a13..b3faafeafab9 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -963,6 +963,19 @@ config NET_IFE_SKBTCINDEX tristate "Support to encoding decoding skb tcindex on IFE action" depends on NET_ACT_IFE +config NET_TC_SKB_EXT + bool "TC recirculation support" + depends on NET_CLS_ACT + default y if NET_CLS_ACT + select SKB_EXTENSIONS + + help + Say Y here to allow tc chain misses to continue in OvS datapath in + the correct recirc_id, and hardware chain 
misses to continue in + the correct chain in tc software datapath. + + Say N here if you won't be using tc<->ovs offload or tc chains offload. + endif # NET_SCHED config NET_SCH_FIFO diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index cdd6f3818097..fcc46025e790 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -24,12 +24,12 @@ #include <uapi/linux/tc_act/tc_ct.h> #include <net/tc_act/tc_ct.h> -#include <linux/netfilter/nf_nat.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/ipv6/nf_defrag_ipv6.h> +#include <uapi/linux/netfilter/nf_nat.h> static struct tc_action_ops act_ct_ops; static unsigned int ct_net_id; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 9d1bf508075a..9ce073a05414 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -408,25 +408,31 @@ static struct notifier_block mirred_device_notifier = { .notifier_call = mirred_device_event, }; -static struct net_device *tcf_mirred_get_dev(const struct tc_action *a) +static void tcf_mirred_dev_put(void *priv) +{ + struct net_device *dev = priv; + + dev_put(dev); +} + +static struct net_device * +tcf_mirred_get_dev(const struct tc_action *a, + tc_action_priv_destructor *destructor) { struct tcf_mirred *m = to_mirred(a); struct net_device *dev; rcu_read_lock(); dev = rcu_dereference(m->tcfm_dev); - if (dev) + if (dev) { dev_hold(dev); + *destructor = tcf_mirred_dev_put; + } rcu_read_unlock(); return dev; } -static void tcf_mirred_put_dev(struct net_device *dev) -{ - dev_put(dev); -} - static size_t tcf_mirred_get_fill_size(const struct tc_action *act) { return nla_total_size(sizeof(struct tc_mirred)); @@ -446,7 +452,6 @@ static struct tc_action_ops act_mirred_ops = { .get_fill_size = tcf_mirred_get_fill_size, .size = sizeof(struct tcf_mirred), .get_dev = tcf_mirred_get_dev, - .put_dev = tcf_mirred_put_dev, }; static __net_init int mirred_init_net(struct net *net) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 6315e0f8d26e..89c04c52af3d 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -40,6 +40,8 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { [TCA_POLICE_PEAKRATE] = { .len = TC_RTAB_SIZE }, [TCA_POLICE_AVRATE] = { .type = NLA_U32 }, [TCA_POLICE_RESULT] = { .type = NLA_U32 }, + [TCA_POLICE_RATE64] = { .type = NLA_U64 }, + [TCA_POLICE_PEAKRATE64] = { .type = NLA_U64 }, }; static int tcf_police_init(struct net *net, struct nlattr *nla, @@ -58,6 +60,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, struct tcf_police_params *new; bool exists = false; u32 index; + u64 rate64, prate64; if (nla == NULL) return -EINVAL; @@ -155,14 +158,18 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, } if (R_tab) { new->rate_present = true; - psched_ratecfg_precompute(&new->rate, &R_tab->rate, 0); + rate64 = tb[TCA_POLICE_RATE64] ? + nla_get_u64(tb[TCA_POLICE_RATE64]) : 0; + psched_ratecfg_precompute(&new->rate, &R_tab->rate, rate64); qdisc_put_rtab(R_tab); } else { new->rate_present = false; } if (P_tab) { new->peak_present = true; - psched_ratecfg_precompute(&new->peak, &P_tab->rate, 0); + prate64 = tb[TCA_POLICE_PEAKRATE64] ? 
+ nla_get_u64(tb[TCA_POLICE_PEAKRATE64]) : 0; + psched_ratecfg_precompute(&new->peak, &P_tab->rate, prate64); qdisc_put_rtab(P_tab); } else { new->peak_present = false; @@ -313,10 +320,22 @@ static int tcf_police_dump(struct sk_buff *skb, struct tc_action *a, lockdep_is_held(&police->tcf_lock)); opt.mtu = p->tcfp_mtu; opt.burst = PSCHED_NS2TICKS(p->tcfp_burst); - if (p->rate_present) + if (p->rate_present) { psched_ratecfg_getrate(&opt.rate, &p->rate); - if (p->peak_present) + if ((police->params->rate.rate_bytes_ps >= (1ULL << 32)) && + nla_put_u64_64bit(skb, TCA_POLICE_RATE64, + police->params->rate.rate_bytes_ps, + TCA_POLICE_PAD)) + goto nla_put_failure; + } + if (p->peak_present) { psched_ratecfg_getrate(&opt.peakrate, &p->peak); + if ((police->params->peak.rate_bytes_ps >= (1ULL << 32)) && + nla_put_u64_64bit(skb, TCA_POLICE_PEAKRATE64, + police->params->peak.rate_bytes_ps, + TCA_POLICE_PAD)) + goto nla_put_failure; + } if (nla_put(skb, TCA_POLICE_TBF, sizeof(opt), &opt)) goto nla_put_failure; if (p->tcfp_result && diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 10229124a992..692c4c9040fd 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -252,6 +252,32 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index) return tcf_idr_search(tn, a, index); } +static void tcf_psample_group_put(void *priv) +{ + struct psample_group *group = priv; + + psample_group_put(group); +} + +static struct psample_group * +tcf_sample_get_group(const struct tc_action *a, + tc_action_priv_destructor *destructor) +{ + struct tcf_sample *s = to_sample(a); + struct psample_group *group; + + spin_lock_bh(&s->tcf_lock); + group = rcu_dereference_protected(s->psample_group, + lockdep_is_held(&s->tcf_lock)); + if (group) { + psample_group_take(group); + *destructor = tcf_psample_group_put; + } + spin_unlock_bh(&s->tcf_lock); + + return group; +} + static struct tc_action_ops act_sample_ops = { .kind = "sample", .id = TCA_ID_SAMPLE, @@ -262,6 +288,7 @@ static struct tc_action_ops act_sample_ops = { .cleanup = tcf_sample_cleanup, .walk = tcf_sample_walker, .lookup = tcf_sample_search, + .get_psample_group = tcf_sample_get_group, .size = sizeof(struct tcf_sample), }; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 287a30bf8930..08aaf719a70f 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -301,6 +301,19 @@ static int tcf_vlan_walker(struct net *net, struct sk_buff *skb, return tcf_generic_walker(tn, skb, cb, type, ops, extack); } +static void tcf_vlan_stats_update(struct tc_action *a, u64 bytes, u32 packets, + u64 lastuse, bool hw) +{ + struct tcf_vlan *v = to_vlan(a); + struct tcf_t *tm = &v->tcf_tm; + + _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); + if (hw) + _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats_hw), + bytes, packets); + tm->lastuse = max_t(u64, tm->lastuse, lastuse); +} + static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, vlan_net_id); @@ -325,6 +338,7 @@ static struct tc_action_ops act_vlan_ops = { .init = tcf_vlan_init, .cleanup = tcf_vlan_cleanup, .walk = tcf_vlan_walker, + .stats_update = tcf_vlan_stats_update, .get_fill_size = tcf_vlan_get_fill_size, .lookup = tcf_vlan_search, .size = sizeof(struct tcf_vlan), diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index efd3cfb80a2a..32577c248968 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -36,6 +36,8 @@ #include <net/tc_act/tc_sample.h> #include 
<net/tc_act/tc_skbedit.h> #include <net/tc_act/tc_ct.h> +#include <net/tc_act/tc_mpls.h> +#include <net/flow_offload.h> extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; @@ -544,235 +546,73 @@ static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held) } } -static struct tcf_block *tc_dev_ingress_block(struct net_device *dev) -{ - const struct Qdisc_class_ops *cops; - struct Qdisc *qdisc; - - if (!dev_ingress_queue(dev)) - return NULL; - - qdisc = dev_ingress_queue(dev)->qdisc_sleeping; - if (!qdisc) - return NULL; - - cops = qdisc->ops->cl_ops; - if (!cops) - return NULL; - - if (!cops->tcf_block) - return NULL; - - return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL); -} - -static struct rhashtable indr_setup_block_ht; - -struct tc_indr_block_dev { - struct rhash_head ht_node; - struct net_device *dev; - unsigned int refcnt; - struct list_head cb_list; - struct tcf_block *block; -}; - -struct tc_indr_block_cb { - struct list_head list; - void *cb_priv; - tc_indr_block_bind_cb_t *cb; - void *cb_ident; -}; - -static const struct rhashtable_params tc_indr_setup_block_ht_params = { - .key_offset = offsetof(struct tc_indr_block_dev, dev), - .head_offset = offsetof(struct tc_indr_block_dev, ht_node), - .key_len = sizeof(struct net_device *), -}; - -static struct tc_indr_block_dev * -tc_indr_block_dev_lookup(struct net_device *dev) -{ - return rhashtable_lookup_fast(&indr_setup_block_ht, &dev, - tc_indr_setup_block_ht_params); -} - -static struct tc_indr_block_dev *tc_indr_block_dev_get(struct net_device *dev) -{ - struct tc_indr_block_dev *indr_dev; - - indr_dev = tc_indr_block_dev_lookup(dev); - if (indr_dev) - goto inc_ref; - - indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL); - if (!indr_dev) - return NULL; - - INIT_LIST_HEAD(&indr_dev->cb_list); - indr_dev->dev = dev; - indr_dev->block = tc_dev_ingress_block(dev); - if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node, - tc_indr_setup_block_ht_params)) { - kfree(indr_dev); - return NULL; - } - -inc_ref: - indr_dev->refcnt++; - return indr_dev; -} - -static void tc_indr_block_dev_put(struct tc_indr_block_dev *indr_dev) -{ - if (--indr_dev->refcnt) - return; - - rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node, - tc_indr_setup_block_ht_params); - kfree(indr_dev); -} - -static struct tc_indr_block_cb * -tc_indr_block_cb_lookup(struct tc_indr_block_dev *indr_dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ - struct tc_indr_block_cb *indr_block_cb; - - list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list) - if (indr_block_cb->cb == cb && - indr_block_cb->cb_ident == cb_ident) - return indr_block_cb; - return NULL; -} - -static struct tc_indr_block_cb * -tc_indr_block_cb_add(struct tc_indr_block_dev *indr_dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ - struct tc_indr_block_cb *indr_block_cb; - - indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident); - if (indr_block_cb) - return ERR_PTR(-EEXIST); - - indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL); - if (!indr_block_cb) - return ERR_PTR(-ENOMEM); - - indr_block_cb->cb_priv = cb_priv; - indr_block_cb->cb = cb; - indr_block_cb->cb_ident = cb_ident; - list_add(&indr_block_cb->list, &indr_dev->cb_list); - - return indr_block_cb; -} - -static void tc_indr_block_cb_del(struct tc_indr_block_cb *indr_block_cb) -{ - list_del(&indr_block_cb->list); - kfree(indr_block_cb); -} - static int tcf_block_setup(struct tcf_block *block, struct flow_block_offload *bo); -static void 
tc_indr_block_ing_cmd(struct tc_indr_block_dev *indr_dev, - struct tc_indr_block_cb *indr_block_cb, +static void tc_indr_block_ing_cmd(struct net_device *dev, + struct tcf_block *block, + flow_indr_block_bind_cb_t *cb, + void *cb_priv, enum flow_block_command command) { struct flow_block_offload bo = { .command = command, .binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS, - .net = dev_net(indr_dev->dev), - .block_shared = tcf_block_non_null_shared(indr_dev->block), + .net = dev_net(dev), + .block_shared = tcf_block_non_null_shared(block), }; INIT_LIST_HEAD(&bo.cb_list); - if (!indr_dev->block) + if (!block) return; - bo.block = &indr_dev->block->flow_block; - - indr_block_cb->cb(indr_dev->dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK, - &bo); - tcf_block_setup(indr_dev->block, &bo); -} - -int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ - struct tc_indr_block_cb *indr_block_cb; - struct tc_indr_block_dev *indr_dev; - int err; - - indr_dev = tc_indr_block_dev_get(dev); - if (!indr_dev) - return -ENOMEM; - - indr_block_cb = tc_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident); - err = PTR_ERR_OR_ZERO(indr_block_cb); - if (err) - goto err_dev_put; + bo.block = &block->flow_block; - tc_indr_block_ing_cmd(indr_dev, indr_block_cb, FLOW_BLOCK_BIND); - return 0; + down_write(&block->cb_lock); + cb(dev, cb_priv, TC_SETUP_BLOCK, &bo); -err_dev_put: - tc_indr_block_dev_put(indr_dev); - return err; + tcf_block_setup(block, &bo); + up_write(&block->cb_lock); } -EXPORT_SYMBOL_GPL(__tc_indr_block_cb_register); -int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident) +static struct tcf_block *tc_dev_ingress_block(struct net_device *dev) { - int err; - - rtnl_lock(); - err = __tc_indr_block_cb_register(dev, cb_priv, cb, cb_ident); - rtnl_unlock(); + const struct Qdisc_class_ops *cops; + struct Qdisc *qdisc; - return err; -} -EXPORT_SYMBOL_GPL(tc_indr_block_cb_register); + if (!dev_ingress_queue(dev)) + return NULL; -void __tc_indr_block_cb_unregister(struct net_device *dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ - struct tc_indr_block_cb *indr_block_cb; - struct tc_indr_block_dev *indr_dev; + qdisc = dev_ingress_queue(dev)->qdisc_sleeping; + if (!qdisc) + return NULL; - indr_dev = tc_indr_block_dev_lookup(dev); - if (!indr_dev) - return; + cops = qdisc->ops->cl_ops; + if (!cops) + return NULL; - indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident); - if (!indr_block_cb) - return; + if (!cops->tcf_block) + return NULL; - /* Send unbind message if required to free any block cbs. 
*/ - tc_indr_block_ing_cmd(indr_dev, indr_block_cb, FLOW_BLOCK_UNBIND); - tc_indr_block_cb_del(indr_block_cb); - tc_indr_block_dev_put(indr_dev); + return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL); } -EXPORT_SYMBOL_GPL(__tc_indr_block_cb_unregister); -void tc_indr_block_cb_unregister(struct net_device *dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident) +static void tc_indr_block_get_and_ing_cmd(struct net_device *dev, + flow_indr_block_bind_cb_t *cb, + void *cb_priv, + enum flow_block_command command) { - rtnl_lock(); - __tc_indr_block_cb_unregister(dev, cb, cb_ident); - rtnl_unlock(); + struct tcf_block *block = tc_dev_ingress_block(dev); + + tc_indr_block_ing_cmd(dev, block, cb, cb_priv, command); } -EXPORT_SYMBOL_GPL(tc_indr_block_cb_unregister); -static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev, +static void tc_indr_block_call(struct tcf_block *block, + struct net_device *dev, struct tcf_block_ext_info *ei, enum flow_block_command command, struct netlink_ext_ack *extack) { - struct tc_indr_block_cb *indr_block_cb; - struct tc_indr_block_dev *indr_dev; struct flow_block_offload bo = { .command = command, .binder_type = ei->binder_type, @@ -783,22 +623,13 @@ static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev, }; INIT_LIST_HEAD(&bo.cb_list); - indr_dev = tc_indr_block_dev_lookup(dev); - if (!indr_dev) - return; - - indr_dev->block = command == FLOW_BLOCK_BIND ? block : NULL; - - list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list) - indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK, - &bo); - + flow_indr_block_call(dev, &bo, command); tcf_block_setup(block, &bo); } static bool tcf_block_offload_in_use(struct tcf_block *block) { - return block->offloadcnt; + return atomic_read(&block->offloadcnt); } static int tcf_block_offload_cmd(struct tcf_block *block, @@ -832,6 +663,7 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q, struct net_device *dev = q->dev_queue->dev; int err; + down_write(&block->cb_lock); if (!dev->netdev_ops->ndo_setup_tc) goto no_offload_dev_inc; @@ -840,24 +672,31 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q, */ if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) { NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto err_unlock; } err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_BIND, extack); if (err == -EOPNOTSUPP) goto no_offload_dev_inc; if (err) - return err; + goto err_unlock; tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack); + up_write(&block->cb_lock); return 0; no_offload_dev_inc: - if (tcf_block_offload_in_use(block)) - return -EOPNOTSUPP; + if (tcf_block_offload_in_use(block)) { + err = -EOPNOTSUPP; + goto err_unlock; + } + err = 0; block->nooffloaddevcnt++; tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack); - return 0; +err_unlock: + up_write(&block->cb_lock); + return err; } static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, @@ -866,6 +705,7 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, struct net_device *dev = q->dev_queue->dev; int err; + down_write(&block->cb_lock); tc_indr_block_call(block, dev, ei, FLOW_BLOCK_UNBIND, NULL); if (!dev->netdev_ops->ndo_setup_tc) @@ -873,10 +713,12 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_UNBIND, NULL); if 
(err == -EOPNOTSUPP) goto no_offload_dev_dec; + up_write(&block->cb_lock); return; no_offload_dev_dec: WARN_ON(block->nooffloaddevcnt-- == 0); + up_write(&block->cb_lock); } static int @@ -991,6 +833,7 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, return ERR_PTR(-ENOMEM); } mutex_init(&block->lock); + init_rwsem(&block->cb_lock); flow_block_init(&block->flow_block); INIT_LIST_HEAD(&block->chain_list); INIT_LIST_HEAD(&block->owner_list); @@ -1526,6 +1369,8 @@ tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb, struct tcf_proto *tp, *tp_prev; int err; + lockdep_assert_held(&block->cb_lock); + for (chain = __tcf_get_next_chain(block, NULL); chain; chain_prev = chain, @@ -1564,6 +1409,8 @@ static int tcf_block_bind(struct tcf_block *block, struct flow_block_cb *block_cb, *next; int err, i = 0; + lockdep_assert_held(&block->cb_lock); + list_for_each_entry(block_cb, &bo->cb_list, list) { err = tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv, true, @@ -1571,6 +1418,8 @@ static int tcf_block_bind(struct tcf_block *block, bo->extack); if (err) goto err_unroll; + if (!bo->unlocked_driver_cb) + block->lockeddevcnt++; i++; } @@ -1586,6 +1435,8 @@ err_unroll: block_cb->cb_priv, false, tcf_block_offload_in_use(block), NULL); + if (!bo->unlocked_driver_cb) + block->lockeddevcnt--; } flow_block_cb_free(block_cb); } @@ -1598,6 +1449,8 @@ static void tcf_block_unbind(struct tcf_block *block, { struct flow_block_cb *block_cb, *next; + lockdep_assert_held(&block->cb_lock); + list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) { tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv, false, @@ -1605,6 +1458,8 @@ static void tcf_block_unbind(struct tcf_block *block, NULL); list_del(&block_cb->list); flow_block_cb_free(block_cb); + if (!bo->unlocked_driver_cb) + block->lockeddevcnt--; } } @@ -1659,6 +1514,18 @@ reclassify: goto reset; } else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) { first_tp = res->goto_tp; + +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + { + struct tc_skb_ext *ext; + + ext = skb_ext_add(skb, TC_SKB_EXT); + if (WARN_ON_ONCE(!ext)) + return TC_ACT_SHOT; + + ext->chain = err & TC_ACT_EXT_VAL_MASK; + } +#endif goto reset; } #endif @@ -3151,17 +3018,61 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts) } EXPORT_SYMBOL(tcf_exts_dump_stats); -int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, - void *type_data, bool err_stop) +static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags) +{ + if (*flags & TCA_CLS_FLAGS_IN_HW) + return; + *flags |= TCA_CLS_FLAGS_IN_HW; + atomic_inc(&block->offloadcnt); +} + +static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags) +{ + if (!(*flags & TCA_CLS_FLAGS_IN_HW)) + return; + *flags &= ~TCA_CLS_FLAGS_IN_HW; + atomic_dec(&block->offloadcnt); +} + +static void tc_cls_offload_cnt_update(struct tcf_block *block, + struct tcf_proto *tp, u32 *cnt, + u32 *flags, u32 diff, bool add) +{ + lockdep_assert_held(&block->cb_lock); + + spin_lock(&tp->lock); + if (add) { + if (!*cnt) + tcf_block_offload_inc(block, flags); + *cnt += diff; + } else { + *cnt -= diff; + if (!*cnt) + tcf_block_offload_dec(block, flags); + } + spin_unlock(&tp->lock); +} + +static void +tc_cls_offload_cnt_reset(struct tcf_block *block, struct tcf_proto *tp, + u32 *cnt, u32 *flags) +{ + lockdep_assert_held(&block->cb_lock); + + spin_lock(&tp->lock); + tcf_block_offload_dec(block, flags); + *cnt = 0; + spin_unlock(&tp->lock); +} + +static 
int +__tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, + void *type_data, bool err_stop) { struct flow_block_cb *block_cb; int ok_count = 0; int err; - /* Make sure all netdevs sharing this block are offload-capable. */ - if (block->nooffloaddevcnt && err_stop) - return -EOPNOTSUPP; - list_for_each_entry(block_cb, &block->flow_block.cb_list, list) { err = block_cb->cb(type, type_data, block_cb->cb_priv); if (err) { @@ -3173,17 +3084,261 @@ int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, } return ok_count; } + +int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, + void *type_data, bool err_stop, bool rtnl_held) +{ + bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; + int ok_count; + +retry: + if (take_rtnl) + rtnl_lock(); + down_read(&block->cb_lock); + /* Need to obtain rtnl lock if block is bound to devs that require it. + * In block bind code cb_lock is obtained while holding rtnl, so we must + * obtain the locks in same order here. + */ + if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { + up_read(&block->cb_lock); + take_rtnl = true; + goto retry; + } + + ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); + + up_read(&block->cb_lock); + if (take_rtnl) + rtnl_unlock(); + return ok_count; +} EXPORT_SYMBOL(tc_setup_cb_call); +/* Non-destructive filter add. If filter that wasn't already in hardware is + * successfully offloaded, increment block offloads counter. On failure, + * previously offloaded filter is considered to be intact and offloads counter + * is not decremented. + */ + +int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp, + enum tc_setup_type type, void *type_data, bool err_stop, + u32 *flags, unsigned int *in_hw_count, bool rtnl_held) +{ + bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; + int ok_count; + +retry: + if (take_rtnl) + rtnl_lock(); + down_read(&block->cb_lock); + /* Need to obtain rtnl lock if block is bound to devs that require it. + * In block bind code cb_lock is obtained while holding rtnl, so we must + * obtain the locks in same order here. + */ + if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { + up_read(&block->cb_lock); + take_rtnl = true; + goto retry; + } + + /* Make sure all netdevs sharing this block are offload-capable. */ + if (block->nooffloaddevcnt && err_stop) { + ok_count = -EOPNOTSUPP; + goto err_unlock; + } + + ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); + if (ok_count < 0) + goto err_unlock; + + if (tp->ops->hw_add) + tp->ops->hw_add(tp, type_data); + if (ok_count > 0) + tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, + ok_count, true); +err_unlock: + up_read(&block->cb_lock); + if (take_rtnl) + rtnl_unlock(); + return ok_count < 0 ? ok_count : 0; +} +EXPORT_SYMBOL(tc_setup_cb_add); + +/* Destructive filter replace. If filter that wasn't already in hardware is + * successfully offloaded, increment block offload counter. On failure, + * previously offloaded filter is considered to be destroyed and offload counter + * is decremented. 
+ */ + +int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp, + enum tc_setup_type type, void *type_data, bool err_stop, + u32 *old_flags, unsigned int *old_in_hw_count, + u32 *new_flags, unsigned int *new_in_hw_count, + bool rtnl_held) +{ + bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; + int ok_count; + +retry: + if (take_rtnl) + rtnl_lock(); + down_read(&block->cb_lock); + /* Need to obtain rtnl lock if block is bound to devs that require it. + * In block bind code cb_lock is obtained while holding rtnl, so we must + * obtain the locks in same order here. + */ + if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { + up_read(&block->cb_lock); + take_rtnl = true; + goto retry; + } + + /* Make sure all netdevs sharing this block are offload-capable. */ + if (block->nooffloaddevcnt && err_stop) { + ok_count = -EOPNOTSUPP; + goto err_unlock; + } + + tc_cls_offload_cnt_reset(block, tp, old_in_hw_count, old_flags); + if (tp->ops->hw_del) + tp->ops->hw_del(tp, type_data); + + ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); + if (ok_count < 0) + goto err_unlock; + + if (tp->ops->hw_add) + tp->ops->hw_add(tp, type_data); + if (ok_count > 0) + tc_cls_offload_cnt_update(block, tp, new_in_hw_count, + new_flags, ok_count, true); +err_unlock: + up_read(&block->cb_lock); + if (take_rtnl) + rtnl_unlock(); + return ok_count < 0 ? ok_count : 0; +} +EXPORT_SYMBOL(tc_setup_cb_replace); + +/* Destroy filter and decrement block offload counter, if filter was previously + * offloaded. + */ + +int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp, + enum tc_setup_type type, void *type_data, bool err_stop, + u32 *flags, unsigned int *in_hw_count, bool rtnl_held) +{ + bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; + int ok_count; + +retry: + if (take_rtnl) + rtnl_lock(); + down_read(&block->cb_lock); + /* Need to obtain rtnl lock if block is bound to devs that require it. + * In block bind code cb_lock is obtained while holding rtnl, so we must + * obtain the locks in same order here. + */ + if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { + up_read(&block->cb_lock); + take_rtnl = true; + goto retry; + } + + ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); + + tc_cls_offload_cnt_reset(block, tp, in_hw_count, flags); + if (tp->ops->hw_del) + tp->ops->hw_del(tp, type_data); + + up_read(&block->cb_lock); + if (take_rtnl) + rtnl_unlock(); + return ok_count < 0 ? 
ok_count : 0; +} +EXPORT_SYMBOL(tc_setup_cb_destroy); + +int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp, + bool add, flow_setup_cb_t *cb, + enum tc_setup_type type, void *type_data, + void *cb_priv, u32 *flags, unsigned int *in_hw_count) +{ + int err = cb(type, type_data, cb_priv); + + if (err) { + if (add && tc_skip_sw(*flags)) + return err; + } else { + tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, 1, + add); + } + + return 0; +} +EXPORT_SYMBOL(tc_setup_cb_reoffload); + +void tc_cleanup_flow_action(struct flow_action *flow_action) +{ + struct flow_action_entry *entry; + int i; + + flow_action_for_each(i, entry, flow_action) + if (entry->destructor) + entry->destructor(entry->destructor_priv); +} +EXPORT_SYMBOL(tc_cleanup_flow_action); + +static void tcf_mirred_get_dev(struct flow_action_entry *entry, + const struct tc_action *act) +{ +#ifdef CONFIG_NET_CLS_ACT + entry->dev = act->ops->get_dev(act, &entry->destructor); + if (!entry->dev) + return; + entry->destructor_priv = entry->dev; +#endif +} + +static void tcf_tunnel_encap_put_tunnel(void *priv) +{ + struct ip_tunnel_info *tunnel = priv; + + kfree(tunnel); +} + +static int tcf_tunnel_encap_get_tunnel(struct flow_action_entry *entry, + const struct tc_action *act) +{ + entry->tunnel = tcf_tunnel_info_copy(act); + if (!entry->tunnel) + return -ENOMEM; + entry->destructor = tcf_tunnel_encap_put_tunnel; + entry->destructor_priv = entry->tunnel; + return 0; +} + +static void tcf_sample_get_group(struct flow_action_entry *entry, + const struct tc_action *act) +{ +#ifdef CONFIG_NET_CLS_ACT + entry->sample.psample_group = + act->ops->get_psample_group(act, &entry->destructor); + entry->destructor_priv = entry->sample.psample_group; +#endif +} + int tc_setup_flow_action(struct flow_action *flow_action, - const struct tcf_exts *exts) + const struct tcf_exts *exts, bool rtnl_held) { const struct tc_action *act; - int i, j, k; + int i, j, k, err = 0; if (!exts) return 0; + if (!rtnl_held) + rtnl_lock(); + j = 0; tcf_exts_for_each_action(i, act, exts) { struct flow_action_entry *entry; @@ -3200,10 +3355,16 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->chain_index = tcf_gact_goto_chain_index(act); } else if (is_tcf_mirred_egress_redirect(act)) { entry->id = FLOW_ACTION_REDIRECT; - entry->dev = tcf_mirred_dev(act); + tcf_mirred_get_dev(entry, act); } else if (is_tcf_mirred_egress_mirror(act)) { entry->id = FLOW_ACTION_MIRRED; - entry->dev = tcf_mirred_dev(act); + tcf_mirred_get_dev(entry, act); + } else if (is_tcf_mirred_ingress_redirect(act)) { + entry->id = FLOW_ACTION_REDIRECT_INGRESS; + tcf_mirred_get_dev(entry, act); + } else if (is_tcf_mirred_ingress_mirror(act)) { + entry->id = FLOW_ACTION_MIRRED_INGRESS; + tcf_mirred_get_dev(entry, act); } else if (is_tcf_vlan(act)) { switch (tcf_vlan_action(act)) { case TCA_VLAN_ACT_PUSH: @@ -3222,11 +3383,14 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->vlan.prio = tcf_vlan_push_prio(act); break; default: + err = -EOPNOTSUPP; goto err_out; } } else if (is_tcf_tunnel_set(act)) { entry->id = FLOW_ACTION_TUNNEL_ENCAP; - entry->tunnel = tcf_tunnel_info(act); + err = tcf_tunnel_encap_get_tunnel(entry, act); + if (err) + goto err_out; } else if (is_tcf_tunnel_release(act)) { entry->id = FLOW_ACTION_TUNNEL_DECAP; } else if (is_tcf_pedit(act)) { @@ -3239,6 +3403,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->id = FLOW_ACTION_ADD; break; default: + err = -EOPNOTSUPP; goto err_out; } entry->mangle.htype = 
tcf_pedit_htype(act, k); @@ -3255,11 +3420,10 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->mark = tcf_skbedit_mark(act); } else if (is_tcf_sample(act)) { entry->id = FLOW_ACTION_SAMPLE; - entry->sample.psample_group = - tcf_sample_psample_group(act); entry->sample.trunc_size = tcf_sample_trunc_size(act); entry->sample.truncate = tcf_sample_truncate(act); entry->sample.rate = tcf_sample_rate(act); + tcf_sample_get_group(entry, act); } else if (is_tcf_police(act)) { entry->id = FLOW_ACTION_POLICE; entry->police.burst = tcf_police_tcfp_burst(act); @@ -3269,16 +3433,50 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->id = FLOW_ACTION_CT; entry->ct.action = tcf_ct_action(act); entry->ct.zone = tcf_ct_zone(act); + } else if (is_tcf_mpls(act)) { + switch (tcf_mpls_action(act)) { + case TCA_MPLS_ACT_PUSH: + entry->id = FLOW_ACTION_MPLS_PUSH; + entry->mpls_push.proto = tcf_mpls_proto(act); + entry->mpls_push.label = tcf_mpls_label(act); + entry->mpls_push.tc = tcf_mpls_tc(act); + entry->mpls_push.bos = tcf_mpls_bos(act); + entry->mpls_push.ttl = tcf_mpls_ttl(act); + break; + case TCA_MPLS_ACT_POP: + entry->id = FLOW_ACTION_MPLS_POP; + entry->mpls_pop.proto = tcf_mpls_proto(act); + break; + case TCA_MPLS_ACT_MODIFY: + entry->id = FLOW_ACTION_MPLS_MANGLE; + entry->mpls_mangle.label = tcf_mpls_label(act); + entry->mpls_mangle.tc = tcf_mpls_tc(act); + entry->mpls_mangle.bos = tcf_mpls_bos(act); + entry->mpls_mangle.ttl = tcf_mpls_ttl(act); + break; + default: + goto err_out; + } + } else if (is_tcf_skbedit_ptype(act)) { + entry->id = FLOW_ACTION_PTYPE; + entry->ptype = tcf_skbedit_ptype(act); } else { + err = -EOPNOTSUPP; goto err_out; } if (!is_tcf_pedit(act)) j++; } - return 0; + err_out: - return -EOPNOTSUPP; + if (!rtnl_held) + rtnl_unlock(); + + if (err) + tc_cleanup_flow_action(flow_action); + + return err; } EXPORT_SYMBOL(tc_setup_flow_action); @@ -3321,6 +3519,11 @@ static struct pernet_operations tcf_net_ops = { .size = sizeof(struct tcf_net), }; +static struct flow_indr_block_ing_entry block_ing_entry = { + .cb = tc_indr_block_get_and_ing_cmd, + .list = LIST_HEAD_INIT(block_ing_entry.list), +}; + static int __init tc_filter_init(void) { int err; @@ -3333,10 +3536,7 @@ static int __init tc_filter_init(void) if (err) goto err_register_pernet_subsys; - err = rhashtable_init(&indr_setup_block_ht, - &tc_indr_setup_block_ht_params); - if (err) - goto err_rhash_setup_block_ht; + flow_indr_add_block_ing_cb(&block_ing_entry); rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, RTNL_FLAG_DOIT_UNLOCKED); @@ -3351,8 +3551,6 @@ static int __init tc_filter_init(void) return 0; -err_rhash_setup_block_ht: - unregister_pernet_subsys(&tcf_net_ops); err_register_pernet_subsys: destroy_workqueue(tc_filter_wq); return err; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index 3f7a9c02b70c..bf10bdaf5012 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -163,17 +163,19 @@ static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, cls_bpf.exts_integrated = obj->exts_integrated; if (oldprog) - tcf_block_offload_dec(block, &oldprog->gen_flags); + err = tc_setup_cb_replace(block, tp, TC_SETUP_CLSBPF, &cls_bpf, + skip_sw, &oldprog->gen_flags, + &oldprog->in_hw_count, + &prog->gen_flags, &prog->in_hw_count, + true); + else + err = tc_setup_cb_add(block, tp, TC_SETUP_CLSBPF, &cls_bpf, + skip_sw, &prog->gen_flags, + &prog->in_hw_count, true); - err = tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, skip_sw); - if (prog) { - if 
(err < 0) { - cls_bpf_offload_cmd(tp, oldprog, prog, extack); - return err; - } else if (err > 0) { - prog->in_hw_count = err; - tcf_block_offload_inc(block, &prog->gen_flags); - } + if (prog && err) { + cls_bpf_offload_cmd(tp, oldprog, prog, extack); + return err; } if (prog && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW)) @@ -230,7 +232,7 @@ static void cls_bpf_offload_update_stats(struct tcf_proto *tp, cls_bpf.name = prog->bpf_name; cls_bpf.exts_integrated = prog->exts_integrated; - tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, false); + tc_setup_cb_call(block, TC_SETUP_CLSBPF, &cls_bpf, false, true); } static int cls_bpf_init(struct tcf_proto *tp) @@ -673,15 +675,11 @@ static int cls_bpf_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb cls_bpf.name = prog->bpf_name; cls_bpf.exts_integrated = prog->exts_integrated; - err = cb(TC_SETUP_CLSBPF, &cls_bpf, cb_priv); - if (err) { - if (add && tc_skip_sw(prog->gen_flags)) - return err; - continue; - } - - tc_cls_offload_cnt_update(block, &prog->in_hw_count, - &prog->gen_flags, add); + err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSBPF, + &cls_bpf, cb_priv, &prog->gen_flags, + &prog->in_hw_count); + if (err) + return err; } return 0; diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 054123742e32..74221e3351c3 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -412,41 +412,27 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f, struct tcf_block *block = tp->chain->block; struct flow_cls_offload cls_flower = {}; - if (!rtnl_held) - rtnl_lock(); - tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack); cls_flower.command = FLOW_CLS_DESTROY; cls_flower.cookie = (unsigned long) f; - tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); - spin_lock(&tp->lock); - list_del_init(&f->hw_list); - tcf_block_offload_dec(block, &f->flags); - spin_unlock(&tp->lock); + tc_setup_cb_destroy(block, tp, TC_SETUP_CLSFLOWER, &cls_flower, false, + &f->flags, &f->in_hw_count, rtnl_held); - if (!rtnl_held) - rtnl_unlock(); } static int fl_hw_replace_filter(struct tcf_proto *tp, struct cls_fl_filter *f, bool rtnl_held, struct netlink_ext_ack *extack) { - struct cls_fl_head *head = fl_head_dereference(tp); struct tcf_block *block = tp->chain->block; struct flow_cls_offload cls_flower = {}; bool skip_sw = tc_skip_sw(f->flags); int err = 0; - if (!rtnl_held) - rtnl_lock(); - cls_flower.rule = flow_rule_alloc(tcf_exts_num_actions(&f->exts)); - if (!cls_flower.rule) { - err = -ENOMEM; - goto errout; - } + if (!cls_flower.rule) + return -ENOMEM; tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, extack); cls_flower.command = FLOW_CLS_REPLACE; @@ -456,43 +442,31 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, cls_flower.rule->match.key = &f->mkey; cls_flower.classid = f->res.classid; - err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts); + err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts, + rtnl_held); if (err) { kfree(cls_flower.rule); - if (skip_sw) + if (skip_sw) { NL_SET_ERR_MSG_MOD(extack, "Failed to setup flow action"); - else - err = 0; - goto errout; + return err; + } + return 0; } - err = tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, skip_sw); + err = tc_setup_cb_add(block, tp, TC_SETUP_CLSFLOWER, &cls_flower, + skip_sw, &f->flags, &f->in_hw_count, rtnl_held); + tc_cleanup_flow_action(&cls_flower.rule->action); kfree(cls_flower.rule); - if (err < 0) { - fl_hw_destroy_filter(tp, f, true, 
NULL); - goto errout; - } else if (err > 0) { - f->in_hw_count = err; - err = 0; - spin_lock(&tp->lock); - tcf_block_offload_inc(block, &f->flags); - spin_unlock(&tp->lock); - } - - if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW)) { - err = -EINVAL; - goto errout; + if (err) { + fl_hw_destroy_filter(tp, f, rtnl_held, NULL); + return err; } - spin_lock(&tp->lock); - list_add(&f->hw_list, &head->hw_filters); - spin_unlock(&tp->lock); -errout: - if (!rtnl_held) - rtnl_unlock(); + if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW)) + return -EINVAL; - return err; + return 0; } static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f, @@ -501,22 +475,17 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f, struct tcf_block *block = tp->chain->block; struct flow_cls_offload cls_flower = {}; - if (!rtnl_held) - rtnl_lock(); - tc_cls_common_offload_init(&cls_flower.common, tp, f->flags, NULL); cls_flower.command = FLOW_CLS_STATS; cls_flower.cookie = (unsigned long) f; cls_flower.classid = f->res.classid; - tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); + tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false, + rtnl_held); tcf_exts_stats_update(&f->exts, cls_flower.stats.bytes, cls_flower.stats.pkts, cls_flower.stats.lastused); - - if (!rtnl_held) - rtnl_unlock(); } static void __fl_put(struct cls_fl_filter *f) @@ -1831,7 +1800,8 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, cls_flower.rule->match.mask = &f->mask->key; cls_flower.rule->match.key = &f->mkey; - err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts); + err = tc_setup_flow_action(&cls_flower.rule->action, &f->exts, + true); if (err) { kfree(cls_flower.rule); if (tc_skip_sw(f->flags)) { @@ -1844,21 +1814,17 @@ static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, cls_flower.classid = f->res.classid; - err = cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv); + err = tc_setup_cb_reoffload(block, tp, add, cb, + TC_SETUP_CLSFLOWER, &cls_flower, + cb_priv, &f->flags, + &f->in_hw_count); + tc_cleanup_flow_action(&cls_flower.rule->action); kfree(cls_flower.rule); if (err) { - if (add && tc_skip_sw(f->flags)) { - __fl_put(f); - return err; - } - goto next_flow; + __fl_put(f); + return err; } - - spin_lock(&tp->lock); - tc_cls_offload_cnt_update(block, &f->in_hw_count, &f->flags, - add); - spin_unlock(&tp->lock); next_flow: __fl_put(f); } @@ -1866,6 +1832,30 @@ next_flow: return 0; } +static void fl_hw_add(struct tcf_proto *tp, void *type_data) +{ + struct flow_cls_offload *cls_flower = type_data; + struct cls_fl_filter *f = + (struct cls_fl_filter *) cls_flower->cookie; + struct cls_fl_head *head = fl_head_dereference(tp); + + spin_lock(&tp->lock); + list_add(&f->hw_list, &head->hw_filters); + spin_unlock(&tp->lock); +} + +static void fl_hw_del(struct tcf_proto *tp, void *type_data) +{ + struct flow_cls_offload *cls_flower = type_data; + struct cls_fl_filter *f = + (struct cls_fl_filter *) cls_flower->cookie; + + spin_lock(&tp->lock); + if (!list_empty(&f->hw_list)) + list_del_init(&f->hw_list); + spin_unlock(&tp->lock); +} + static int fl_hw_create_tmplt(struct tcf_chain *chain, struct fl_flow_tmplt *tmplt) { @@ -1886,7 +1876,7 @@ static int fl_hw_create_tmplt(struct tcf_chain *chain, /* We don't care if driver (any of them) fails to handle this * call. It serves just as a hint for it. 
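After this conversion cls_flower no longer manipulates the block offload counter itself: tc_setup_cb_add()/tc_setup_cb_destroy() maintain f->in_hw_count and the TCA_CLS_FLAGS_IN_HW bit, and the new fl_hw_add()/fl_hw_del() ops let the core update the hw_filters list from the same place. A simplified model of the counter discipline those helpers rely on (struct names invented; the real code holds tp->lock around the update):

#include <stdbool.h>
#include <stdio.h>

#define IN_HW 0x1 /* stands in for TCA_CLS_FLAGS_IN_HW */

struct block { int offloadcnt; };                              /* filters in hardware, per block */
struct filter { unsigned int in_hw_count; unsigned int flags; };

/* Mirrors the idea of tc_cls_offload_cnt_update(): the block counter only
 * moves on the 0 <-> non-zero transitions of the per-filter count. */
static void offload_cnt_update(struct block *b, struct filter *f,
                               unsigned int diff, bool add)
{
        if (add) {
                if (!f->in_hw_count) {
                        f->flags |= IN_HW;
                        b->offloadcnt++;
                }
                f->in_hw_count += diff;
        } else {
                f->in_hw_count -= diff;
                if (!f->in_hw_count) {
                        f->flags &= ~IN_HW;
                        b->offloadcnt--;
                }
        }
}

int main(void)
{
        struct block b = { 0 };
        struct filter f = { 0 };

        offload_cnt_update(&b, &f, 2, true);   /* offloaded to two devices */
        offload_cnt_update(&b, &f, 2, false);  /* removed from both again */
        printf("offloadcnt=%d in_hw=%u\n", b.offloadcnt, f.in_hw_count);
        return 0;
}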
*/ - tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); + tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false, true); kfree(cls_flower.rule); return 0; @@ -1902,7 +1892,7 @@ static void fl_hw_destroy_tmplt(struct tcf_chain *chain, cls_flower.command = FLOW_CLS_TMPLT_DESTROY; cls_flower.cookie = (unsigned long) tmplt; - tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false); + tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false, true); } static void *fl_tmplt_create(struct net *net, struct tcf_chain *chain, @@ -2526,6 +2516,8 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = { .delete = fl_delete, .walk = fl_walk, .reoffload = fl_reoffload, + .hw_add = fl_hw_add, + .hw_del = fl_hw_del, .dump = fl_dump, .bind_class = fl_bind_class, .tmplt_create = fl_tmplt_create, diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 455ea2793f9b..7fc2eb62aa98 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -75,8 +75,8 @@ static void mall_destroy_hw_filter(struct tcf_proto *tp, cls_mall.command = TC_CLSMATCHALL_DESTROY; cls_mall.cookie = cookie; - tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false); - tcf_block_offload_dec(block, &head->flags); + tc_setup_cb_destroy(block, tp, TC_SETUP_CLSMATCHALL, &cls_mall, false, + &head->flags, &head->in_hw_count, true); } static int mall_replace_hw_filter(struct tcf_proto *tp, @@ -97,7 +97,7 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, cls_mall.command = TC_CLSMATCHALL_REPLACE; cls_mall.cookie = cookie; - err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts); + err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts, true); if (err) { kfree(cls_mall.rule); mall_destroy_hw_filter(tp, head, cookie, NULL); @@ -109,15 +109,14 @@ static int mall_replace_hw_filter(struct tcf_proto *tp, return err; } - err = tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, skip_sw); + err = tc_setup_cb_add(block, tp, TC_SETUP_CLSMATCHALL, &cls_mall, + skip_sw, &head->flags, &head->in_hw_count, true); + tc_cleanup_flow_action(&cls_mall.rule->action); kfree(cls_mall.rule); - if (err < 0) { + if (err) { mall_destroy_hw_filter(tp, head, cookie, NULL); return err; - } else if (err > 0) { - head->in_hw_count = err; - tcf_block_offload_inc(block, &head->flags); } if (skip_sw && !(head->flags & TCA_CLS_FLAGS_IN_HW)) @@ -302,7 +301,7 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, TC_CLSMATCHALL_REPLACE : TC_CLSMATCHALL_DESTROY; cls_mall.cookie = (unsigned long)head; - err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts); + err = tc_setup_flow_action(&cls_mall.rule->action, &head->exts, true); if (err) { kfree(cls_mall.rule); if (add && tc_skip_sw(head->flags)) { @@ -312,16 +311,14 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, return 0; } - err = cb(TC_SETUP_CLSMATCHALL, &cls_mall, cb_priv); + err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSMATCHALL, + &cls_mall, cb_priv, &head->flags, + &head->in_hw_count); + tc_cleanup_flow_action(&cls_mall.rule->action); kfree(cls_mall.rule); - if (err) { - if (add && tc_skip_sw(head->flags)) - return err; - return 0; - } - - tc_cls_offload_cnt_update(block, &head->in_hw_count, &head->flags, add); + if (err) + return err; return 0; } @@ -337,7 +334,7 @@ static void mall_stats_hw_filter(struct tcf_proto *tp, cls_mall.command = TC_CLSMATCHALL_STATS; cls_mall.cookie = cookie; - tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, 
false); + tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false, true); tcf_exts_stats_update(&head->exts, cls_mall.stats.bytes, cls_mall.stats.pkts, cls_mall.stats.lastused); diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 8614088edd1b..a0e6fac613de 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -480,7 +480,7 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, cls_u32.hnode.handle = h->handle; cls_u32.hnode.prio = h->prio; - tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false); + tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false, true); } static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, @@ -498,7 +498,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, cls_u32.hnode.handle = h->handle; cls_u32.hnode.prio = h->prio; - err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw); + err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw, true); if (err < 0) { u32_clear_hw_hnode(tp, h, NULL); return err; @@ -522,8 +522,8 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, cls_u32.command = TC_CLSU32_DELETE_KNODE; cls_u32.knode.handle = n->handle; - tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, false); - tcf_block_offload_dec(block, &n->flags); + tc_setup_cb_destroy(block, tp, TC_SETUP_CLSU32, &cls_u32, false, + &n->flags, &n->in_hw_count, true); } static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, @@ -552,13 +552,11 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, if (n->ht_down) cls_u32.knode.link_handle = ht->handle; - err = tc_setup_cb_call(block, TC_SETUP_CLSU32, &cls_u32, skip_sw); - if (err < 0) { + err = tc_setup_cb_add(block, tp, TC_SETUP_CLSU32, &cls_u32, skip_sw, + &n->flags, &n->in_hw_count, true); + if (err) { u32_remove_hw_knode(tp, n, NULL); return err; - } else if (err > 0) { - n->in_hw_count = err; - tcf_block_offload_inc(block, &n->flags); } if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW)) @@ -1201,14 +1199,11 @@ static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n, cls_u32.knode.link_handle = ht->handle; } - err = cb(TC_SETUP_CLSU32, &cls_u32, cb_priv); - if (err) { - if (add && tc_skip_sw(n->flags)) - return err; - return 0; - } - - tc_cls_offload_cnt_update(block, &n->in_hw_count, &n->flags, add); + err = tc_setup_cb_reoffload(block, tp, add, cb, TC_SETUP_CLSU32, + &cls_u32, cb_priv, &n->flags, + &n->in_hw_count); + if (err) + return err; return 0; } diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index 810645b5c086..93b58fde99b7 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -299,7 +299,7 @@ static void cbs_set_port_rate(struct net_device *dev, struct cbs_sched_data *q) { struct ethtool_link_ksettings ecmd; int speed = SPEED_10; - int port_rate = -1; + int port_rate; int err; err = __ethtool_get_link_ksettings(dev, &ecmd); diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index d59fbcc745d1..c261c0a18868 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -45,7 +45,6 @@ struct fq_codel_flow { struct sk_buff *tail; struct list_head flowchain; int deficit; - u32 dropped; /* number of drops (or ECN marks) on this flow */ struct codel_vars cvars; }; /* please try to keep this structure <= 64 bytes */ @@ -173,7 +172,8 @@ static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets, __qdisc_drop(skb, to_free); } while (++i < max_packets && len < 
threshold); - flow->dropped += i; + /* Tell codel to increase its signal strength also */ + flow->cvars.count += i; q->backlogs[idx] -= len; q->memory_usage -= mem; sch->qstats.drops += i; @@ -211,7 +211,6 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch, list_add_tail(&flow->flowchain, &q->new_flows); q->new_flow_count++; flow->deficit = q->quantum; - flow->dropped = 0; } get_codel_cb(skb)->mem_usage = skb->truesize; q->memory_usage += get_codel_cb(skb)->mem_usage; @@ -286,7 +285,6 @@ static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch) struct sk_buff *skb; struct fq_codel_flow *flow; struct list_head *head; - u32 prev_drop_count, prev_ecn_mark; begin: head = &q->new_flows; @@ -303,16 +301,10 @@ begin: goto begin; } - prev_drop_count = q->cstats.drop_count; - prev_ecn_mark = q->cstats.ecn_mark; - skb = codel_dequeue(sch, &sch->qstats.backlog, &q->cparams, &flow->cvars, &q->cstats, qdisc_pkt_len, codel_get_enqueue_time, drop_func, dequeue_func); - flow->dropped += q->cstats.drop_count - prev_drop_count; - flow->dropped += q->cstats.ecn_mark - prev_ecn_mark; - if (!skb) { /* force a pass through old_flows to prevent starvation */ if ((head == &q->new_flows) && !list_empty(&q->old_flows)) @@ -658,7 +650,7 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl, sch_tree_unlock(sch); } qs.backlog = q->backlogs[idx]; - qs.drops = flow->dropped; + qs.drops = 0; } if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0) return -1; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index ac28f6a5d70e..17bd8f539bc7 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -985,6 +985,9 @@ static void qdisc_destroy(struct Qdisc *qdisc) void qdisc_put(struct Qdisc *qdisc) { + if (!qdisc) + return; + if (qdisc->flags & TCQ_F_BUILTIN || !refcount_dec_and_test(&qdisc->refcnt)) return; diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 8d8bc2ec5cd6..2f7b34205c82 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -29,8 +29,8 @@ static DEFINE_SPINLOCK(taprio_list_lock); #define TAPRIO_ALL_GATES_OPEN -1 -#define FLAGS_VALID(flags) (!((flags) & ~TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST)) #define TXTIME_ASSIST_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) +#define FULL_OFFLOAD_IS_ENABLED(flags) ((flags) & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD) struct sched_entry { struct list_head list; @@ -75,9 +75,16 @@ struct taprio_sched { struct sched_gate_list __rcu *admin_sched; struct hrtimer advance_timer; struct list_head taprio_list; + struct sk_buff *(*dequeue)(struct Qdisc *sch); + struct sk_buff *(*peek)(struct Qdisc *sch); u32 txtime_delay; }; +struct __tc_taprio_qopt_offload { + refcount_t users; + struct tc_taprio_qopt_offload offload; +}; + static ktime_t sched_base_time(const struct sched_gate_list *sched) { if (!sched) @@ -268,6 +275,19 @@ static bool is_valid_interval(struct sk_buff *skb, struct Qdisc *sch) return entry; } +static bool taprio_flags_valid(u32 flags) +{ + /* Make sure no other flag bits are set. */ + if (flags & ~(TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST | + TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)) + return false; + /* txtime-assist and full offload are mutually exclusive */ + if ((flags & TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST) && + (flags & TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD)) + return false; + return true; +} + /* This returns the tstamp value set by TCP in terms of the set clock. 
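taprio_flags_valid() above replaces the old FLAGS_VALID() macro: only the two known flag bits may be set, and txtime-assist and full offload are mutually exclusive. A standalone version of the check, with the two bits defined locally for illustration (the values mirror the UAPI definitions, but real code should take them from the headers):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define FLAG_TXTIME_ASSIST 0x1U /* TCA_TAPRIO_ATTR_FLAG_TXTIME_ASSIST */
#define FLAG_FULL_OFFLOAD  0x2U /* TCA_TAPRIO_ATTR_FLAG_FULL_OFFLOAD */

static bool taprio_flags_valid(uint32_t flags)
{
        /* No unknown bits may be set... */
        if (flags & ~(FLAG_TXTIME_ASSIST | FLAG_FULL_OFFLOAD))
                return false;
        /* ...and the two modes cannot be combined. */
        if ((flags & FLAG_TXTIME_ASSIST) && (flags & FLAG_FULL_OFFLOAD))
                return false;
        return true;
}

int main(void)
{
        printf("%d %d %d %d\n",
               taprio_flags_valid(0),                                      /* 1: software mode */
               taprio_flags_valid(FLAG_FULL_OFFLOAD),                      /* 1 */
               taprio_flags_valid(FLAG_TXTIME_ASSIST | FLAG_FULL_OFFLOAD), /* 0 */
               taprio_flags_valid(0x8));                                   /* 0: unknown bit */
        return 0;
}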
*/ static ktime_t get_tcp_tstamp(struct taprio_sched *q, struct sk_buff *skb) { @@ -417,7 +437,7 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, return qdisc_enqueue(skb, child, to_free); } -static struct sk_buff *taprio_peek(struct Qdisc *sch) +static struct sk_buff *taprio_peek_soft(struct Qdisc *sch) { struct taprio_sched *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); @@ -461,6 +481,36 @@ static struct sk_buff *taprio_peek(struct Qdisc *sch) return NULL; } +static struct sk_buff *taprio_peek_offload(struct Qdisc *sch) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct sk_buff *skb; + int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct Qdisc *child = q->qdiscs[i]; + + if (unlikely(!child)) + continue; + + skb = child->ops->peek(child); + if (!skb) + continue; + + return skb; + } + + return NULL; +} + +static struct sk_buff *taprio_peek(struct Qdisc *sch) +{ + struct taprio_sched *q = qdisc_priv(sch); + + return q->peek(sch); +} + static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry) { atomic_set(&entry->budget, @@ -468,7 +518,7 @@ static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry) atomic64_read(&q->picos_per_byte))); } -static struct sk_buff *taprio_dequeue(struct Qdisc *sch) +static struct sk_buff *taprio_dequeue_soft(struct Qdisc *sch) { struct taprio_sched *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); @@ -550,6 +600,40 @@ done: return skb; } +static struct sk_buff *taprio_dequeue_offload(struct Qdisc *sch) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct sk_buff *skb; + int i; + + for (i = 0; i < dev->num_tx_queues; i++) { + struct Qdisc *child = q->qdiscs[i]; + + if (unlikely(!child)) + continue; + + skb = child->ops->dequeue(child); + if (unlikely(!skb)) + continue; + + qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); + sch->q.qlen--; + + return skb; + } + + return NULL; +} + +static struct sk_buff *taprio_dequeue(struct Qdisc *sch) +{ + struct taprio_sched *q = qdisc_priv(sch); + + return q->dequeue(sch); +} + static bool should_restart_cycle(const struct sched_gate_list *oper, const struct sched_entry *entry) { @@ -672,10 +756,6 @@ static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { [TCA_TAPRIO_SCHED_ENTRY_INTERVAL] = { .type = NLA_U32 }, }; -static const struct nla_policy entry_list_policy[TCA_TAPRIO_SCHED_MAX + 1] = { - [TCA_TAPRIO_SCHED_ENTRY] = { .type = NLA_NESTED }, -}; - static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { [TCA_TAPRIO_ATTR_PRIOMAP] = { .len = sizeof(struct tc_mqprio_qopt) @@ -936,6 +1016,9 @@ static void taprio_start_sched(struct Qdisc *sch, struct taprio_sched *q = qdisc_priv(sch); ktime_t expires; + if (FULL_OFFLOAD_IS_ENABLED(q->flags)) + return; + expires = hrtimer_get_expires(&q->advance_timer); if (expires == 0) expires = KTIME_MAX; @@ -1015,6 +1098,254 @@ static void setup_txtime(struct taprio_sched *q, } } +static struct tc_taprio_qopt_offload *taprio_offload_alloc(int num_entries) +{ + size_t size = sizeof(struct tc_taprio_sched_entry) * num_entries + + sizeof(struct __tc_taprio_qopt_offload); + struct __tc_taprio_qopt_offload *__offload; + + __offload = kzalloc(size, GFP_KERNEL); + if (!__offload) + return NULL; + + refcount_set(&__offload->users, 1); + + return &__offload->offload; +} + +struct tc_taprio_qopt_offload *taprio_offload_get(struct 
tc_taprio_qopt_offload + *offload) +{ + struct __tc_taprio_qopt_offload *__offload; + + __offload = container_of(offload, struct __tc_taprio_qopt_offload, + offload); + + refcount_inc(&__offload->users); + + return offload; +} +EXPORT_SYMBOL_GPL(taprio_offload_get); + +void taprio_offload_free(struct tc_taprio_qopt_offload *offload) +{ + struct __tc_taprio_qopt_offload *__offload; + + __offload = container_of(offload, struct __tc_taprio_qopt_offload, + offload); + + if (!refcount_dec_and_test(&__offload->users)) + return; + + kfree(__offload); +} +EXPORT_SYMBOL_GPL(taprio_offload_free); + +/* The function will only serve to keep the pointers to the "oper" and "admin" + * schedules valid in relation to their base times, so when calling dump() the + * users looks at the right schedules. + * When using full offload, the admin configuration is promoted to oper at the + * base_time in the PHC time domain. But because the system time is not + * necessarily in sync with that, we can't just trigger a hrtimer to call + * switch_schedules at the right hardware time. + * At the moment we call this by hand right away from taprio, but in the future + * it will be useful to create a mechanism for drivers to notify taprio of the + * offload state (PENDING, ACTIVE, INACTIVE) so it can be visible in dump(). + * This is left as TODO. + */ +void taprio_offload_config_changed(struct taprio_sched *q) +{ + struct sched_gate_list *oper, *admin; + + spin_lock(&q->current_entry_lock); + + oper = rcu_dereference_protected(q->oper_sched, + lockdep_is_held(&q->current_entry_lock)); + admin = rcu_dereference_protected(q->admin_sched, + lockdep_is_held(&q->current_entry_lock)); + + switch_schedules(q, &admin, &oper); + + spin_unlock(&q->current_entry_lock); +} + +static void taprio_sched_to_offload(struct taprio_sched *q, + struct sched_gate_list *sched, + const struct tc_mqprio_qopt *mqprio, + struct tc_taprio_qopt_offload *offload) +{ + struct sched_entry *entry; + int i = 0; + + offload->base_time = sched->base_time; + offload->cycle_time = sched->cycle_time; + offload->cycle_time_extension = sched->cycle_time_extension; + + list_for_each_entry(entry, &sched->entries, list) { + struct tc_taprio_sched_entry *e = &offload->entries[i]; + + e->command = entry->command; + e->interval = entry->interval; + e->gate_mask = entry->gate_mask; + i++; + } + + offload->num_entries = i; +} + +static int taprio_enable_offload(struct net_device *dev, + struct tc_mqprio_qopt *mqprio, + struct taprio_sched *q, + struct sched_gate_list *sched, + struct netlink_ext_ack *extack) +{ + const struct net_device_ops *ops = dev->netdev_ops; + struct tc_taprio_qopt_offload *offload; + int err = 0; + + if (!ops->ndo_setup_tc) { + NL_SET_ERR_MSG(extack, + "Device does not support taprio offload"); + return -EOPNOTSUPP; + } + + offload = taprio_offload_alloc(sched->num_entries); + if (!offload) { + NL_SET_ERR_MSG(extack, + "Not enough memory for enabling offload mode"); + return -ENOMEM; + } + offload->enable = 1; + taprio_sched_to_offload(q, sched, mqprio, offload); + + err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload); + if (err < 0) { + NL_SET_ERR_MSG(extack, + "Device failed to setup taprio offload"); + goto done; + } + + taprio_offload_config_changed(q); + +done: + taprio_offload_free(offload); + + return err; +} + +static int taprio_disable_offload(struct net_device *dev, + struct taprio_sched *q, + struct netlink_ext_ack *extack) +{ + const struct net_device_ops *ops = dev->netdev_ops; + struct tc_taprio_qopt_offload *offload; + 
int err; + + if (!FULL_OFFLOAD_IS_ENABLED(q->flags)) + return 0; + + if (!ops->ndo_setup_tc) + return -EOPNOTSUPP; + + offload = taprio_offload_alloc(0); + if (!offload) { + NL_SET_ERR_MSG(extack, + "Not enough memory to disable offload mode"); + return -ENOMEM; + } + offload->enable = 0; + + err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload); + if (err < 0) { + NL_SET_ERR_MSG(extack, + "Device failed to disable offload"); + goto out; + } + +out: + taprio_offload_free(offload); + + return err; +} + +/* If full offload is enabled, the only possible clockid is the net device's + * PHC. For that reason, specifying a clockid through netlink is incorrect. + * For txtime-assist, it is implicitly assumed that the device's PHC is kept + * in sync with the specified clockid via a user space daemon such as phc2sys. + * For both software taprio and txtime-assist, the clockid is used for the + * hrtimer that advances the schedule and hence mandatory. + */ +static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb, + struct netlink_ext_ack *extack) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + int err = -EINVAL; + + if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { + const struct ethtool_ops *ops = dev->ethtool_ops; + struct ethtool_ts_info info = { + .cmd = ETHTOOL_GET_TS_INFO, + .phc_index = -1, + }; + + if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { + NL_SET_ERR_MSG(extack, + "The 'clockid' cannot be specified for full offload"); + goto out; + } + + if (ops && ops->get_ts_info) + err = ops->get_ts_info(dev, &info); + + if (err || info.phc_index < 0) { + NL_SET_ERR_MSG(extack, + "Device does not have a PTP clock"); + err = -ENOTSUPP; + goto out; + } + } else if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { + int clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]); + + /* We only support static clockids and we don't allow + * for it to be modified after the first init. 
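The taprio_offload_alloc()/taprio_offload_get()/taprio_offload_free() helpers a little earlier in this hunk hide a reference count behind the structure handed to drivers: the public offload object is the last member of a private wrapper and is recovered with container_of(). A compilable userspace sketch of that pattern (shortened names, a plain counter instead of refcount_t, and the flexible entries[] array omitted):

#include <stddef.h>
#include <stdlib.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct qopt_offload {           /* the part drivers see; the real struct ends in entries[] */
        int num_entries;
};

struct qopt_offload_priv {      /* private wrapper owned by the qdisc */
        int users;              /* stands in for refcount_t */
        struct qopt_offload offload;
};

static struct qopt_offload *offload_alloc(int num_entries)
{
        struct qopt_offload_priv *priv;

        priv = calloc(1, sizeof(*priv) + (size_t)num_entries * sizeof(int));
        if (!priv)
                return NULL;
        priv->users = 1;
        priv->offload.num_entries = num_entries;
        return &priv->offload;
}

static struct qopt_offload *offload_get(struct qopt_offload *offload)
{
        struct qopt_offload_priv *priv =
                container_of(offload, struct qopt_offload_priv, offload);

        priv->users++;          /* a driver keeps a reference past ndo_setup_tc() */
        return offload;
}

static void offload_free(struct qopt_offload *offload)
{
        struct qopt_offload_priv *priv =
                container_of(offload, struct qopt_offload_priv, offload);

        if (--priv->users)
                return;
        free(priv);
}

int main(void)
{
        struct qopt_offload *o = offload_alloc(4);

        if (!o)
                return 1;
        offload_get(o);         /* e.g. the driver queues it for later use */
        offload_free(o);        /* qdisc drops its reference */
        offload_free(o);        /* driver drops the last one: memory released */
        return 0;
}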
+ */ + if (clockid < 0 || + (q->clockid != -1 && q->clockid != clockid)) { + NL_SET_ERR_MSG(extack, + "Changing the 'clockid' of a running schedule is not supported"); + err = -ENOTSUPP; + goto out; + } + + switch (clockid) { + case CLOCK_REALTIME: + q->tk_offset = TK_OFFS_REAL; + break; + case CLOCK_MONOTONIC: + q->tk_offset = TK_OFFS_MAX; + break; + case CLOCK_BOOTTIME: + q->tk_offset = TK_OFFS_BOOT; + break; + case CLOCK_TAI: + q->tk_offset = TK_OFFS_TAI; + break; + default: + NL_SET_ERR_MSG(extack, "Invalid 'clockid'"); + err = -EINVAL; + goto out; + } + + q->clockid = clockid; + } else { + NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory"); + goto out; + } +out: + return err; +} + static int taprio_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { @@ -1024,9 +1355,9 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, struct net_device *dev = qdisc_dev(sch); struct tc_mqprio_qopt *mqprio = NULL; u32 taprio_flags = 0; - int i, err, clockid; unsigned long flags; ktime_t start; + int i, err; err = nla_parse_nested_deprecated(tb, TCA_TAPRIO_ATTR_MAX, opt, taprio_policy, extack); @@ -1042,7 +1373,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, if (q->flags != 0 && q->flags != taprio_flags) { NL_SET_ERR_MSG_MOD(extack, "Changing 'flags' of a running schedule is not supported"); return -EOPNOTSUPP; - } else if (!FLAGS_VALID(taprio_flags)) { + } else if (!taprio_flags_valid(taprio_flags)) { NL_SET_ERR_MSG_MOD(extack, "Specified 'flags' are not valid"); return -EINVAL; } @@ -1082,30 +1413,19 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, goto free_sched; } - if (tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { - clockid = nla_get_s32(tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]); - - /* We only support static clockids and we don't allow - * for it to be modified after the first init. 
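Rather than testing the offload flag on every packet, taprio_change() a little further down installs q->dequeue and q->peek once, under the qdisc lock, so both pointers always belong to the same mode. A small sketch of that one-time dispatch selection (hypothetical scheduler struct, not the kernel one):

#include <stdbool.h>
#include <stdio.h>

struct pkt;

struct sched {
        bool full_offload;
        struct pkt *(*dequeue)(struct sched *q);
        struct pkt *(*peek)(struct sched *q);
};

static struct pkt *dequeue_soft(struct sched *q)    { (void)q; return NULL; } /* gate-driven path */
static struct pkt *dequeue_offload(struct sched *q) { (void)q; return NULL; } /* pass-through path */
static struct pkt *peek_soft(struct sched *q)       { (void)q; return NULL; }
static struct pkt *peek_offload(struct sched *q)    { (void)q; return NULL; }

/* Called once while the qdisc is locked, so dequeue and peek are always
 * kept in a consistent pair. */
static void select_datapath(struct sched *q)
{
        if (q->full_offload) {
                q->dequeue = dequeue_offload;
                q->peek = peek_offload;
        } else {
                q->dequeue = dequeue_soft;
                q->peek = peek_soft;
        }
}

int main(void)
{
        struct sched q = { .full_offload = true };

        select_datapath(&q);
        printf("offload path selected: %d\n", q.dequeue == dequeue_offload);
        return 0;
}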
- */ - if (clockid < 0 || - (q->clockid != -1 && q->clockid != clockid)) { - NL_SET_ERR_MSG(extack, "Changing the 'clockid' of a running schedule is not supported"); - err = -ENOTSUPP; - goto free_sched; - } - - q->clockid = clockid; - } - - if (q->clockid == -1 && !tb[TCA_TAPRIO_ATTR_SCHED_CLOCKID]) { - NL_SET_ERR_MSG(extack, "Specifying a 'clockid' is mandatory"); - err = -EINVAL; + err = taprio_parse_clockid(sch, tb, extack); + if (err < 0) goto free_sched; - } taprio_set_picos_per_byte(dev, q); + if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) + err = taprio_enable_offload(dev, mqprio, q, new_admin, extack); + else + err = taprio_disable_offload(dev, q, extack); + if (err) + goto free_sched; + /* Protects against enqueue()/dequeue() */ spin_lock_bh(qdisc_lock(sch)); @@ -1120,6 +1440,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, } if (!TXTIME_ASSIST_IS_ENABLED(taprio_flags) && + !FULL_OFFLOAD_IS_ENABLED(taprio_flags) && !hrtimer_active(&q->advance_timer)) { hrtimer_init(&q->advance_timer, q->clockid, HRTIMER_MODE_ABS); q->advance_timer.function = advance_sched; @@ -1138,23 +1459,15 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, mqprio->prio_tc_map[i]); } - switch (q->clockid) { - case CLOCK_REALTIME: - q->tk_offset = TK_OFFS_REAL; - break; - case CLOCK_MONOTONIC: - q->tk_offset = TK_OFFS_MAX; - break; - case CLOCK_BOOTTIME: - q->tk_offset = TK_OFFS_BOOT; - break; - case CLOCK_TAI: - q->tk_offset = TK_OFFS_TAI; - break; - default: - NL_SET_ERR_MSG(extack, "Invalid 'clockid'"); - err = -EINVAL; - goto unlock; + if (FULL_OFFLOAD_IS_ENABLED(taprio_flags)) { + q->dequeue = taprio_dequeue_offload; + q->peek = taprio_peek_offload; + } else { + /* Be sure to always keep the function pointers + * in a consistent state. + */ + q->dequeue = taprio_dequeue_soft; + q->peek = taprio_peek_soft; } err = taprio_get_start_time(sch, new_admin, &start); @@ -1216,6 +1529,8 @@ static void taprio_destroy(struct Qdisc *sch) hrtimer_cancel(&q->advance_timer); + taprio_disable_offload(dev, q, NULL); + if (q->qdiscs) { for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++) qdisc_put(q->qdiscs[i]); @@ -1245,6 +1560,9 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt, hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS); q->advance_timer.function = advance_sched; + q->dequeue = taprio_dequeue_soft; + q->peek = taprio_peek_soft; + q->root = sch; /* We only support static clockids. Use an invalid value as default @@ -1427,7 +1745,8 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) if (nla_put(skb, TCA_TAPRIO_ATTR_PRIOMAP, sizeof(opt), &opt)) goto options_error; - if (nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid)) + if (!FULL_OFFLOAD_IS_ENABLED(q->flags) && + nla_put_s32(skb, TCA_TAPRIO_ATTR_SCHED_CLOCKID, q->clockid)) goto options_error; if (q->flags && nla_put_u32(skb, TCA_TAPRIO_ATTR_FLAGS, q->flags)) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 5010cce52c93..d2ffc9a0ba3a 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -54,7 +54,6 @@ static struct sctp_association *sctp_association_init( const struct sock *sk, enum sctp_scope scope, gfp_t gfp) { - struct net *net = sock_net(sk); struct sctp_sock *sp; struct sctp_paramhdr *p; int i; @@ -214,14 +213,6 @@ static struct sctp_association *sctp_association_init( asoc->peer.sack_needed = 1; asoc->peer.sack_generation = 1; - /* Assume that the peer will tell us if he recognizes ASCONF - * as part of INIT exchange. 
- * The sctp_addip_noauth option is there for backward compatibility - * and will revert old behavior. - */ - if (net->sctp.addip_noauth) - asoc->peer.asconf_capable = 1; - /* Create an input queue. */ sctp_inq_init(&asoc->base.inqueue); sctp_inq_set_th_handler(&asoc->base.inqueue, sctp_assoc_bh_rcv); diff --git a/net/sctp/auth.c b/net/sctp/auth.c index de4c78d4a21e..4278764d82b8 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -389,7 +389,7 @@ int sctp_auth_asoc_init_active_key(struct sctp_association *asoc, gfp_t gfp) /* If we don't support AUTH, or peer is not capable * we don't need to do anything. */ - if (!asoc->ep->auth_enable || !asoc->peer.auth_capable) + if (!asoc->peer.auth_capable) return 0; /* If the key_id is non-zero and we couldn't find an @@ -675,7 +675,7 @@ int sctp_auth_send_cid(enum sctp_cid chunk, const struct sctp_association *asoc) if (!asoc) return 0; - if (!asoc->ep->auth_enable || !asoc->peer.auth_capable) + if (!asoc->peer.auth_capable) return 0; return __sctp_auth_cid(chunk, asoc->peer.peer_chunks); @@ -687,7 +687,7 @@ int sctp_auth_recv_cid(enum sctp_cid chunk, const struct sctp_association *asoc) if (!asoc) return 0; - if (!asoc->ep->auth_enable) + if (!asoc->peer.auth_capable) return 0; return __sctp_auth_cid(chunk, @@ -831,10 +831,15 @@ int sctp_auth_set_key(struct sctp_endpoint *ep, /* Try to find the given key id to see if * we are doing a replace, or adding a new key */ - if (asoc) + if (asoc) { + if (!asoc->peer.auth_capable) + return -EACCES; sh_keys = &asoc->endpoint_shared_keys; - else + } else { + if (!ep->auth_enable) + return -EACCES; sh_keys = &ep->endpoint_shared_keys; + } key_for_each(shkey, sh_keys) { if (shkey->key_id == auth_key->sca_keynumber) { @@ -875,10 +880,15 @@ int sctp_auth_set_active_key(struct sctp_endpoint *ep, int found = 0; /* The key identifier MUST correst to an existing key */ - if (asoc) + if (asoc) { + if (!asoc->peer.auth_capable) + return -EACCES; sh_keys = &asoc->endpoint_shared_keys; - else + } else { + if (!ep->auth_enable) + return -EACCES; sh_keys = &ep->endpoint_shared_keys; + } key_for_each(key, sh_keys) { if (key->key_id == key_id) { @@ -911,11 +921,15 @@ int sctp_auth_del_key_id(struct sctp_endpoint *ep, * The key identifier MUST correst to an existing key */ if (asoc) { + if (!asoc->peer.auth_capable) + return -EACCES; if (asoc->active_key_id == key_id) return -EINVAL; sh_keys = &asoc->endpoint_shared_keys; } else { + if (!ep->auth_enable) + return -EACCES; if (ep->active_key_id == key_id) return -EINVAL; @@ -950,11 +964,15 @@ int sctp_auth_deact_key_id(struct sctp_endpoint *ep, * The key identifier MUST correst to an existing key */ if (asoc) { + if (!asoc->peer.auth_capable) + return -EACCES; if (asoc->active_key_id == key_id) return -EINVAL; sh_keys = &asoc->endpoint_shared_keys; } else { + if (!ep->auth_enable) + return -EACCES; if (ep->active_key_id == key_id) return -EINVAL; @@ -989,3 +1007,72 @@ int sctp_auth_deact_key_id(struct sctp_endpoint *ep, return 0; } + +int sctp_auth_init(struct sctp_endpoint *ep, gfp_t gfp) +{ + int err = -ENOMEM; + + /* Allocate space for HMACS and CHUNKS authentication + * variables. There are arrays that we encode directly + * into parameters to make the rest of the operations easier. + */ + if (!ep->auth_hmacs_list) { + struct sctp_hmac_algo_param *auth_hmacs; + + auth_hmacs = kzalloc(struct_size(auth_hmacs, hmac_ids, + SCTP_AUTH_NUM_HMACS), gfp); + if (!auth_hmacs) + goto nomem; + /* Initialize the HMACS parameter. 
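sctp_auth_init() above sizes the HMAC parameter with struct_size(), the overflow-aware idiom for allocating a structure that ends in a flexible array of HMAC identifiers. A userspace equivalent of that allocation (types cut down to the essentials; byte-order conversion omitted):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-in for struct sctp_hmac_algo_param: a parameter header
 * followed by a variable number of 16-bit HMAC identifiers. */
struct hmac_algo_param {
        uint16_t type;
        uint16_t length;
        uint16_t hmac_ids[];    /* flexible array member */
};

#define NUM_HMACS 2             /* stands in for SCTP_AUTH_NUM_HMACS */

int main(void)
{
        struct hmac_algo_param *p;

        /* Equivalent of struct_size(p, hmac_ids, NUM_HMACS): header plus
         * room for NUM_HMACS identifiers, derived from the type itself. */
        p = calloc(1, sizeof(*p) + sizeof(p->hmac_ids[0]) * NUM_HMACS);
        if (!p)
                return 1;

        p->type = 0x8004;       /* HMAC algorithm parameter (RFC 4895), byte order ignored here */
        p->length = sizeof(*p) + sizeof(uint16_t); /* header + the one advertised id */
        p->hmac_ids[0] = 1;     /* HMAC-SHA-1, the mandatory-to-implement algorithm */

        printf("param length %u\n", (unsigned)p->length);
        free(p);
        return 0;
}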
+ * SCTP-AUTH: Section 3.3 + * Every endpoint supporting SCTP chunk authentication MUST + * support the HMAC based on the SHA-1 algorithm. + */ + auth_hmacs->param_hdr.type = SCTP_PARAM_HMAC_ALGO; + auth_hmacs->param_hdr.length = + htons(sizeof(struct sctp_paramhdr) + 2); + auth_hmacs->hmac_ids[0] = htons(SCTP_AUTH_HMAC_ID_SHA1); + ep->auth_hmacs_list = auth_hmacs; + } + + if (!ep->auth_chunk_list) { + struct sctp_chunks_param *auth_chunks; + + auth_chunks = kzalloc(sizeof(*auth_chunks) + + SCTP_NUM_CHUNK_TYPES, gfp); + if (!auth_chunks) + goto nomem; + /* Initialize the CHUNKS parameter */ + auth_chunks->param_hdr.type = SCTP_PARAM_CHUNKS; + auth_chunks->param_hdr.length = + htons(sizeof(struct sctp_paramhdr)); + ep->auth_chunk_list = auth_chunks; + } + + /* Allocate and initialize transorms arrays for supported + * HMACs. + */ + err = sctp_auth_init_hmacs(ep, gfp); + if (err) + goto nomem; + + return 0; + +nomem: + /* Free all allocations */ + kfree(ep->auth_hmacs_list); + kfree(ep->auth_chunk_list); + ep->auth_hmacs_list = NULL; + ep->auth_chunk_list = NULL; + return err; +} + +void sctp_auth_free(struct sctp_endpoint *ep) +{ + kfree(ep->auth_hmacs_list); + kfree(ep->auth_chunk_list); + ep->auth_hmacs_list = NULL; + ep->auth_chunk_list = NULL; + sctp_auth_destroy_hmacs(ep->auth_hmacs); + ep->auth_hmacs = NULL; +} diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 69cebb2c998b..ea53049d1db6 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -43,62 +43,21 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, gfp_t gfp) { struct net *net = sock_net(sk); - struct sctp_hmac_algo_param *auth_hmacs = NULL; - struct sctp_chunks_param *auth_chunks = NULL; struct sctp_shared_key *null_key; - int err; ep->digest = kzalloc(SCTP_SIGNATURE_SIZE, gfp); if (!ep->digest) return NULL; + ep->asconf_enable = net->sctp.addip_enable; ep->auth_enable = net->sctp.auth_enable; if (ep->auth_enable) { - /* Allocate space for HMACS and CHUNKS authentication - * variables. There are arrays that we encode directly - * into parameters to make the rest of the operations easier. - */ - auth_hmacs = kzalloc(struct_size(auth_hmacs, hmac_ids, - SCTP_AUTH_NUM_HMACS), gfp); - if (!auth_hmacs) - goto nomem; - - auth_chunks = kzalloc(sizeof(*auth_chunks) + - SCTP_NUM_CHUNK_TYPES, gfp); - if (!auth_chunks) + if (sctp_auth_init(ep, gfp)) goto nomem; - - /* Initialize the HMACS parameter. - * SCTP-AUTH: Section 3.3 - * Every endpoint supporting SCTP chunk authentication MUST - * support the HMAC based on the SHA-1 algorithm. - */ - auth_hmacs->param_hdr.type = SCTP_PARAM_HMAC_ALGO; - auth_hmacs->param_hdr.length = - htons(sizeof(struct sctp_paramhdr) + 2); - auth_hmacs->hmac_ids[0] = htons(SCTP_AUTH_HMAC_ID_SHA1); - - /* Initialize the CHUNKS parameter */ - auth_chunks->param_hdr.type = SCTP_PARAM_CHUNKS; - auth_chunks->param_hdr.length = - htons(sizeof(struct sctp_paramhdr)); - - /* If the Add-IP functionality is enabled, we must - * authenticate, ASCONF and ASCONF-ACK chunks - */ - if (net->sctp.addip_enable) { - auth_chunks->chunks[0] = SCTP_CID_ASCONF; - auth_chunks->chunks[1] = SCTP_CID_ASCONF_ACK; - auth_chunks->param_hdr.length = - htons(sizeof(struct sctp_paramhdr) + 2); + if (ep->asconf_enable) { + sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF); + sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF_ACK); } - - /* Allocate and initialize transorms arrays for supported - * HMACs. 
- */ - err = sctp_auth_init_hmacs(ep, gfp); - if (err) - goto nomem; } /* Initialize the base structure. */ @@ -145,10 +104,9 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* Add the null key to the endpoint shared keys list and * set the hmcas and chunks pointers. */ - ep->auth_hmacs_list = auth_hmacs; - ep->auth_chunk_list = auth_chunks; ep->prsctp_enable = net->sctp.prsctp_enable; ep->reconf_enable = net->sctp.reconf_enable; + ep->ecn_enable = net->sctp.ecn_enable; /* Remember who we are attached to. */ ep->base.sk = sk; @@ -157,11 +115,8 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, return ep; nomem_shkey: - sctp_auth_destroy_hmacs(ep->auth_hmacs); + sctp_auth_free(ep); nomem: - /* Free all allocations */ - kfree(auth_hmacs); - kfree(auth_chunks); kfree(ep->digest); return NULL; @@ -244,11 +199,7 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) * chunks and hmacs arrays that were allocated */ sctp_auth_destroy_keys(&ep->endpoint_shared_keys); - kfree(ep->auth_hmacs_list); - kfree(ep->auth_chunk_list); - - /* AUTH - Free any allocated HMAC transform containers */ - sctp_auth_destroy_hmacs(ep->auth_hmacs); + sctp_auth_free(ep); /* Cleanup. */ sctp_inq_free(&ep->base.inqueue); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 53746ffeeca3..08d14d86ecfb 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1254,6 +1254,9 @@ static int __net_init sctp_defaults_init(struct net *net) /* Disable AUTH by default. */ net->sctp.auth_enable = 0; + /* Enable ECN by default. */ + net->sctp.ecn_enable = 1; + /* Set SCOPE policy to enabled */ net->sctp.scope_policy = SCTP_SCOPE_POLICY_ENABLE; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 36bd8a6e82df..e41ed2e0ae7d 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -207,7 +207,6 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, const struct sctp_bind_addr *bp, gfp_t gfp, int vparam_len) { - struct net *net = sock_net(asoc->base.sk); struct sctp_supported_ext_param ext_param; struct sctp_adaptation_ind_param aiparam; struct sctp_paramhdr *auth_chunks = NULL; @@ -245,7 +244,9 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, chunksize = sizeof(init) + addrs_len; chunksize += SCTP_PAD4(SCTP_SAT_LEN(num_types)); - chunksize += sizeof(ecap_param); + + if (asoc->ep->ecn_enable) + chunksize += sizeof(ecap_param); if (asoc->ep->prsctp_enable) chunksize += sizeof(prsctp_param); @@ -255,7 +256,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, * the ASCONF,the ASCONF-ACK, and the AUTH chunks in its INIT and * INIT-ACK parameters. */ - if (net->sctp.addip_enable) { + if (asoc->ep->asconf_enable) { extensions[num_ext] = SCTP_CID_ASCONF; extensions[num_ext+1] = SCTP_CID_ASCONF_ACK; num_ext += 2; @@ -336,7 +337,8 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, sctp_addto_chunk(retval, sizeof(sat), &sat); sctp_addto_chunk(retval, num_types * sizeof(__u16), &types); - sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param); + if (asoc->ep->ecn_enable) + sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param); /* Add the supported extensions parameter. 
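Because the ECN-capable parameter is now conditional on ep->ecn_enable, the chunk size reservation in sctp_make_init() and the later sctp_addto_chunk() must be gated by the same test, or the INIT would be built with unused space. In sketch form, using hypothetical buffer helpers rather than the SCTP chunk API:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct param { unsigned short type, len; };

static size_t init_chunk_size(bool ecn_enable, size_t base)
{
        size_t sz = base;

        if (ecn_enable)                 /* reserve room only when advertised */
                sz += sizeof(struct param);
        return sz;
}

static size_t init_chunk_fill(bool ecn_enable, char *buf, size_t base)
{
        size_t off = base;              /* mandatory parameters already laid out */
        struct param ecap = { 0x8000, sizeof(ecap) }; /* ECN-capable param, illustrative */

        if (ecn_enable) {               /* must match the sizing decision above */
                memcpy(buf + off, &ecap, sizeof(ecap));
                off += sizeof(ecap);
        }
        return off;
}

int main(void)
{
        char buf[64] = { 0 };
        bool ecn = false;
        size_t need = init_chunk_size(ecn, 16);
        size_t used = init_chunk_fill(ecn, buf, 16);

        printf("sized %zu, used %zu\n", need, used);
        return 0;
}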
Be nice and add this * fist before addiding the parameters for the extensions themselves @@ -1964,7 +1966,9 @@ static int sctp_process_hn_param(const struct sctp_association *asoc, return 0; } -static int sctp_verify_ext_param(struct net *net, union sctp_params param) +static int sctp_verify_ext_param(struct net *net, + const struct sctp_endpoint *ep, + union sctp_params param) { __u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr); int have_asconf = 0; @@ -1991,7 +1995,7 @@ static int sctp_verify_ext_param(struct net *net, union sctp_params param) if (net->sctp.addip_noauth) return 1; - if (net->sctp.addip_enable && !have_auth && have_asconf) + if (ep->asconf_enable && !have_auth && have_asconf) return 0; return 1; @@ -2001,7 +2005,6 @@ static void sctp_process_ext_param(struct sctp_association *asoc, union sctp_params param) { __u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr); - struct net *net = sock_net(asoc->base.sk); int i; for (i = 0; i < num_ext; i++) { @@ -2023,7 +2026,7 @@ static void sctp_process_ext_param(struct sctp_association *asoc, break; case SCTP_CID_ASCONF: case SCTP_CID_ASCONF_ACK: - if (net->sctp.addip_enable) + if (asoc->ep->asconf_enable) asoc->peer.asconf_capable = 1; break; case SCTP_CID_I_DATA: @@ -2145,12 +2148,12 @@ static enum sctp_ierror sctp_verify_param(struct net *net, break; case SCTP_PARAM_SUPPORTED_EXT: - if (!sctp_verify_ext_param(net, param)) + if (!sctp_verify_ext_param(net, ep, param)) return SCTP_IERROR_ABORT; break; case SCTP_PARAM_SET_PRIMARY: - if (net->sctp.addip_enable) + if (ep->asconf_enable) break; goto fallthrough; @@ -2597,15 +2600,20 @@ do_addr_param: break; case SCTP_PARAM_ECN_CAPABLE: - asoc->peer.ecn_capable = 1; - break; + if (asoc->ep->ecn_enable) { + asoc->peer.ecn_capable = 1; + break; + } + /* Fall Through */ + goto fall_through; + case SCTP_PARAM_ADAPTATION_LAYER_IND: asoc->peer.adaptation_ind = ntohl(param.aind->adaptation_ind); break; case SCTP_PARAM_SET_PRIMARY: - if (!net->sctp.addip_enable) + if (!ep->asconf_enable) goto fall_through; addr_param = param.v + sizeof(struct sctp_addip_param); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 2c244b29a199..0c21c52fc408 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3721,7 +3721,8 @@ enum sctp_disposition sctp_sf_do_asconf(struct net *net, * is received unauthenticated it MUST be silently discarded as * described in [I-D.ietf-tsvwg-sctp-auth]. */ - if (!net->sctp.addip_noauth && !chunk->auth) + if (!asoc->peer.asconf_capable || + (!net->sctp.addip_noauth && !chunk->auth)) return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); @@ -3863,7 +3864,8 @@ enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net, * is received unauthenticated it MUST be silently discarded as * described in [I-D.ietf-tsvwg-sctp-auth]. 
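With the sctp_sf_do_asconf()/sctp_sf_do_asconf_ack() changes above, an ASCONF chunk is only processed when the peer negotiated the capability and, unless addip_noauth is set, the chunk arrived authenticated. The same condition as a standalone predicate (the context struct is invented for illustration):

#include <stdbool.h>
#include <stdio.h>

struct asconf_ctx {
        bool peer_asconf_capable;   /* negotiated via the INIT extensions */
        bool addip_noauth;          /* sysctl escape hatch for old peers */
        bool chunk_authenticated;   /* chunk was covered by an AUTH chunk */
};

/* true -> silently discard the ASCONF / ASCONF-ACK */
static bool asconf_must_discard(const struct asconf_ctx *c)
{
        if (!c->peer_asconf_capable)
                return true;
        if (!c->addip_noauth && !c->chunk_authenticated)
                return true;
        return false;
}

int main(void)
{
        struct asconf_ctx c = { true, false, false };

        printf("%d\n", asconf_must_discard(&c)); /* 1: capable but unauthenticated */
        c.chunk_authenticated = true;
        printf("%d\n", asconf_must_discard(&c)); /* 0: processed */
        return 0;
}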
*/ - if (!net->sctp.addip_noauth && !asconf_ack->auth) + if (!asoc->peer.asconf_capable || + (!net->sctp.addip_noauth && !asconf_ack->auth)) return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index 61ed9c6e3be3..88ea87f4f0e7 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -976,26 +976,22 @@ static const struct sctp_sm_table_entry *sctp_chunk_event_lookup( if (cid <= SCTP_CID_BASE_MAX) return &chunk_event_table[cid][state]; - if (net->sctp.prsctp_enable) { - if (cid == SCTP_CID_FWD_TSN || cid == SCTP_CID_I_FWD_TSN) - return &prsctp_chunk_event_table[0][state]; - } + switch ((u16)cid) { + case SCTP_CID_FWD_TSN: + case SCTP_CID_I_FWD_TSN: + return &prsctp_chunk_event_table[0][state]; - if (net->sctp.addip_enable) { - if (cid == SCTP_CID_ASCONF) - return &addip_chunk_event_table[0][state]; + case SCTP_CID_ASCONF: + return &addip_chunk_event_table[0][state]; - if (cid == SCTP_CID_ASCONF_ACK) - return &addip_chunk_event_table[1][state]; - } + case SCTP_CID_ASCONF_ACK: + return &addip_chunk_event_table[1][state]; - if (net->sctp.reconf_enable) - if (cid == SCTP_CID_RECONF) - return &reconf_chunk_event_table[0][state]; + case SCTP_CID_RECONF: + return &reconf_chunk_event_table[0][state]; - if (net->sctp.auth_enable) { - if (cid == SCTP_CID_AUTH) - return &auth_chunk_event_table[0][state]; + case SCTP_CID_AUTH: + return &auth_chunk_event_table[0][state]; } return &chunk_event_table_unknown[state]; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index b083d4e66230..939b8d2595bc 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -525,7 +525,6 @@ static int sctp_send_asconf_add_ip(struct sock *sk, struct sockaddr *addrs, int addrcnt) { - struct net *net = sock_net(sk); struct sctp_sock *sp; struct sctp_endpoint *ep; struct sctp_association *asoc; @@ -540,12 +539,12 @@ static int sctp_send_asconf_add_ip(struct sock *sk, int i; int retval = 0; - if (!net->sctp.addip_enable) - return retval; - sp = sctp_sk(sk); ep = sp->ep; + if (!ep->asconf_enable) + return retval; + pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n", __func__, sk, addrs, addrcnt); @@ -728,7 +727,6 @@ static int sctp_send_asconf_del_ip(struct sock *sk, struct sockaddr *addrs, int addrcnt) { - struct net *net = sock_net(sk); struct sctp_sock *sp; struct sctp_endpoint *ep; struct sctp_association *asoc; @@ -744,12 +742,12 @@ static int sctp_send_asconf_del_ip(struct sock *sk, int stored = 0; chunk = NULL; - if (!net->sctp.addip_enable) - return retval; - sp = sctp_sk(sk); ep = sp->ep; + if (!ep->asconf_enable) + return retval; + pr_debug("%s: sk:%p, addrs:%p, addrcnt:%d\n", __func__, sk, addrs, addrcnt); @@ -1045,158 +1043,161 @@ out: return err; } -/* __sctp_connect(struct sock* sk, struct sockaddr *kaddrs, int addrs_size) - * - * Common routine for handling connect() and sctp_connectx(). - * Connect will come in with just a single address. 
- */ -static int __sctp_connect(struct sock *sk, - struct sockaddr *kaddrs, - int addrs_size, int flags, - sctp_assoc_t *assoc_id) +static int sctp_connect_new_asoc(struct sctp_endpoint *ep, + const union sctp_addr *daddr, + const struct sctp_initmsg *init, + struct sctp_transport **tp) { + struct sctp_association *asoc; + struct sock *sk = ep->base.sk; struct net *net = sock_net(sk); - struct sctp_sock *sp; - struct sctp_endpoint *ep; - struct sctp_association *asoc = NULL; - struct sctp_association *asoc2; - struct sctp_transport *transport; - union sctp_addr to; enum sctp_scope scope; - long timeo; - int err = 0; - int addrcnt = 0; - int walk_size = 0; - union sctp_addr *sa_addr = NULL; - void *addr_buf; - unsigned short port; + int err; - sp = sctp_sk(sk); - ep = sp->ep; + if (sctp_endpoint_is_peeled_off(ep, daddr)) + return -EADDRNOTAVAIL; - /* connect() cannot be done on a socket that is already in ESTABLISHED - * state - UDP-style peeled off socket or a TCP-style socket that - * is already connected. - * It cannot be done even on a TCP-style listening socket. - */ - if (sctp_sstate(sk, ESTABLISHED) || sctp_sstate(sk, CLOSING) || - (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))) { - err = -EISCONN; - goto out_free; + if (!ep->base.bind_addr.port) { + if (sctp_autobind(sk)) + return -EAGAIN; + } else { + if (ep->base.bind_addr.port < inet_prot_sock(net) && + !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) + return -EACCES; } - /* Walk through the addrs buffer and count the number of addresses. */ - addr_buf = kaddrs; - while (walk_size < addrs_size) { - struct sctp_af *af; - - if (walk_size + sizeof(sa_family_t) > addrs_size) { - err = -EINVAL; - goto out_free; - } + scope = sctp_scope(daddr); + asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL); + if (!asoc) + return -ENOMEM; - sa_addr = addr_buf; - af = sctp_get_af_specific(sa_addr->sa.sa_family); + err = sctp_assoc_set_bind_addr_from_ep(asoc, scope, GFP_KERNEL); + if (err < 0) + goto free; - /* If the address family is not supported or if this address - * causes the address buffer to overflow return EINVAL. - */ - if (!af || (walk_size + af->sockaddr_len) > addrs_size) { - err = -EINVAL; - goto out_free; - } + *tp = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, SCTP_UNKNOWN); + if (!*tp) { + err = -ENOMEM; + goto free; + } - port = ntohs(sa_addr->v4.sin_port); + if (!init) + return 0; - /* Save current address so we can work with it */ - memcpy(&to, sa_addr, af->sockaddr_len); + if (init->sinit_num_ostreams) { + __u16 outcnt = init->sinit_num_ostreams; - err = sctp_verify_addr(sk, &to, af->sockaddr_len); + asoc->c.sinit_num_ostreams = outcnt; + /* outcnt has been changed, need to re-init stream */ + err = sctp_stream_init(&asoc->stream, outcnt, 0, GFP_KERNEL); if (err) - goto out_free; + goto free; + } - /* Make sure the destination port is correctly set - * in all addresses. - */ - if (asoc && asoc->peer.port && asoc->peer.port != port) { - err = -EINVAL; - goto out_free; - } + if (init->sinit_max_instreams) + asoc->c.sinit_max_instreams = init->sinit_max_instreams; - /* Check if there already is a matching association on the - * endpoint (other than the one created here). 
- */ - asoc2 = sctp_endpoint_lookup_assoc(ep, &to, &transport); - if (asoc2 && asoc2 != asoc) { - if (asoc2->state >= SCTP_STATE_ESTABLISHED) - err = -EISCONN; - else - err = -EALREADY; - goto out_free; - } + if (init->sinit_max_attempts) + asoc->max_init_attempts = init->sinit_max_attempts; - /* If we could not find a matching association on the endpoint, - * make sure that there is no peeled-off association matching - * the peer address even on another socket. - */ - if (sctp_endpoint_is_peeled_off(ep, &to)) { - err = -EADDRNOTAVAIL; - goto out_free; - } + if (init->sinit_max_init_timeo) + asoc->max_init_timeo = + msecs_to_jiffies(init->sinit_max_init_timeo); - if (!asoc) { - /* If a bind() or sctp_bindx() is not called prior to - * an sctp_connectx() call, the system picks an - * ephemeral port and will choose an address set - * equivalent to binding with a wildcard address. - */ - if (!ep->base.bind_addr.port) { - if (sctp_autobind(sk)) { - err = -EAGAIN; - goto out_free; - } - } else { - /* - * If an unprivileged user inherits a 1-many - * style socket with open associations on a - * privileged port, it MAY be permitted to - * accept new associations, but it SHOULD NOT - * be permitted to open new associations. - */ - if (ep->base.bind_addr.port < - inet_prot_sock(net) && - !ns_capable(net->user_ns, - CAP_NET_BIND_SERVICE)) { - err = -EACCES; - goto out_free; - } - } + return 0; +free: + sctp_association_free(asoc); + return err; +} - scope = sctp_scope(&to); - asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL); - if (!asoc) { - err = -ENOMEM; - goto out_free; - } +static int sctp_connect_add_peer(struct sctp_association *asoc, + union sctp_addr *daddr, int addr_len) +{ + struct sctp_endpoint *ep = asoc->ep; + struct sctp_association *old; + struct sctp_transport *t; + int err; - err = sctp_assoc_set_bind_addr_from_ep(asoc, scope, - GFP_KERNEL); - if (err < 0) { - goto out_free; - } + err = sctp_verify_addr(ep->base.sk, daddr, addr_len); + if (err) + return err; - } + old = sctp_endpoint_lookup_assoc(ep, daddr, &t); + if (old && old != asoc) + return old->state >= SCTP_STATE_ESTABLISHED ? -EISCONN + : -EALREADY; + + if (sctp_endpoint_is_peeled_off(ep, daddr)) + return -EADDRNOTAVAIL; + + t = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, SCTP_UNKNOWN); + if (!t) + return -ENOMEM; - /* Prime the peer's transport structures. */ - transport = sctp_assoc_add_peer(asoc, &to, GFP_KERNEL, - SCTP_UNKNOWN); - if (!transport) { - err = -ENOMEM; + return 0; +} + +/* __sctp_connect(struct sock* sk, struct sockaddr *kaddrs, int addrs_size) + * + * Common routine for handling connect() and sctp_connectx(). + * Connect will come in with just a single address. 
+ */ +static int __sctp_connect(struct sock *sk, struct sockaddr *kaddrs, + int addrs_size, int flags, sctp_assoc_t *assoc_id) +{ + struct sctp_sock *sp = sctp_sk(sk); + struct sctp_endpoint *ep = sp->ep; + struct sctp_transport *transport; + struct sctp_association *asoc; + void *addr_buf = kaddrs; + union sctp_addr *daddr; + struct sctp_af *af; + int walk_size, err; + long timeo; + + if (sctp_sstate(sk, ESTABLISHED) || sctp_sstate(sk, CLOSING) || + (sctp_style(sk, TCP) && sctp_sstate(sk, LISTENING))) + return -EISCONN; + + daddr = addr_buf; + af = sctp_get_af_specific(daddr->sa.sa_family); + if (!af || af->sockaddr_len > addrs_size) + return -EINVAL; + + err = sctp_verify_addr(sk, daddr, af->sockaddr_len); + if (err) + return err; + + asoc = sctp_endpoint_lookup_assoc(ep, daddr, &transport); + if (asoc) + return asoc->state >= SCTP_STATE_ESTABLISHED ? -EISCONN + : -EALREADY; + + err = sctp_connect_new_asoc(ep, daddr, NULL, &transport); + if (err) + return err; + asoc = transport->asoc; + + addr_buf += af->sockaddr_len; + walk_size = af->sockaddr_len; + while (walk_size < addrs_size) { + err = -EINVAL; + if (walk_size + sizeof(sa_family_t) > addrs_size) goto out_free; - } - addrcnt++; - addr_buf += af->sockaddr_len; + daddr = addr_buf; + af = sctp_get_af_specific(daddr->sa.sa_family); + if (!af || af->sockaddr_len + walk_size > addrs_size) + goto out_free; + + if (asoc->peer.port != ntohs(daddr->v4.sin_port)) + goto out_free; + + err = sctp_connect_add_peer(asoc, daddr, af->sockaddr_len); + if (err) + goto out_free; + + addr_buf += af->sockaddr_len; walk_size += af->sockaddr_len; } @@ -1209,40 +1210,25 @@ static int __sctp_connect(struct sock *sk, goto out_free; } - err = sctp_primitive_ASSOCIATE(net, asoc, NULL); - if (err < 0) { + err = sctp_primitive_ASSOCIATE(sock_net(sk), asoc, NULL); + if (err < 0) goto out_free; - } /* Initialize sk's dport and daddr for getpeername() */ inet_sk(sk)->inet_dport = htons(asoc->peer.port); - sp->pf->to_sk_daddr(sa_addr, sk); + sp->pf->to_sk_daddr(daddr, sk); sk->sk_err = 0; - timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); - if (assoc_id) *assoc_id = asoc->assoc_id; - err = sctp_wait_for_connect(asoc, &timeo); - /* Note: the asoc may be freed after the return of - * sctp_wait_for_connect. - */ - - /* Don't free association on exit. 
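The reworked __sctp_connect() above walks a packed buffer of sockaddrs and requires every entry to carry the same port as the first one (sctp_connect_add_peer() rejects a mismatch). A minimal userspace sketch of building such a buffer, assuming the sctp_connectx() wrapper from lksctp-tools; the helper name, addresses and port here are illustrative only, not part of the patch:

#include <netinet/sctp.h>
#include <arpa/inet.h>
#include <string.h>

static int connect_two_paths(int sd)
{
	struct sockaddr_in addrs[2];	/* packed array, i.e. the kaddrs buffer walked above */
	sctp_assoc_t aid;

	memset(addrs, 0, sizeof(addrs));
	addrs[0].sin_family = AF_INET;
	addrs[0].sin_port = htons(5000);		/* every entry must carry this same port */
	inet_pton(AF_INET, "192.0.2.1", &addrs[0].sin_addr);

	addrs[1] = addrs[0];				/* second path to the same peer */
	inet_pton(AF_INET, "198.51.100.1", &addrs[1].sin_addr);

	return sctp_connectx(sd, (struct sockaddr *)addrs, 2, &aid);
}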
*/ - asoc = NULL; + timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); + return sctp_wait_for_connect(asoc, &timeo); out_free: pr_debug("%s: took out_free path with asoc:%p kaddrs:%p err:%d\n", __func__, asoc, kaddrs, err); - - if (asoc) { - /* sctp_primitive_ASSOCIATE may have added this association - * To the hash table, try to unhash it, just in case, its a noop - * if it wasn't hashed so we're safe - */ - sctp_association_free(asoc); - } + sctp_association_free(asoc); return err; } @@ -1312,7 +1298,8 @@ static int __sctp_setsockopt_connectx(struct sock *sk, pr_debug("%s: sk:%p addrs:%p addrs_size:%d\n", __func__, sk, addrs, addrs_size); - if (unlikely(addrs_size <= 0)) + /* make sure the 1st addr's sa_family is accessible later */ + if (unlikely(addrs_size < sizeof(sa_family_t))) return -EINVAL; kaddrs = memdup_user(addrs, addrs_size); @@ -1660,9 +1647,7 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags, struct sctp_transport **tp) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; - struct net *net = sock_net(sk); struct sctp_association *asoc; - enum sctp_scope scope; struct cmsghdr *cmsg; __be32 flowinfo = 0; struct sctp_af *af; @@ -1677,20 +1662,6 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags, sctp_sstate(sk, CLOSING))) return -EADDRNOTAVAIL; - if (sctp_endpoint_is_peeled_off(ep, daddr)) - return -EADDRNOTAVAIL; - - if (!ep->base.bind_addr.port) { - if (sctp_autobind(sk)) - return -EAGAIN; - } else { - if (ep->base.bind_addr.port < inet_prot_sock(net) && - !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) - return -EACCES; - } - - scope = sctp_scope(daddr); - /* Label connection socket for first association 1-to-many * style for client sequence socket()->sendmsg(). This * needs to be done before sctp_assoc_add_peer() as that will @@ -1706,45 +1677,10 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags, if (err < 0) return err; - asoc = sctp_association_new(ep, sk, scope, GFP_KERNEL); - if (!asoc) - return -ENOMEM; - - if (sctp_assoc_set_bind_addr_from_ep(asoc, scope, GFP_KERNEL) < 0) { - err = -ENOMEM; - goto free; - } - - if (cmsgs->init) { - struct sctp_initmsg *init = cmsgs->init; - - if (init->sinit_num_ostreams) { - __u16 outcnt = init->sinit_num_ostreams; - - asoc->c.sinit_num_ostreams = outcnt; - /* outcnt has been changed, need to re-init stream */ - err = sctp_stream_init(&asoc->stream, outcnt, 0, - GFP_KERNEL); - if (err) - goto free; - } - - if (init->sinit_max_instreams) - asoc->c.sinit_max_instreams = init->sinit_max_instreams; - - if (init->sinit_max_attempts) - asoc->max_init_attempts = init->sinit_max_attempts; - - if (init->sinit_max_init_timeo) - asoc->max_init_timeo = - msecs_to_jiffies(init->sinit_max_init_timeo); - } - - *tp = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, SCTP_UNKNOWN); - if (!*tp) { - err = -ENOMEM; - goto free; - } + err = sctp_connect_new_asoc(ep, daddr, cmsgs->init, tp); + if (err) + return err; + asoc = (*tp)->asoc; if (!cmsgs->addrs_msg) return 0; @@ -1754,8 +1690,6 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags, /* sendv addr list parse */ for_each_cmsghdr(cmsg, cmsgs->addrs_msg) { - struct sctp_transport *transport; - struct sctp_association *old; union sctp_addr _daddr; int dlen; @@ -1789,30 +1723,10 @@ static int sctp_sendmsg_new_asoc(struct sock *sk, __u16 sflags, daddr->v6.sin6_port = htons(asoc->peer.port); memcpy(&daddr->v6.sin6_addr, CMSG_DATA(cmsg), dlen); } - err = sctp_verify_addr(sk, daddr, sizeof(*daddr)); - if (err) - goto free; - - old = 
sctp_endpoint_lookup_assoc(ep, daddr, &transport); - if (old && old != asoc) { - if (old->state >= SCTP_STATE_ESTABLISHED) - err = -EISCONN; - else - err = -EALREADY; - goto free; - } - if (sctp_endpoint_is_peeled_off(ep, daddr)) { - err = -EADDRNOTAVAIL; - goto free; - } - - transport = sctp_assoc_add_peer(asoc, daddr, GFP_KERNEL, - SCTP_UNKNOWN); - if (!transport) { - err = -ENOMEM; + err = sctp_connect_add_peer(asoc, daddr, sizeof(*daddr)); + if (err) goto free; - } } return 0; @@ -3415,7 +3329,6 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optval, unsigned int optlen) { - struct net *net = sock_net(sk); struct sctp_sock *sp; struct sctp_association *asoc = NULL; struct sctp_setpeerprim prim; @@ -3425,7 +3338,7 @@ static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optva sp = sctp_sk(sk); - if (!net->sctp.addip_enable) + if (!sp->ep->asconf_enable) return -EPERM; if (optlen != sizeof(struct sctp_setpeerprim)) @@ -3775,9 +3688,6 @@ static int sctp_setsockopt_auth_key(struct sock *sk, struct sctp_association *asoc; int ret = -EINVAL; - if (!ep->auth_enable) - return -EACCES; - if (optlen <= sizeof(struct sctp_authkey)) return -EINVAL; /* authkey->sca_keylength is u16, so optlen can't be bigger than @@ -3844,9 +3754,6 @@ static int sctp_setsockopt_active_key(struct sock *sk, struct sctp_authkeyid val; int ret = 0; - if (!ep->auth_enable) - return -EACCES; - if (optlen != sizeof(struct sctp_authkeyid)) return -EINVAL; if (copy_from_user(&val, optval, optlen)) @@ -3898,9 +3805,6 @@ static int sctp_setsockopt_del_key(struct sock *sk, struct sctp_authkeyid val; int ret = 0; - if (!ep->auth_enable) - return -EACCES; - if (optlen != sizeof(struct sctp_authkeyid)) return -EINVAL; if (copy_from_user(&val, optval, optlen)) @@ -3951,9 +3855,6 @@ static int sctp_setsockopt_deactivate_key(struct sock *sk, char __user *optval, struct sctp_authkeyid val; int ret = 0; - if (!ep->auth_enable) - return -EACCES; - if (optlen != sizeof(struct sctp_authkeyid)) return -EINVAL; if (copy_from_user(&val, optval, optlen)) @@ -4584,6 +4485,110 @@ static int sctp_setsockopt_event(struct sock *sk, char __user *optval, return retval; } +static int sctp_setsockopt_asconf_supported(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + struct sctp_assoc_value params; + struct sctp_association *asoc; + struct sctp_endpoint *ep; + int retval = -EINVAL; + + if (optlen != sizeof(params)) + goto out; + + if (copy_from_user(&params, optval, optlen)) { + retval = -EFAULT; + goto out; + } + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + goto out; + + ep = sctp_sk(sk)->ep; + ep->asconf_enable = !!params.assoc_value; + + if (ep->asconf_enable && ep->auth_enable) { + sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF); + sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF_ACK); + } + + retval = 0; + +out: + return retval; +} + +static int sctp_setsockopt_auth_supported(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + struct sctp_assoc_value params; + struct sctp_association *asoc; + struct sctp_endpoint *ep; + int retval = -EINVAL; + + if (optlen != sizeof(params)) + goto out; + + if (copy_from_user(&params, optval, optlen)) { + retval = -EFAULT; + goto out; + } + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + goto out; + + ep =
sctp_sk(sk)->ep; + if (params.assoc_value) { + retval = sctp_auth_init(ep, GFP_KERNEL); + if (retval) + goto out; + if (ep->asconf_enable) { + sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF); + sctp_auth_ep_add_chunkid(ep, SCTP_CID_ASCONF_ACK); + } + } + + ep->auth_enable = !!params.assoc_value; + retval = 0; + +out: + return retval; +} + +static int sctp_setsockopt_ecn_supported(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + struct sctp_assoc_value params; + struct sctp_association *asoc; + int retval = -EINVAL; + + if (optlen != sizeof(params)) + goto out; + + if (copy_from_user(&params, optval, optlen)) { + retval = -EFAULT; + goto out; + } + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) + goto out; + + sctp_sk(sk)->ep->ecn_enable = !!params.assoc_value; + retval = 0; + +out: + return retval; +} + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@ -4784,6 +4789,15 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_EVENT: retval = sctp_setsockopt_event(sk, optval, optlen); break; + case SCTP_ASCONF_SUPPORTED: + retval = sctp_setsockopt_asconf_supported(sk, optval, optlen); + break; + case SCTP_AUTH_SUPPORTED: + retval = sctp_setsockopt_auth_supported(sk, optval, optlen); + break; + case SCTP_ECN_SUPPORTED: + retval = sctp_setsockopt_ecn_supported(sk, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -6921,9 +6935,6 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len, struct sctp_authkeyid val; struct sctp_association *asoc; - if (!ep->auth_enable) - return -EACCES; - if (len < sizeof(struct sctp_authkeyid)) return -EINVAL; @@ -6935,10 +6946,15 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len, if (!asoc && val.scact_assoc_id && sctp_style(sk, UDP)) return -EINVAL; - if (asoc) + if (asoc) { + if (!asoc->peer.auth_capable) + return -EACCES; val.scact_keynumber = asoc->active_key_id; - else + } else { + if (!ep->auth_enable) + return -EACCES; val.scact_keynumber = ep->active_key_id; + } if (put_user(len, optlen)) return -EFAULT; @@ -6951,7 +6967,6 @@ static int sctp_getsockopt_active_key(struct sock *sk, int len, static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, char __user *optval, int __user *optlen) { - struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct sctp_authchunks __user *p = (void __user *)optval; struct sctp_authchunks val; struct sctp_association *asoc; @@ -6959,9 +6974,6 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, u32 num_chunks = 0; char __user *to; - if (!ep->auth_enable) - return -EACCES; - if (len < sizeof(struct sctp_authchunks)) return -EINVAL; @@ -6973,6 +6985,9 @@ static int sctp_getsockopt_peer_auth_chunks(struct sock *sk, int len, if (!asoc) return -EINVAL; + if (!asoc->peer.auth_capable) + return -EACCES; + ch = asoc->peer.peer_chunks; if (!ch) goto num; @@ -7004,9 +7019,6 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, u32 num_chunks = 0; char __user *to; - if (!ep->auth_enable) - return -EACCES; - if (len < sizeof(struct sctp_authchunks)) return -EINVAL; @@ -7019,8 +7031,15 @@ static int sctp_getsockopt_local_auth_chunks(struct sock *sk, int len, sctp_style(sk, UDP)) return -EINVAL; - ch = asoc ?
(struct sctp_chunks_param *)asoc->c.auth_chunks - : ep->auth_chunk_list; + if (asoc) { + if (!asoc->peer.auth_capable) + return -EACCES; + ch = (struct sctp_chunks_param *)asoc->c.auth_chunks; + } else { + if (!ep->auth_enable) + return -EACCES; + ch = ep->auth_chunk_list; + } if (!ch) goto num; @@ -7764,6 +7783,123 @@ static int sctp_getsockopt_event(struct sock *sk, int len, char __user *optval, return 0; } +static int sctp_getsockopt_asconf_supported(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + struct sctp_assoc_value params; + struct sctp_association *asoc; + int retval = -EFAULT; + + if (len < sizeof(params)) { + retval = -EINVAL; + goto out; + } + + len = sizeof(params); + if (copy_from_user(&params, optval, len)) + goto out; + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { + retval = -EINVAL; + goto out; + } + + params.assoc_value = asoc ? asoc->peer.asconf_capable + : sctp_sk(sk)->ep->asconf_enable; + + if (put_user(len, optlen)) + goto out; + + if (copy_to_user(optval, &params, len)) + goto out; + + retval = 0; + +out: + return retval; +} + +static int sctp_getsockopt_auth_supported(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + struct sctp_assoc_value params; + struct sctp_association *asoc; + int retval = -EFAULT; + + if (len < sizeof(params)) { + retval = -EINVAL; + goto out; + } + + len = sizeof(params); + if (copy_from_user(&params, optval, len)) + goto out; + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { + retval = -EINVAL; + goto out; + } + + params.assoc_value = asoc ? asoc->peer.auth_capable + : sctp_sk(sk)->ep->auth_enable; + + if (put_user(len, optlen)) + goto out; + + if (copy_to_user(optval, &params, len)) + goto out; + + retval = 0; + +out: + return retval; +} + +static int sctp_getsockopt_ecn_supported(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + struct sctp_assoc_value params; + struct sctp_association *asoc; + int retval = -EFAULT; + + if (len < sizeof(params)) { + retval = -EINVAL; + goto out; + } + + len = sizeof(params); + if (copy_from_user(&params, optval, len)) + goto out; + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc && params.assoc_id != SCTP_FUTURE_ASSOC && + sctp_style(sk, UDP)) { + retval = -EINVAL; + goto out; + } + + params.assoc_value = asoc ?
asoc->peer.ecn_capable + : sctp_sk(sk)->ep->ecn_enable; + + if (put_user(len, optlen)) + goto out; + + if (copy_to_user(optval, &params, len)) + goto out; + + retval = 0; + +out: + return retval; +} + static int sctp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -7965,6 +8101,17 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, case SCTP_EVENT: retval = sctp_getsockopt_event(sk, len, optval, optlen); break; + case SCTP_ASCONF_SUPPORTED: + retval = sctp_getsockopt_asconf_supported(sk, len, optval, + optlen); + break; + case SCTP_AUTH_SUPPORTED: + retval = sctp_getsockopt_auth_supported(sk, len, optval, + optlen); + break; + case SCTP_ECN_SUPPORTED: + retval = sctp_getsockopt_ecn_supported(sk, len, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 1250751bca1b..238cf1737576 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -278,6 +278,13 @@ static struct ctl_table sctp_net_table[] = { .proc_handler = proc_dointvec, }, { + .procname = "ecn_enable", + .data = &init_net.sctp.ecn_enable, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { .procname = "addr_scope_policy", .data = &init_net.sctp.scope_policy, .maxlen = sizeof(int), diff --git a/net/sctp/transport.c b/net/sctp/transport.c index e2f8e369cd08..7235a6032671 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -43,8 +43,8 @@ static struct sctp_transport *sctp_transport_init(struct net *net, gfp_t gfp) { /* Copy in the address. */ - peer->ipaddr = *addr; peer->af_specific = sctp_get_af_specific(addr->sa.sa_family); + memcpy(&peer->ipaddr, addr, peer->af_specific->sockaddr_len); memset(&peer->saddr, 0, sizeof(union sctp_addr)); peer->sack_generation = 0; diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c index 550fdf18d3b3..3b7f721c023b 100644 --- a/net/sunrpc/auth_gss/gss_krb5_keys.c +++ b/net/sunrpc/auth_gss/gss_krb5_keys.c @@ -228,14 +228,11 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e, ret = 0; err_free_raw: - memset(rawkey, 0, keybytes); - kfree(rawkey); + kzfree(rawkey); err_free_out: - memset(outblockdata, 0, blocksize); - kfree(outblockdata); + kzfree(outblockdata); err_free_in: - memset(inblockdata, 0, blocksize); - kfree(inblockdata); + kzfree(inblockdata); err_free_cipher: crypto_free_sync_skcipher(cipher); err_return: diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 1336f3cdad38..6ef1abdd525f 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -185,7 +185,7 @@ static void tipc_bcbase_xmit(struct net *net, struct sk_buff_head *xmitq) } /* We have to transmit across all bearers */ - skb_queue_head_init(&_xmitq); + __skb_queue_head_init(&_xmitq); for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { if (!bb->dests[bearer_id]) continue; @@ -256,7 +256,7 @@ static int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts, struct sk_buff_head xmitq; int rc = 0; - skb_queue_head_init(&xmitq); + __skb_queue_head_init(&xmitq); tipc_bcast_lock(net); if (tipc_link_bc_peers(l)) rc = tipc_link_xmit(l, pkts, &xmitq); @@ -286,7 +286,7 @@ static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts, u32 dnode, selector; selector = msg_link_selector(buf_msg(skb_peek(pkts))); - skb_queue_head_init(&_pkts); + __skb_queue_head_init(&_pkts); list_for_each_entry_safe(dst, tmp, &dests->list, list) { dnode = dst->node; @@ -344,7 +344,7 @@ static int tipc_mcast_send_sync(struct net
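A hedged userspace sketch of the three SCTP_*_SUPPORTED options added above; it assumes headers that already carry the new option names and SCTP_FUTURE_ASSOC, and note the kernel setsockopt side insists on optlen == sizeof(struct sctp_assoc_value). The per-netns default for ECN comes from the net.sctp.ecn_enable sysctl added above; the helper name is made up.

#include <netinet/in.h>
#include <netinet/sctp.h>
#include <sys/socket.h>

static int sctp_tune_features(int sd)
{
	struct sctp_assoc_value av = {
		.assoc_id    = SCTP_FUTURE_ASSOC,	/* affects associations created later */
		.assoc_value = 0,			/* 0 = do not advertise/support */
	};
	socklen_t len = sizeof(av);

	if (setsockopt(sd, IPPROTO_SCTP, SCTP_ECN_SUPPORTED, &av, sizeof(av)) ||
	    setsockopt(sd, IPPROTO_SCTP, SCTP_ASCONF_SUPPORTED, &av, sizeof(av)))
		return -1;

	/* read back whether AUTH will be offered (or, per association, was negotiated) */
	return getsockopt(sd, IPPROTO_SCTP, SCTP_AUTH_SUPPORTED, &av, &len);
}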
*net, struct sk_buff *skb, msg_set_size(_hdr, MCAST_H_SIZE); msg_set_is_rcast(_hdr, !msg_is_rcast(hdr)); - skb_queue_head_init(&tmpq); + __skb_queue_head_init(&tmpq); __skb_queue_tail(&tmpq, _skb); if (method->rcast) tipc_bcast_xmit(net, &tmpq, cong_link_cnt); @@ -378,7 +378,7 @@ int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts, int rc = 0; skb_queue_head_init(&inputq); - skb_queue_head_init(&localq); + __skb_queue_head_init(&localq); /* Clone packets before they are consumed by next call */ if (dests->local && !tipc_msg_reassemble(pkts, &localq)) { @@ -406,8 +406,10 @@ int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts, rc = tipc_bcast_xmit(net, pkts, cong_link_cnt); } - if (dests->local) + if (dests->local) { + tipc_loopback_trace(net, &localq); tipc_sk_mcast_rcv(net, &localq, &inputq); + } exit: /* This queue should normally be empty by now */ __skb_queue_purge(pkts); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index a809c0ec8d15..0214aa1c4427 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -389,6 +389,11 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, dev_put(dev); return -EINVAL; } + if (dev == net->loopback_dev) { + dev_put(dev); + pr_info("Enabling <%s> not permitted\n", b->name); + return -EINVAL; + } /* Autoconfigure own node identity if needed */ if (!tipc_own_id(net) && hwaddr_len <= NODE_ID_LEN) { @@ -674,6 +679,65 @@ void tipc_bearer_stop(struct net *net) } } +void tipc_clone_to_loopback(struct net *net, struct sk_buff_head *pkts) +{ + struct net_device *dev = net->loopback_dev; + struct sk_buff *skb, *_skb; + int exp; + + skb_queue_walk(pkts, _skb) { + skb = pskb_copy(_skb, GFP_ATOMIC); + if (!skb) + continue; + + exp = SKB_DATA_ALIGN(dev->hard_header_len - skb_headroom(skb)); + if (exp > 0 && pskb_expand_head(skb, exp, 0, GFP_ATOMIC)) { + kfree_skb(skb); + continue; + } + + skb_reset_network_header(skb); + dev_hard_header(skb, dev, ETH_P_TIPC, dev->dev_addr, + dev->dev_addr, skb->len); + skb->dev = dev; + skb->pkt_type = PACKET_HOST; + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->protocol = eth_type_trans(skb, dev); + netif_rx_ni(skb); + } +} + +static int tipc_loopback_rcv_pkt(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *od) +{ + consume_skb(skb); + return NET_RX_SUCCESS; +} + +int tipc_attach_loopback(struct net *net) +{ + struct net_device *dev = net->loopback_dev; + struct tipc_net *tn = tipc_net(net); + + if (!dev) + return -ENODEV; + + dev_hold(dev); + tn->loopback_pt.dev = dev; + tn->loopback_pt.type = htons(ETH_P_TIPC); + tn->loopback_pt.func = tipc_loopback_rcv_pkt; + dev_add_pack(&tn->loopback_pt); + return 0; +} + +void tipc_detach_loopback(struct net *net) +{ + struct tipc_net *tn = tipc_net(net); + + dev_remove_pack(&tn->loopback_pt); + dev_put(net->loopback_dev); +} + /* Caller should hold rtnl_lock to protect the bearer */ static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg, struct tipc_bearer *bearer, int nlflags) diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 7f4c569594a5..ea0f3c49cbed 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -232,6 +232,16 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id, struct tipc_media_addr *dst); void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, struct sk_buff_head *xmitq); +void tipc_clone_to_loopback(struct net *net, struct sk_buff_head *pkts); +int tipc_attach_loopback(struct net *net); +void tipc_detach_loopback(struct net *net); + +static inline void tipc_loopback_trace(struct 
net *net, + struct sk_buff_head *pkts) +{ + if (unlikely(dev_nit_active(net->loopback_dev))) + tipc_clone_to_loopback(net, pkts); +} /* check if device MTU is too low for tipc headers */ static inline bool tipc_mtu_bad(struct net_device *dev, unsigned int reserve) diff --git a/net/tipc/core.c b/net/tipc/core.c index c8370722f0bb..23cb379a93d6 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -82,6 +82,10 @@ static int __net_init tipc_init_net(struct net *net) if (err) goto out_bclink; + err = tipc_attach_loopback(net); + if (err) + goto out_bclink; + return 0; out_bclink: @@ -94,6 +98,7 @@ out_sk_rht: static void __net_exit tipc_exit_net(struct net *net) { + tipc_detach_loopback(net); tipc_net_stop(net); tipc_bcast_stop(net); tipc_nametbl_stop(net); diff --git a/net/tipc/core.h b/net/tipc/core.h index 7a68e1b6a066..60d829581068 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -125,6 +125,9 @@ struct tipc_net { /* Cluster capabilities */ u16 capabilities; + + /* Tracing of node internal messages */ + struct packet_type loopback_pt; }; static inline struct tipc_net *tipc_net(struct net *net) diff --git a/net/tipc/group.c b/net/tipc/group.c index 5f98d38bcf08..89257e2a980d 100644 --- a/net/tipc/group.c +++ b/net/tipc/group.c @@ -199,7 +199,7 @@ void tipc_group_join(struct net *net, struct tipc_group *grp, int *sk_rcvbuf) struct tipc_member *m, *tmp; struct sk_buff_head xmitq; - skb_queue_head_init(&xmitq); + __skb_queue_head_init(&xmitq); rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) { tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, &xmitq); tipc_group_update_member(m, 0); @@ -435,7 +435,7 @@ bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport, return true; if (state == MBR_PENDING && adv == ADV_IDLE) return true; - skb_queue_head_init(&xmitq); + __skb_queue_head_init(&xmitq); tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, &xmitq); tipc_node_distr_xmit(grp->net, &xmitq); return true; diff --git a/net/tipc/link.c b/net/tipc/link.c index c2c5c53cad22..6cc75ffd9e2c 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -176,6 +176,7 @@ struct tipc_link { /* Fragmentation/reassembly */ struct sk_buff *reasm_buf; + struct sk_buff *reasm_tnlmsg; /* Broadcast */ u16 ackers; @@ -849,18 +850,31 @@ static int link_schedule_user(struct tipc_link *l, struct tipc_msg *hdr) */ static void link_prepare_wakeup(struct tipc_link *l) { + struct sk_buff_head *wakeupq = &l->wakeupq; + struct sk_buff_head *inputq = l->inputq; struct sk_buff *skb, *tmp; - int imp, i = 0; + struct sk_buff_head tmpq; + int avail[5] = {0,}; + int imp = 0; + + __skb_queue_head_init(&tmpq); + + for (; imp <= TIPC_SYSTEM_IMPORTANCE; imp++) + avail[imp] = l->backlog[imp].limit - l->backlog[imp].len; - skb_queue_walk_safe(&l->wakeupq, skb, tmp) { + skb_queue_walk_safe(wakeupq, skb, tmp) { imp = TIPC_SKB_CB(skb)->chain_imp; - if (l->backlog[imp].len < l->backlog[imp].limit) { - skb_unlink(skb, &l->wakeupq); - skb_queue_tail(l->inputq, skb); - } else if (i++ > 10) { - break; - } + if (avail[imp] <= 0) + continue; + avail[imp]--; + __skb_unlink(skb, wakeupq); + __skb_queue_tail(&tmpq, skb); } + + spin_lock_bh(&inputq->lock); + skb_queue_splice_tail(&tmpq, inputq); + spin_unlock_bh(&inputq->lock); + } void tipc_link_reset(struct tipc_link *l) @@ -893,8 +907,10 @@ void tipc_link_reset(struct tipc_link *l) l->backlog[TIPC_CRITICAL_IMPORTANCE].len = 0; l->backlog[TIPC_SYSTEM_IMPORTANCE].len = 0; kfree_skb(l->reasm_buf); + kfree_skb(l->reasm_tnlmsg); kfree_skb(l->failover_reasm_skb); l->reasm_buf = NULL; + 
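With tipc_attach_loopback() and tipc_loopback_trace() above, node-internal TIPC messages are cloned onto the loopback device whenever a tap is listening there, so an ordinary capture such as tcpdump -i lo sees them. A rough userspace sketch, not part of the patch, that needs CAP_NET_RAW; the ETH_P_ALL tap bound to "lo" is what makes dev_nit_active() true, and the function name is invented:

#include <arpa/inet.h>
#include <linux/if_ether.h>
#include <linux/if_packet.h>
#include <net/if.h>
#include <sys/socket.h>
#include <unistd.h>

static int open_lo_tap(void)
{
	struct sockaddr_ll sll = {
		.sll_family   = AF_PACKET,
		.sll_protocol = htons(ETH_P_ALL),
		.sll_ifindex  = (int)if_nametoindex("lo"),
	};
	int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));

	if (fd < 0)
		return -1;
	if (bind(fd, (struct sockaddr *)&sll, sizeof(sll)) < 0) {
		close(fd);
		return -1;
	}
	/* cloned TIPC frames carry the ETH_P_TIPC (0x88ca) ethertype */
	return fd;
}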
l->reasm_tnlmsg = NULL; l->failover_reasm_skb = NULL; l->rcv_unacked = 0; l->snd_nxt = 1; @@ -936,7 +952,10 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, int rc = 0; if (unlikely(msg_size(hdr) > mtu)) { - skb_queue_purge(list); + pr_warn("Too large msg, purging xmit list %d %d %d %d %d!\n", + skb_queue_len(list), msg_user(hdr), + msg_type(hdr), msg_size(hdr), mtu); + __skb_queue_purge(list); return -EMSGSIZE; } @@ -965,7 +984,7 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, if (likely(skb_queue_len(transmq) < maxwin)) { _skb = skb_clone(skb, GFP_ATOMIC); if (!_skb) { - skb_queue_purge(list); + __skb_queue_purge(list); return -ENOBUFS; } __skb_dequeue(list); @@ -1238,6 +1257,7 @@ static int tipc_link_tnl_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *inputq) { struct sk_buff **reasm_skb = &l->failover_reasm_skb; + struct sk_buff **reasm_tnlmsg = &l->reasm_tnlmsg; struct sk_buff_head *fdefq = &l->failover_deferdq; struct tipc_msg *hdr = buf_msg(skb); struct sk_buff *iskb; @@ -1245,40 +1265,56 @@ static int tipc_link_tnl_rcv(struct tipc_link *l, struct sk_buff *skb, int rc = 0; u16 seqno; - /* SYNCH_MSG */ - if (msg_type(hdr) == SYNCH_MSG) - goto drop; + if (msg_type(hdr) == SYNCH_MSG) { + kfree_skb(skb); + return 0; + } - /* FAILOVER_MSG */ - if (!tipc_msg_extract(skb, &iskb, &ipos)) { - pr_warn_ratelimited("Cannot extract FAILOVER_MSG, defq: %d\n", - skb_queue_len(fdefq)); - return rc; + /* Not a fragment? */ + if (likely(!msg_nof_fragms(hdr))) { + if (unlikely(!tipc_msg_extract(skb, &iskb, &ipos))) { + pr_warn_ratelimited("Unable to extract msg, defq: %d\n", + skb_queue_len(fdefq)); + return 0; + } + kfree_skb(skb); + } else { + /* Set fragment type for buf_append */ + if (msg_fragm_no(hdr) == 1) + msg_set_type(hdr, FIRST_FRAGMENT); + else if (msg_fragm_no(hdr) < msg_nof_fragms(hdr)) + msg_set_type(hdr, FRAGMENT); + else + msg_set_type(hdr, LAST_FRAGMENT); + + if (!tipc_buf_append(reasm_tnlmsg, &skb)) { + /* Successful but non-complete reassembly? 
*/ + if (*reasm_tnlmsg || link_is_bc_rcvlink(l)) + return 0; + pr_warn_ratelimited("Unable to reassemble tunnel msg\n"); + return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); + } + iskb = skb; } do { seqno = buf_seqno(iskb); - if (unlikely(less(seqno, l->drop_point))) { kfree_skb(iskb); continue; } - if (unlikely(seqno != l->drop_point)) { __tipc_skb_queue_sorted(fdefq, seqno, iskb); continue; } l->drop_point++; - if (!tipc_data_input(l, iskb, inputq)) rc |= tipc_link_input(l, iskb, inputq, reasm_skb); if (unlikely(rc)) break; } while ((iskb = __tipc_skb_dequeue(fdefq, l->drop_point))); -drop: - kfree_skb(skb); return rc; } @@ -1644,7 +1680,7 @@ void tipc_link_create_dummy_tnl_msg(struct tipc_link *l, struct sk_buff *skb; u32 dnode = l->addr; - skb_queue_head_init(&tnlq); + __skb_queue_head_init(&tnlq); skb = tipc_msg_create(TUNNEL_PROTOCOL, FAILOVER_MSG, INT_H_SIZE, BASIC_H_SIZE, dnode, onode, 0, 0, 0); @@ -1675,14 +1711,18 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, struct sk_buff *skb, *tnlskb; struct tipc_msg *hdr, tnlhdr; struct sk_buff_head *queue = &l->transmq; - struct sk_buff_head tmpxq, tnlq; + struct sk_buff_head tmpxq, tnlq, frags; u16 pktlen, pktcnt, seqno = l->snd_nxt; + bool pktcnt_need_update = false; + u16 syncpt; + int rc; if (!tnl) return; - skb_queue_head_init(&tnlq); - skb_queue_head_init(&tmpxq); + __skb_queue_head_init(&tnlq); + __skb_queue_head_init(&tmpxq); + __skb_queue_head_init(&frags); /* At least one packet required for safe algorithm => add dummy */ skb = tipc_msg_create(TIPC_LOW_IMPORTANCE, TIPC_DIRECT_MSG, @@ -1692,10 +1732,35 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, pr_warn("%sunable to create tunnel packet\n", link_co_err); return; } - skb_queue_tail(&tnlq, skb); + __skb_queue_tail(&tnlq, skb); tipc_link_xmit(l, &tnlq, &tmpxq); __skb_queue_purge(&tmpxq); + /* Link Synching: + * From now on, send only one single ("dummy") SYNCH message + * to peer. The SYNCH message does not contain any data, just + * a header conveying the synch point to the peer. + */ + if (mtyp == SYNCH_MSG && (tnl->peer_caps & TIPC_TUNNEL_ENHANCED)) { + tnlskb = tipc_msg_create(TUNNEL_PROTOCOL, SYNCH_MSG, + INT_H_SIZE, 0, l->addr, + tipc_own_addr(l->net), + 0, 0, 0); + if (!tnlskb) { + pr_warn("%sunable to create dummy SYNCH_MSG\n", + link_co_err); + return; + } + + hdr = buf_msg(tnlskb); + syncpt = l->snd_nxt + skb_queue_len(&l->backlogq) - 1; + msg_set_syncpt(hdr, syncpt); + msg_set_bearer_id(hdr, l->peer_bearer_id); + __skb_queue_tail(&tnlq, tnlskb); + tipc_link_xmit(tnl, &tnlq, xmitq); + return; + } + /* Initialize reusable tunnel packet header */ tipc_msg_init(tipc_own_addr(l->net), &tnlhdr, TUNNEL_PROTOCOL, mtyp, INT_H_SIZE, l->addr); @@ -1713,6 +1778,39 @@ tnl: if (queue == &l->backlogq) msg_set_seqno(hdr, seqno++); pktlen = msg_size(hdr); + + /* Tunnel link MTU is not large enough? 
This could be + * due to: + * 1) Link MTU has just changed or set differently; + * 2) Or FAILOVER on the top of a SYNCH message + * + * The 2nd case should not happen if peer supports + * TIPC_TUNNEL_ENHANCED + */ + if (pktlen > tnl->mtu - INT_H_SIZE) { + if (mtyp == FAILOVER_MSG && + (tnl->peer_caps & TIPC_TUNNEL_ENHANCED)) { + rc = tipc_msg_fragment(skb, &tnlhdr, tnl->mtu, + &frags); + if (rc) { + pr_warn("%sunable to frag msg: rc %d\n", + link_co_err, rc); + return; + } + pktcnt += skb_queue_len(&frags) - 1; + pktcnt_need_update = true; + skb_queue_splice_tail_init(&frags, &tnlq); + continue; + } + /* Unluckily, peer doesn't have TIPC_TUNNEL_ENHANCED + * => Just warn it and return! + */ + pr_warn_ratelimited("%stoo large msg <%d, %d>: %d!\n", + link_co_err, msg_user(hdr), + msg_type(hdr), msg_size(hdr)); + return; + } + msg_set_size(&tnlhdr, pktlen + INT_H_SIZE); tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE, GFP_ATOMIC); if (!tnlskb) { @@ -1728,6 +1826,12 @@ tnl: goto tnl; } + if (pktcnt_need_update) + skb_queue_walk(&tnlq, skb) { + hdr = buf_msg(skb); + msg_set_msgcnt(hdr, pktcnt); + } + tipc_link_xmit(tnl, &tnlq, xmitq); if (mtyp == FAILOVER_MSG) { diff --git a/net/tipc/msg.c b/net/tipc/msg.c index f48e5857210f..e6d49cdc61b4 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -244,6 +244,65 @@ bool tipc_msg_validate(struct sk_buff **_skb) } /** + * tipc_msg_fragment - build a fragment skb list for TIPC message + * + * @skb: TIPC message skb + * @hdr: internal msg header to be put on the top of the fragments + * @pktmax: max size of a fragment incl. the header + * @frags: returned fragment skb list + * + * Returns 0 if the fragmentation is successful, otherwise: -EINVAL + * or -ENOMEM + */ +int tipc_msg_fragment(struct sk_buff *skb, const struct tipc_msg *hdr, + int pktmax, struct sk_buff_head *frags) +{ + int pktno, nof_fragms, dsz, dmax, eat; + struct tipc_msg *_hdr; + struct sk_buff *_skb; + u8 *data; + + /* Non-linear buffer? 
*/ + if (skb_linearize(skb)) + return -ENOMEM; + + data = (u8 *)skb->data; + dsz = msg_size(buf_msg(skb)); + dmax = pktmax - INT_H_SIZE; + if (dsz <= dmax || !dmax) + return -EINVAL; + + nof_fragms = dsz / dmax + 1; + for (pktno = 1; pktno <= nof_fragms; pktno++) { + if (pktno < nof_fragms) + eat = dmax; + else + eat = dsz % dmax; + /* Allocate a new fragment */ + _skb = tipc_buf_acquire(INT_H_SIZE + eat, GFP_ATOMIC); + if (!_skb) + goto error; + skb_orphan(_skb); + __skb_queue_tail(frags, _skb); + /* Copy header & data to the fragment */ + skb_copy_to_linear_data(_skb, hdr, INT_H_SIZE); + skb_copy_to_linear_data_offset(_skb, INT_H_SIZE, data, eat); + data += eat; + /* Update the fragment's header */ + _hdr = buf_msg(_skb); + msg_set_fragm_no(_hdr, pktno); + msg_set_nof_fragms(_hdr, nof_fragms); + msg_set_size(_hdr, INT_H_SIZE + eat); + } + return 0; + +error: + __skb_queue_purge(frags); + __skb_queue_head_init(frags); + return -ENOMEM; +} + +/** * tipc_msg_build - create buffer chain containing specified header and data * @mhdr: Message header, to be prepended to data * @m: User message diff --git a/net/tipc/msg.h b/net/tipc/msg.h index d7ebc9e955f6..0daa6f04ca81 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -723,12 +723,26 @@ static inline void msg_set_last_bcast(struct tipc_msg *m, u32 n) msg_set_bits(m, 4, 16, 0xffff, n); } +static inline u32 msg_nof_fragms(struct tipc_msg *m) +{ + return msg_bits(m, 4, 0, 0xffff); +} + +static inline void msg_set_nof_fragms(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 4, 0, 0xffff, n); +} + +static inline u32 msg_fragm_no(struct tipc_msg *m) +{ + return msg_bits(m, 4, 16, 0xffff); +} + static inline void msg_set_fragm_no(struct tipc_msg *m, u32 n) { msg_set_bits(m, 4, 16, 0xffff, n); } - static inline u16 msg_next_sent(struct tipc_msg *m) { return msg_bits(m, 4, 0, 0xffff); @@ -879,6 +893,16 @@ static inline void msg_set_msgcnt(struct tipc_msg *m, u16 n) msg_set_bits(m, 9, 16, 0xffff, n); } +static inline u16 msg_syncpt(struct tipc_msg *m) +{ + return msg_bits(m, 9, 16, 0xffff); +} + +static inline void msg_set_syncpt(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 9, 16, 0xffff, n); +} + static inline u32 msg_conn_ack(struct tipc_msg *m) { return msg_bits(m, 9, 16, 0xffff); @@ -1037,6 +1061,8 @@ bool tipc_msg_bundle(struct sk_buff *skb, struct tipc_msg *msg, u32 mtu); bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg, u32 mtu, u32 dnode); bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos); +int tipc_msg_fragment(struct sk_buff *skb, const struct tipc_msg *hdr, + int pktmax, struct sk_buff_head *frags); int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, int offset, int dsz, int mtu, struct sk_buff_head *list); bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err); diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 241ed2274473..836e629e8f4a 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -190,7 +190,7 @@ void tipc_named_node_up(struct net *net, u32 dnode) struct name_table *nt = tipc_name_table(net); struct sk_buff_head head; - skb_queue_head_init(&head); + __skb_queue_head_init(&head); read_lock_bh(&nt->cluster_scope_lock); named_distribute(net, &head, dnode, &nt->cluster_scope); diff --git a/net/tipc/node.c b/net/tipc/node.c index 3a5be1d7e572..c8f6177dd5a2 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1443,13 +1443,15 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, int rc; if (in_own_node(net, dnode)) { + 
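A worked instance of the sizing math in tipc_msg_fragment() above, with illustrative numbers that are not from the patch:

int dsz = 3000;				/* msg_size() of the message being tunnelled */
int dmax = 1300;			/* pktmax - INT_H_SIZE, payload room per fragment */
int nof_fragms = dsz / dmax + 1;	/* 3000 / 1300 + 1 = 3 fragments */
int last = dsz % dmax;			/* 3000 % 1300 = 400 bytes in the final fragment */
/* fragments carry 1300 + 1300 + 400 = 3000 bytes; each also gets an INT_H_SIZE header */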
tipc_loopback_trace(net, list); + spin_lock_init(&list->lock); tipc_sk_rcv(net, list); return 0; } n = tipc_node_find(net, dnode); if (unlikely(!n)) { - skb_queue_purge(list); + __skb_queue_purge(list); return -EHOSTUNREACH; } @@ -1458,7 +1460,7 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, if (unlikely(bearer_id == INVALID_BEARER_ID)) { tipc_node_read_unlock(n); tipc_node_put(n); - skb_queue_purge(list); + __skb_queue_purge(list); return -EHOSTUNREACH; } @@ -1490,7 +1492,7 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, { struct sk_buff_head head; - skb_queue_head_init(&head); + __skb_queue_head_init(&head); __skb_queue_tail(&head, skb); tipc_node_xmit(net, &head, dnode, selector); return 0; @@ -1649,7 +1651,6 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, int usr = msg_user(hdr); int mtyp = msg_type(hdr); u16 oseqno = msg_seqno(hdr); - u16 iseqno = msg_seqno(msg_inner_hdr(hdr)); u16 exp_pkts = msg_msgcnt(hdr); u16 rcv_nxt, syncpt, dlv_nxt, inputq_len; int state = n->state; @@ -1748,7 +1749,10 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, /* Initiate synch mode if applicable */ if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG) && (oseqno == 1)) { - syncpt = iseqno + exp_pkts - 1; + if (n->capabilities & TIPC_TUNNEL_ENHANCED) + syncpt = msg_syncpt(hdr); + else + syncpt = msg_seqno(msg_inner_hdr(hdr)) + exp_pkts - 1; if (!tipc_link_is_up(l)) __tipc_node_link_up(n, bearer_id, xmitq); if (n->state == SELF_UP_PEER_UP) { diff --git a/net/tipc/node.h b/net/tipc/node.h index c0bf49ea3de4..291d0ecd4101 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -53,7 +53,8 @@ enum { TIPC_NODE_ID128 = (1 << 5), TIPC_LINK_PROTO_SEQNO = (1 << 6), TIPC_MCAST_RBCTL = (1 << 7), - TIPC_GAP_ACK_BLOCK = (1 << 8) + TIPC_GAP_ACK_BLOCK = (1 << 8), + TIPC_TUNNEL_ENHANCED = (1 << 9) }; #define TIPC_NODE_CAPABILITIES (TIPC_SYN_BIT | \ @@ -64,7 +65,8 @@ enum { TIPC_NODE_ID128 | \ TIPC_LINK_PROTO_SEQNO | \ TIPC_MCAST_RBCTL | \ - TIPC_GAP_ACK_BLOCK) + TIPC_GAP_ACK_BLOCK | \ + TIPC_TUNNEL_ENHANCED) #define INVALID_BEARER_ID -1 void tipc_node_stop(struct net *net); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 83ae41d7e554..3b9f8cc328f5 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -809,7 +809,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, msg_set_nameupper(hdr, seq->upper); /* Build message as chain of buffers */ - skb_queue_head_init(&pkts); + __skb_queue_head_init(&pkts); rc = tipc_msg_build(hdr, msg, 0, dlen, mtu, &pkts); /* Send message if build was successful */ @@ -853,7 +853,7 @@ static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk, msg_set_grp_bc_seqno(hdr, bc_snd_nxt); /* Build message as chain of buffers */ - skb_queue_head_init(&pkts); + __skb_queue_head_init(&pkts); mtu = tipc_node_get_mtu(net, dnode, tsk->portid); rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); if (unlikely(rc != dlen)) @@ -1058,7 +1058,7 @@ static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, msg_set_grp_bc_ack_req(hdr, ack); /* Build message as chain of buffers */ - skb_queue_head_init(&pkts); + __skb_queue_head_init(&pkts); rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); if (unlikely(rc != dlen)) return rc; @@ -1387,7 +1387,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) if (unlikely(rc)) return rc; - skb_queue_head_init(&pkts); + __skb_queue_head_init(&pkts); mtu = tipc_node_get_mtu(net, dnode, 
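For the TIPC_TUNNEL_ENHANCED case above, the dummy SYNCH message carries the synch point explicitly instead of letting the receiver infer it; an illustration with made-up numbers:

/* If l->snd_nxt == 100 and five packets sit in l->backlogq, the sender puts
 * syncpt = 100 + 5 - 1 = 104 (the seqno of the last queued packet) into the
 * dummy SYNCH message via msg_set_syncpt(); a TIPC_TUNNEL_ENHANCED peer reads
 * it back with msg_syncpt() rather than reconstructing it from the inner
 * header sequence number plus the tunnelled packet count. */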
tsk->portid); rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); if (unlikely(rc != dlen)) @@ -1445,7 +1445,7 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen) int send, sent = 0; int rc = 0; - skb_queue_head_init(&pkts); + __skb_queue_head_init(&pkts); if (unlikely(dlen > INT_MAX)) return -EMSGSIZE; @@ -1805,7 +1805,7 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, /* Send group flow control advertisement when applicable */ if (tsk->group && msg_in_group(hdr) && !grp_evt) { - skb_queue_head_init(&xmitq); + __skb_queue_head_init(&xmitq); tipc_group_update_rcv_win(tsk->group, tsk_blocks(hlen + dlen), msg_orignode(hdr), msg_origport(hdr), &xmitq); @@ -2674,7 +2674,7 @@ static void tipc_sk_timeout(struct timer_list *t) struct sk_buff_head list; int rc = 0; - skb_queue_head_init(&list); + __skb_queue_head_init(&list); bh_lock_sock(sk); /* Try again later if socket is busy */ diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index ca8ac96d22a9..3a12fc18239b 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -40,6 +40,7 @@ #include "socket.h" #include "addr.h" #include "msg.h" +#include "bearer.h" #include <net/sock.h> #include <linux/module.h> @@ -608,6 +609,7 @@ static void tipc_topsrv_kern_evt(struct net *net, struct tipc_event *evt) memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt)); skb_queue_head_init(&evtq); __skb_queue_tail(&evtq, skb); + tipc_loopback_trace(net, &evtq); tipc_sk_rcv(net, &evtq); } diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 43922d86e510..f959487c5cd1 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -61,7 +61,7 @@ static void tls_device_free_ctx(struct tls_context *ctx) if (ctx->rx_conf == TLS_HW) kfree(tls_offload_ctx_rx(ctx)); - tls_ctx_free(ctx); + tls_ctx_free(NULL, ctx); } static void tls_device_gc_task(struct work_struct *work) @@ -122,13 +122,10 @@ static struct net_device *get_netdev_for_sock(struct sock *sk) static void destroy_record(struct tls_record_info *record) { - int nr_frags = record->num_frags; - skb_frag_t *frag; + int i; - while (nr_frags-- > 0) { - frag = &record->frags[nr_frags]; - __skb_frag_unref(frag); - } + for (i = 0; i < record->num_frags; i++) + __skb_frag_unref(&record->frags[i]); kfree(record); } @@ -159,12 +156,8 @@ static void tls_icsk_clean_acked(struct sock *sk, u32 acked_seq) spin_lock_irqsave(&ctx->lock, flags); info = ctx->retransmit_hint; - if (info && !before(acked_seq, info->end_seq)) { + if (info && !before(acked_seq, info->end_seq)) ctx->retransmit_hint = NULL; - list_del(&info->list); - destroy_record(info); - deleted_records++; - } list_for_each_entry_safe(info, temp, &ctx->records_list, list) { if (before(acked_seq, info->end_seq)) @@ -243,14 +236,14 @@ static void tls_append_frag(struct tls_record_info *record, skb_frag_t *frag; frag = &record->frags[record->num_frags - 1]; - if (frag->page.p == pfrag->page && - frag->page_offset + frag->size == pfrag->offset) { - frag->size += size; + if (skb_frag_page(frag) == pfrag->page && + skb_frag_off(frag) + skb_frag_size(frag) == pfrag->offset) { + skb_frag_size_add(frag, size); } else { ++frag; - frag->page.p = pfrag->page; - frag->page_offset = pfrag->offset; - frag->size = size; + __skb_frag_set_page(frag, pfrag->page); + skb_frag_off_set(frag, pfrag->offset); + skb_frag_size_set(frag, size); ++record->num_frags; get_page(pfrag->page); } @@ -263,33 +256,15 @@ static int tls_push_record(struct sock *sk, struct tls_context *ctx, struct tls_offload_context_tx *offload_ctx, struct 
tls_record_info *record, - struct page_frag *pfrag, - int flags, - unsigned char record_type) + int flags) { struct tls_prot_info *prot = &ctx->prot_info; struct tcp_sock *tp = tcp_sk(sk); - struct page_frag dummy_tag_frag; skb_frag_t *frag; int i; - /* fill prepend */ - frag = &record->frags[0]; - tls_fill_prepend(ctx, - skb_frag_address(frag), - record->len - prot->prepend_size, - record_type, - prot->version); - - /* HW doesn't care about the data in the tag, because it fills it. */ - dummy_tag_frag.page = skb_frag_page(frag); - dummy_tag_frag.offset = 0; - - tls_append_frag(record, &dummy_tag_frag, prot->tag_size); record->end_seq = tp->write_seq + record->len; - spin_lock_irq(&offload_ctx->lock); - list_add_tail(&record->list, &offload_ctx->records_list); - spin_unlock_irq(&offload_ctx->lock); + list_add_tail_rcu(&record->list, &offload_ctx->records_list); offload_ctx->open_record = NULL; if (test_bit(TLS_TX_SYNC_SCHED, &ctx->flags)) @@ -301,8 +276,8 @@ static int tls_push_record(struct sock *sk, frag = &record->frags[i]; sg_unmark_end(&offload_ctx->sg_tx_data[i]); sg_set_page(&offload_ctx->sg_tx_data[i], skb_frag_page(frag), - frag->size, frag->page_offset); - sk_mem_charge(sk, frag->size); + skb_frag_size(frag), skb_frag_off(frag)); + sk_mem_charge(sk, skb_frag_size(frag)); get_page(skb_frag_page(frag)); } sg_mark_end(&offload_ctx->sg_tx_data[record->num_frags - 1]); @@ -311,6 +286,38 @@ static int tls_push_record(struct sock *sk, return tls_push_sg(sk, ctx, offload_ctx->sg_tx_data, 0, flags); } +static int tls_device_record_close(struct sock *sk, + struct tls_context *ctx, + struct tls_record_info *record, + struct page_frag *pfrag, + unsigned char record_type) +{ + struct tls_prot_info *prot = &ctx->prot_info; + int ret; + + /* append tag + * device will fill in the tag, we just need to append a placeholder + * use socket memory to improve coalescing (re-using a single buffer + * increases frag count) + * if we can't allocate memory now, steal some back from data + */ + if (likely(skb_page_frag_refill(prot->tag_size, pfrag, + sk->sk_allocation))) { + ret = 0; + tls_append_frag(record, pfrag, prot->tag_size); + } else { + ret = prot->tag_size; + if (record->len <= prot->overhead_size) + return -ENOMEM; + } + + /* fill prepend */ + tls_fill_prepend(ctx, skb_frag_address(&record->frags[0]), + record->len - prot->overhead_size, + record_type, prot->version); + return ret; +} + static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx, struct page_frag *pfrag, size_t prepend_size) @@ -324,7 +331,7 @@ static int tls_create_new_record(struct tls_offload_context_tx *offload_ctx, frag = &record->frags[0]; __skb_frag_set_page(frag, pfrag->page); - frag->page_offset = pfrag->offset; + skb_frag_off_set(frag, pfrag->offset); skb_frag_size_set(frag, prepend_size); get_page(pfrag->page); @@ -365,6 +372,31 @@ static int tls_do_allocation(struct sock *sk, return 0; } +static int tls_device_copy_data(void *addr, size_t bytes, struct iov_iter *i) +{ + size_t pre_copy, nocache; + + pre_copy = ~((unsigned long)addr - 1) & (SMP_CACHE_BYTES - 1); + if (pre_copy) { + pre_copy = min(pre_copy, bytes); + if (copy_from_iter(addr, pre_copy, i) != pre_copy) + return -EFAULT; + bytes -= pre_copy; + addr += pre_copy; + } + + nocache = round_down(bytes, SMP_CACHE_BYTES); + if (copy_from_iter_nocache(addr, nocache, i) != nocache) + return -EFAULT; + bytes -= nocache; + addr += nocache; + + if (bytes && copy_from_iter(addr, bytes, i) != bytes) + return -EFAULT; + + return 0; +} + static int 
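A worked instance of the three-way split done by tls_device_copy_data() above, assuming SMP_CACHE_BYTES == 64 and illustrative values for addr and bytes:

unsigned long addr = 0x1038;			/* 56 bytes into a 64-byte cache line */
size_t bytes = 200;
size_t pre_copy = ~(addr - 1) & (64 - 1);	/* 8 cached bytes up to the next line boundary */
size_t nocache  = (bytes - pre_copy) & ~63UL;	/* round_down(192, 64) = 192 bytes copied non-temporally */
size_t tail     = bytes - pre_copy - nocache;	/* 0 bytes left for the cached tail copy */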
tls_push_data(struct sock *sk, struct iov_iter *msg_iter, size_t size, int flags, @@ -438,12 +470,10 @@ handle_error: copy = min_t(size_t, size, (pfrag->size - pfrag->offset)); copy = min_t(size_t, copy, (max_open_record_len - record->len)); - if (copy_from_iter_nocache(page_address(pfrag->page) + - pfrag->offset, - copy, msg_iter) != copy) { - rc = -EFAULT; + rc = tls_device_copy_data(page_address(pfrag->page) + + pfrag->offset, copy, msg_iter); + if (rc) goto handle_error; - } tls_append_frag(record, pfrag, copy); size -= copy; @@ -461,13 +491,24 @@ last_record: if (done || record->len >= max_open_record_len || (record->num_frags >= MAX_SKB_FRAGS - 1)) { + rc = tls_device_record_close(sk, tls_ctx, record, + pfrag, record_type); + if (rc) { + if (rc > 0) { + size += rc; + } else { + size = orig_size; + destroy_record(record); + ctx->open_record = NULL; + break; + } + } + rc = tls_push_record(sk, tls_ctx, ctx, record, - pfrag, - tls_push_record_flags, - record_type); + tls_push_record_flags); if (rc < 0) break; } @@ -542,12 +583,16 @@ struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context, /* if retransmit_hint is irrelevant start * from the beggining of the list */ - info = list_first_entry(&context->records_list, - struct tls_record_info, list); + info = list_first_entry_or_null(&context->records_list, + struct tls_record_info, list); + if (!info) + return NULL; record_sn = context->unacked_record_sn; } - list_for_each_entry_from(info, &context->records_list, list) { + /* We just need the _rcu for the READ_ONCE() */ + rcu_read_lock(); + list_for_each_entry_from_rcu(info, &context->records_list, list) { if (before(seq, info->end_seq)) { if (!context->retransmit_hint || after(info->end_seq, @@ -556,12 +601,15 @@ struct tls_record_info *tls_get_record(struct tls_offload_context_tx *context, context->retransmit_hint = info; } *p_record_sn = record_sn; - return info; + goto exit_rcu_unlock; } record_sn++; } + info = NULL; - return NULL; +exit_rcu_unlock: + rcu_read_unlock(); + return info; } EXPORT_SYMBOL(tls_get_record); @@ -838,22 +886,18 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) struct net_device *netdev; char *iv, *rec_seq; struct sk_buff *skb; - int rc = -EINVAL; __be64 rcd_sn; + int rc; if (!ctx) - goto out; + return -EINVAL; - if (ctx->priv_ctx_tx) { - rc = -EEXIST; - goto out; - } + if (ctx->priv_ctx_tx) + return -EEXIST; start_marker_record = kmalloc(sizeof(*start_marker_record), GFP_KERNEL); - if (!start_marker_record) { - rc = -ENOMEM; - goto out; - } + if (!start_marker_record) + return -ENOMEM; offload_ctx = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_TX, GFP_KERNEL); if (!offload_ctx) { @@ -939,17 +983,11 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) if (skb) TCP_SKB_CB(skb)->eor = 1; - /* We support starting offload on multiple sockets - * concurrently, so we only need a read lock here. - * This lock must precede get_netdev_for_sock to prevent races between - * NETDEV_DOWN and setsockopt. 
- */ - down_read(&device_offload_lock); netdev = get_netdev_for_sock(sk); if (!netdev) { pr_err_ratelimited("%s: netdev not found\n", __func__); rc = -EINVAL; - goto release_lock; + goto disable_cad; } if (!(netdev->features & NETIF_F_HW_TLS_TX)) { @@ -960,10 +998,15 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) /* Avoid offloading if the device is down * We don't want to offload new flows after * the NETDEV_DOWN event + * + * device_offload_lock is taken in tls_devices's NETDEV_DOWN + * handler thus protecting from the device going down before + * ctx was added to tls_device_list. */ + down_read(&device_offload_lock); if (!(netdev->flags & IFF_UP)) { rc = -EINVAL; - goto release_netdev; + goto release_lock; } ctx->priv_ctx_tx = offload_ctx; @@ -971,9 +1014,10 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) &ctx->crypto_send.info, tcp_sk(sk)->write_seq); if (rc) - goto release_netdev; + goto release_lock; tls_device_attach(ctx, sk, netdev); + up_read(&device_offload_lock); /* following this assignment tls_is_sk_tx_device_offloaded * will return true and the context might be accessed @@ -981,13 +1025,14 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) */ smp_store_release(&sk->sk_validate_xmit_skb, tls_validate_xmit_skb); dev_put(netdev); - up_read(&device_offload_lock); - goto out; -release_netdev: - dev_put(netdev); + return 0; + release_lock: up_read(&device_offload_lock); +release_netdev: + dev_put(netdev); +disable_cad: clean_acked_data_disable(inet_csk(sk)); crypto_free_aead(offload_ctx->aead_send); free_rec_seq: @@ -999,7 +1044,6 @@ free_offload_ctx: ctx->priv_ctx_tx = NULL; free_marker_record: kfree(start_marker_record); -out: return rc; } @@ -1012,17 +1056,10 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) if (ctx->crypto_recv.info.version != TLS_1_2_VERSION) return -EOPNOTSUPP; - /* We support starting offload on multiple sockets - * concurrently, so we only need a read lock here. - * This lock must precede get_netdev_for_sock to prevent races between - * NETDEV_DOWN and setsockopt. - */ - down_read(&device_offload_lock); netdev = get_netdev_for_sock(sk); if (!netdev) { pr_err_ratelimited("%s: netdev not found\n", __func__); - rc = -EINVAL; - goto release_lock; + return -EINVAL; } if (!(netdev->features & NETIF_F_HW_TLS_RX)) { @@ -1033,16 +1070,21 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) /* Avoid offloading if the device is down * We don't want to offload new flows after * the NETDEV_DOWN event + * + * device_offload_lock is taken in tls_devices's NETDEV_DOWN + * handler thus protecting from the device going down before + * ctx was added to tls_device_list. 
*/ + down_read(&device_offload_lock); if (!(netdev->flags & IFF_UP)) { rc = -EINVAL; - goto release_netdev; + goto release_lock; } context = kzalloc(TLS_OFFLOAD_CONTEXT_SIZE_RX, GFP_KERNEL); if (!context) { rc = -ENOMEM; - goto release_netdev; + goto release_lock; } context->resync_nh_reset = 1; @@ -1058,7 +1100,11 @@ int tls_set_device_offload_rx(struct sock *sk, struct tls_context *ctx) goto free_sw_resources; tls_device_attach(ctx, sk, netdev); - goto release_netdev; + up_read(&device_offload_lock); + + dev_put(netdev); + + return 0; free_sw_resources: up_read(&device_offload_lock); @@ -1066,10 +1112,10 @@ free_sw_resources: down_read(&device_offload_lock); release_ctx: ctx->priv_ctx_rx = NULL; -release_netdev: - dev_put(netdev); release_lock: up_read(&device_offload_lock); +release_netdev: + dev_put(netdev); return rc; } diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index 9070d68a92a4..28895333701e 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -273,7 +273,7 @@ static int fill_sg_in(struct scatterlist *sg_in, __skb_frag_ref(frag); sg_set_page(sg_in + i, skb_frag_page(frag), - skb_frag_size(frag), frag->page_offset); + skb_frag_size(frag), skb_frag_off(frag)); remaining -= skb_frag_size(frag); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 43252a801c3f..ac88877dcade 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -39,6 +39,7 @@ #include <linux/netdevice.h> #include <linux/sched/signal.h> #include <linux/inetdevice.h> +#include <linux/inet_diag.h> #include <net/tls.h> @@ -251,14 +252,26 @@ static void tls_write_space(struct sock *sk) ctx->sk_write_space(sk); } -void tls_ctx_free(struct tls_context *ctx) +/** + * tls_ctx_free() - free TLS ULP context + * @sk: socket to which @ctx is attached + * @ctx: TLS context structure + * + * Free TLS context. If @sk is %NULL caller guarantees that the socket + * to which @ctx was attached has no outstanding references.
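The tls_ctx_free() rework documented here (and implemented just below) defers the final free with kfree_rcu() whenever a socket is passed, which is what allows icsk_ulp_data to be read locklessly once it is published with rcu_assign_pointer(); the tls_get_info() diag handler added later in this patch is the in-tree reader. A minimal sketch of that reader pattern, with illustrative variable names:

	/* Sketch: inspect the TLS ULP context without the socket lock */
	struct tls_context *ctx;
	u16 version = 0;

	rcu_read_lock();
	ctx = rcu_dereference(inet_csk(sk)->icsk_ulp_data);
	if (ctx)
		version = ctx->prot_info.version;
	rcu_read_unlock();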
+ */ +void tls_ctx_free(struct sock *sk, struct tls_context *ctx) { if (!ctx) return; memzero_explicit(&ctx->crypto_send, sizeof(ctx->crypto_send)); memzero_explicit(&ctx->crypto_recv, sizeof(ctx->crypto_recv)); - kfree(ctx); + + if (sk) + kfree_rcu(ctx, rcu); + else + kfree(ctx); } static void tls_sk_proto_cleanup(struct sock *sk, @@ -273,19 +286,14 @@ static void tls_sk_proto_cleanup(struct sock *sk, kfree(ctx->tx.rec_seq); kfree(ctx->tx.iv); tls_sw_release_resources_tx(sk); -#ifdef CONFIG_TLS_DEVICE } else if (ctx->tx_conf == TLS_HW) { tls_device_free_resources_tx(sk); -#endif } if (ctx->rx_conf == TLS_SW) tls_sw_release_resources_rx(sk); - -#ifdef CONFIG_TLS_DEVICE - if (ctx->rx_conf == TLS_HW) + else if (ctx->rx_conf == TLS_HW) tls_device_offload_cleanup_rx(sk); -#endif } static void tls_sk_proto_close(struct sock *sk, long timeout) @@ -306,7 +314,7 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) write_lock_bh(&sk->sk_callback_lock); if (free_ctx) - icsk->icsk_ulp_data = NULL; + rcu_assign_pointer(icsk->icsk_ulp_data, NULL); sk->sk_prot = ctx->sk_proto; if (sk->sk_write_space == tls_write_space) sk->sk_write_space = ctx->sk_write_space; @@ -318,10 +326,10 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) tls_sw_strparser_done(ctx); if (ctx->rx_conf == TLS_SW) tls_sw_free_ctx_rx(ctx); - ctx->sk_proto_close(sk, timeout); + ctx->sk_proto->close(sk, timeout); if (free_ctx) - tls_ctx_free(ctx); + tls_ctx_free(sk, ctx); } static int do_tls_getsockopt_tx(struct sock *sk, char __user *optval, @@ -438,7 +446,8 @@ static int tls_getsockopt(struct sock *sk, int level, int optname, struct tls_context *ctx = tls_get_ctx(sk); if (level != SOL_TLS) - return ctx->getsockopt(sk, level, optname, optval, optlen); + return ctx->sk_proto->getsockopt(sk, level, + optname, optval, optlen); return do_tls_getsockopt(sk, optname, optval, optlen); } @@ -523,26 +532,18 @@ static int do_tls_setsockopt_conf(struct sock *sk, char __user *optval, } if (tx) { -#ifdef CONFIG_TLS_DEVICE rc = tls_set_device_offload(sk, ctx); conf = TLS_HW; if (rc) { -#else - { -#endif rc = tls_set_sw_offload(sk, ctx, 1); if (rc) goto err_crypto_info; conf = TLS_SW; } } else { -#ifdef CONFIG_TLS_DEVICE rc = tls_set_device_offload_rx(sk, ctx); conf = TLS_HW; if (rc) { -#else - { -#endif rc = tls_set_sw_offload(sk, ctx, 0); if (rc) goto err_crypto_info; @@ -596,7 +597,8 @@ static int tls_setsockopt(struct sock *sk, int level, int optname, struct tls_context *ctx = tls_get_ctx(sk); if (level != SOL_TLS) - return ctx->setsockopt(sk, level, optname, optval, optlen); + return ctx->sk_proto->setsockopt(sk, level, optname, optval, + optlen); return do_tls_setsockopt(sk, optname, optval, optlen); } @@ -610,11 +612,8 @@ static struct tls_context *create_ctx(struct sock *sk) if (!ctx) return NULL; - icsk->icsk_ulp_data = ctx; - ctx->setsockopt = sk->sk_prot->setsockopt; - ctx->getsockopt = sk->sk_prot->getsockopt; - ctx->sk_proto_close = sk->sk_prot->close; - ctx->unhash = sk->sk_prot->unhash; + rcu_assign_pointer(icsk->icsk_ulp_data, ctx); + ctx->sk_proto = sk->sk_prot; return ctx; } @@ -651,8 +650,8 @@ static void tls_hw_sk_destruct(struct sock *sk) ctx->sk_destruct(sk); /* Free ctx */ - tls_ctx_free(ctx); - icsk->icsk_ulp_data = NULL; + rcu_assign_pointer(icsk->icsk_ulp_data, NULL); + tls_ctx_free(sk, ctx); } static int tls_hw_prot(struct sock *sk) @@ -670,9 +669,6 @@ static int tls_hw_prot(struct sock *sk) spin_unlock_bh(&device_spinlock); tls_build_proto(sk); - ctx->hash = sk->sk_prot->hash; - ctx->unhash = 
sk->sk_prot->unhash; - ctx->sk_proto_close = sk->sk_prot->close; ctx->sk_destruct = sk->sk_destruct; sk->sk_destruct = tls_hw_sk_destruct; ctx->rx_conf = TLS_HW_RECORD; @@ -704,7 +700,7 @@ static void tls_hw_unhash(struct sock *sk) } } spin_unlock_bh(&device_spinlock); - ctx->unhash(sk); + ctx->sk_proto->unhash(sk); } static int tls_hw_hash(struct sock *sk) @@ -713,7 +709,7 @@ static int tls_hw_hash(struct sock *sk) struct tls_device *dev; int err; - err = ctx->hash(sk); + err = ctx->sk_proto->hash(sk); spin_lock_bh(&device_spinlock); list_for_each_entry(dev, &device_list, dev_list) { if (dev->hash) { @@ -803,7 +799,6 @@ static int tls_init(struct sock *sk) ctx->tx_conf = TLS_BASE; ctx->rx_conf = TLS_BASE; - ctx->sk_proto = sk->sk_prot; update_sk_prot(sk, ctx); out: write_unlock_bh(&sk->sk_callback_lock); @@ -815,12 +810,71 @@ static void tls_update(struct sock *sk, struct proto *p) struct tls_context *ctx; ctx = tls_get_ctx(sk); - if (likely(ctx)) { - ctx->sk_proto_close = p->close; + if (likely(ctx)) ctx->sk_proto = p; - } else { + else sk->sk_prot = p; - } +} + +static int tls_get_info(const struct sock *sk, struct sk_buff *skb) +{ + u16 version, cipher_type; + struct tls_context *ctx; + struct nlattr *start; + int err; + + start = nla_nest_start_noflag(skb, INET_ULP_INFO_TLS); + if (!start) + return -EMSGSIZE; + + rcu_read_lock(); + ctx = rcu_dereference(inet_csk(sk)->icsk_ulp_data); + if (!ctx) { + err = 0; + goto nla_failure; + } + version = ctx->prot_info.version; + if (version) { + err = nla_put_u16(skb, TLS_INFO_VERSION, version); + if (err) + goto nla_failure; + } + cipher_type = ctx->prot_info.cipher_type; + if (cipher_type) { + err = nla_put_u16(skb, TLS_INFO_CIPHER, cipher_type); + if (err) + goto nla_failure; + } + err = nla_put_u16(skb, TLS_INFO_TXCONF, tls_user_config(ctx, true)); + if (err) + goto nla_failure; + + err = nla_put_u16(skb, TLS_INFO_RXCONF, tls_user_config(ctx, false)); + if (err) + goto nla_failure; + + rcu_read_unlock(); + nla_nest_end(skb, start); + return 0; + +nla_failure: + rcu_read_unlock(); + nla_nest_cancel(skb, start); + return err; +} + +static size_t tls_get_info_size(const struct sock *sk) +{ + size_t size = 0; + + size += nla_total_size(0) + /* INET_ULP_INFO_TLS */ + nla_total_size(sizeof(u16)) + /* TLS_INFO_VERSION */ + nla_total_size(sizeof(u16)) + /* TLS_INFO_CIPHER */ + nla_total_size(sizeof(u16)) + /* TLS_INFO_RXCONF */ + nla_total_size(sizeof(u16)) + /* TLS_INFO_TXCONF */ + 0; + + return size; } void tls_register_device(struct tls_device *device) @@ -844,6 +898,8 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = { .owner = THIS_MODULE, .init = tls_init, .update = tls_update, + .get_info = tls_get_info, + .get_info_size = tls_get_info_size, }; static int __init tls_register(void) @@ -851,9 +907,7 @@ static int __init tls_register(void) tls_sw_proto_ops = inet_stream_ops; tls_sw_proto_ops.splice_read = tls_sw_splice_read; -#ifdef CONFIG_TLS_DEVICE tls_device_init(); -#endif tcp_register_ulp(&tcp_tls_ulp_ops); return 0; @@ -862,9 +916,7 @@ static int __init tls_register(void) static void __exit tls_unregister(void) { tcp_unregister_ulp(&tcp_tls_ulp_ops); -#ifdef CONFIG_TLS_DEVICE tls_device_cleanup(); -#endif } module_init(tls_register); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 91d21b048a9b..c2b5e0d2ba1a 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1489,13 +1489,12 @@ static int decrypt_skb_update(struct sock *sk, struct sk_buff *skb, int pad, err = 0; if (!ctx->decrypted) { -#ifdef CONFIG_TLS_DEVICE if 
(tls_ctx->rx_conf == TLS_HW) { err = tls_device_decrypted(sk, skb); if (err < 0) return err; } -#endif + /* Still not decrypted after tls_device */ if (!ctx->decrypted) { err = decrypt_internal(sk, skb, dest, NULL, chunk, zc, @@ -2014,10 +2013,9 @@ static int tls_read_size(struct strparser *strp, struct sk_buff *skb) ret = -EINVAL; goto read_failure; } -#ifdef CONFIG_TLS_DEVICE + tls_device_rx_resync_new_rec(strp->sk, data_len + TLS_HEADER_SIZE, TCP_SKB_CB(skb)->seq + rxm->offset); -#endif return data_len + TLS_HEADER_SIZE; read_failure: diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 9d864ebeb7b3..261521d286d6 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -77,11 +77,11 @@ struct hvs_send_buf { VMBUS_PKT_TRAILER_SIZE) union hvs_service_id { - uuid_le srv_id; + guid_t srv_id; struct { unsigned int svm_port; - unsigned char b[sizeof(uuid_le) - sizeof(unsigned int)]; + unsigned char b[sizeof(guid_t) - sizeof(unsigned int)]; }; }; @@ -89,8 +89,8 @@ union hvs_service_id { struct hvsock { struct vsock_sock *vsk; - uuid_le vm_srv_id; - uuid_le host_srv_id; + guid_t vm_srv_id; + guid_t host_srv_id; struct vmbus_channel *chan; struct vmpacket_descriptor *recv_desc; @@ -159,21 +159,21 @@ struct hvsock { #define MIN_HOST_EPHEMERAL_PORT (MAX_HOST_LISTEN_PORT + 1) /* 00000000-facb-11e6-bd58-64006a7986d3 */ -static const uuid_le srv_id_template = - UUID_LE(0x00000000, 0xfacb, 0x11e6, 0xbd, 0x58, - 0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3); +static const guid_t srv_id_template = + GUID_INIT(0x00000000, 0xfacb, 0x11e6, 0xbd, 0x58, + 0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3); -static bool is_valid_srv_id(const uuid_le *id) +static bool is_valid_srv_id(const guid_t *id) { - return !memcmp(&id->b[4], &srv_id_template.b[4], sizeof(uuid_le) - 4); + return !memcmp(&id->b[4], &srv_id_template.b[4], sizeof(guid_t) - 4); } -static unsigned int get_port_by_srv_id(const uuid_le *svr_id) +static unsigned int get_port_by_srv_id(const guid_t *svr_id) { return *((unsigned int *)svr_id); } -static void hvs_addr_init(struct sockaddr_vm *addr, const uuid_le *svr_id) +static void hvs_addr_init(struct sockaddr_vm *addr, const guid_t *svr_id) { unsigned int port = get_port_by_srv_id(svr_id); @@ -321,7 +321,7 @@ static void hvs_close_connection(struct vmbus_channel *chan) static void hvs_open_connection(struct vmbus_channel *chan) { - uuid_le *if_instance, *if_type; + guid_t *if_instance, *if_type; unsigned char conn_from_host; struct sockaddr_vm addr; diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 0815d1357861..082a30936690 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -307,6 +307,7 @@ static void virtio_vsock_rx_fill(struct virtio_vsock *vsock) break; } + pkt->buf_len = buf_len; pkt->len = buf_len; sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr)); diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 6f1a8aff65c5..5bb70c692b1e 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -26,6 +26,9 @@ /* How long to wait for graceful shutdown of a connection */ #define VSOCK_CLOSE_TIMEOUT (8 * HZ) +/* Threshold for detecting small packets to copy */ +#define GOOD_COPY_LEN 128 + static const struct virtio_transport *virtio_transport_get_ops(void) { const struct vsock_transport *t = vsock_core_get_transport(); @@ -64,6 +67,9 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info, 
pkt->buf = kmalloc(len, GFP_KERNEL); if (!pkt->buf) goto out_pkt; + + pkt->buf_len = len; + err = memcpy_from_msg(pkt->buf, info->msg, len); if (err) goto out; @@ -91,8 +97,17 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) struct virtio_vsock_pkt *pkt = opaque; struct af_vsockmon_hdr *hdr; struct sk_buff *skb; + size_t payload_len; + void *payload_buf; + + /* A packet could be split to fit the RX buffer, so we can retrieve + * the payload length from the header and the buffer pointer taking + * care of the offset in the original packet. + */ + payload_len = le32_to_cpu(pkt->hdr.len); + payload_buf = pkt->buf + pkt->off; - skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + pkt->len, + skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + payload_len, GFP_ATOMIC); if (!skb) return NULL; @@ -132,8 +147,8 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) skb_put_data(skb, &pkt->hdr, sizeof(pkt->hdr)); - if (pkt->len) { - skb_put_data(skb, pkt->buf, pkt->len); + if (payload_len) { + skb_put_data(skb, payload_buf, payload_len); } return skb; @@ -166,8 +181,8 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk, vvs = vsk->trans; /* we can send less than pkt_len bytes */ - if (pkt_len > VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE) - pkt_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE; + if (pkt_len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) + pkt_len = VIRTIO_VSOCK_MAX_PKT_BUF_SIZE; /* virtio_transport_get_credit might return less than pkt_len credit */ pkt_len = virtio_transport_get_credit(vvs, pkt_len); @@ -204,10 +219,11 @@ static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs, void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt) { - spin_lock_bh(&vvs->tx_lock); + spin_lock_bh(&vvs->rx_lock); + vvs->last_fwd_cnt = vvs->fwd_cnt; pkt->hdr.fwd_cnt = cpu_to_le32(vvs->fwd_cnt); pkt->hdr.buf_alloc = cpu_to_le32(vvs->buf_alloc); - spin_unlock_bh(&vvs->tx_lock); + spin_unlock_bh(&vvs->rx_lock); } EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt); @@ -255,6 +271,7 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, struct virtio_vsock_sock *vvs = vsk->trans; struct virtio_vsock_pkt *pkt; size_t bytes, total = 0; + u32 free_space; int err = -EFAULT; spin_lock_bh(&vvs->rx_lock); @@ -285,11 +302,24 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk, virtio_transport_free_pkt(pkt); } } + + free_space = vvs->buf_alloc - (vvs->fwd_cnt - vvs->last_fwd_cnt); + spin_unlock_bh(&vvs->rx_lock); - /* Send a credit pkt to peer */ - virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM, - NULL); + /* To reduce the number of credit update messages, + * don't update credits as long as lots of space is available. + * Note: the limit chosen here is arbitrary. Setting the limit + * too high causes extra messages. Too low causes transmitter + * stalls. As stalls are in theory more expensive than extra + * messages, we set the limit to a high value. TODO: experiment + * with different values. 
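The free_space computation above and the threshold check that follows implement the credit-update suppression this comment describes: free_space is, roughly, an upper bound on how much the transmitter can still send with the credit it has already been granted, so an explicit update is only pushed once that bound drops below one maximum-sized packet. A worked example with made-up numbers (only the arithmetic is meant to be illustrative):

	/* Illustrative receive-side bookkeeping, hypothetical values */
	u32 buf_alloc    = 262144;	/* advertised receive buffer             */
	u32 fwd_cnt      = 230000;	/* total bytes handed to the application */
	u32 last_fwd_cnt = 100000;	/* fwd_cnt last communicated to the peer */
	u32 free_space   = buf_alloc - (fwd_cnt - last_fwd_cnt);	/* 132144 */

	if (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE)
		virtio_transport_send_credit_update(vsk, VIRTIO_VSOCK_TYPE_STREAM,
						    NULL);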
+ */ + if (free_space < VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) { + virtio_transport_send_credit_update(vsk, + VIRTIO_VSOCK_TYPE_STREAM, + NULL); + } return total; @@ -841,24 +871,60 @@ destroy: return err; } +static void +virtio_transport_recv_enqueue(struct vsock_sock *vsk, + struct virtio_vsock_pkt *pkt) +{ + struct virtio_vsock_sock *vvs = vsk->trans; + bool free_pkt = false; + + pkt->len = le32_to_cpu(pkt->hdr.len); + pkt->off = 0; + + spin_lock_bh(&vvs->rx_lock); + + virtio_transport_inc_rx_pkt(vvs, pkt); + + /* Try to copy small packets into the buffer of last packet queued, + * to avoid wasting memory queueing the entire buffer with a small + * payload. + */ + if (pkt->len <= GOOD_COPY_LEN && !list_empty(&vvs->rx_queue)) { + struct virtio_vsock_pkt *last_pkt; + + last_pkt = list_last_entry(&vvs->rx_queue, + struct virtio_vsock_pkt, list); + + /* If there is space in the last packet queued, we copy the + * new packet in its buffer. + */ + if (pkt->len <= last_pkt->buf_len - last_pkt->len) { + memcpy(last_pkt->buf + last_pkt->len, pkt->buf, + pkt->len); + last_pkt->len += pkt->len; + free_pkt = true; + goto out; + } + } + + list_add_tail(&pkt->list, &vvs->rx_queue); + +out: + spin_unlock_bh(&vvs->rx_lock); + if (free_pkt) + virtio_transport_free_pkt(pkt); +} + static int virtio_transport_recv_connected(struct sock *sk, struct virtio_vsock_pkt *pkt) { struct vsock_sock *vsk = vsock_sk(sk); - struct virtio_vsock_sock *vvs = vsk->trans; int err = 0; switch (le16_to_cpu(pkt->hdr.op)) { case VIRTIO_VSOCK_OP_RW: - pkt->len = le32_to_cpu(pkt->hdr.len); - pkt->off = 0; - - spin_lock_bh(&vvs->rx_lock); - virtio_transport_inc_rx_pkt(vvs, pkt); - list_add_tail(&pkt->list, &vvs->rx_queue); - spin_unlock_bh(&vvs->rx_lock); - + virtio_transport_recv_enqueue(vsk, pkt); sk->sk_data_ready(sk); return err; case VIRTIO_VSOCK_OP_CREDIT_UPDATE: diff --git a/net/wimax/debugfs.c b/net/wimax/debugfs.c index 1af56df30276..3c54bb6b925a 100644 --- a/net/wimax/debugfs.c +++ b/net/wimax/debugfs.c @@ -13,49 +13,23 @@ #define D_SUBMODULE debugfs #include "debug-levels.h" - -#define __debugfs_register(prefix, name, parent) \ -do { \ - result = d_level_register_debugfs(prefix, name, parent); \ - if (result < 0) \ - goto error; \ -} while (0) - - -int wimax_debugfs_add(struct wimax_dev *wimax_dev) +void wimax_debugfs_add(struct wimax_dev *wimax_dev) { - int result; struct net_device *net_dev = wimax_dev->net_dev; - struct device *dev = net_dev->dev.parent; struct dentry *dentry; char buf[128]; snprintf(buf, sizeof(buf), "wimax:%s", net_dev->name); dentry = debugfs_create_dir(buf, NULL); - result = PTR_ERR(dentry); - if (IS_ERR(dentry)) { - if (result == -ENODEV) - result = 0; /* No debugfs support */ - else - dev_err(dev, "Can't create debugfs dentry: %d\n", - result); - goto out; - } wimax_dev->debugfs_dentry = dentry; - __debugfs_register("wimax_dl_", debugfs, dentry); - __debugfs_register("wimax_dl_", id_table, dentry); - __debugfs_register("wimax_dl_", op_msg, dentry); - __debugfs_register("wimax_dl_", op_reset, dentry); - __debugfs_register("wimax_dl_", op_rfkill, dentry); - __debugfs_register("wimax_dl_", op_state_get, dentry); - __debugfs_register("wimax_dl_", stack, dentry); - result = 0; -out: - return result; -error: - debugfs_remove_recursive(wimax_dev->debugfs_dentry); - return result; + d_level_register_debugfs("wimax_dl_", debugfs, dentry); + d_level_register_debugfs("wimax_dl_", id_table, dentry); + d_level_register_debugfs("wimax_dl_", op_msg, dentry); + d_level_register_debugfs("wimax_dl_", op_reset, dentry); 
+ d_level_register_debugfs("wimax_dl_", op_rfkill, dentry); + d_level_register_debugfs("wimax_dl_", op_state_get, dentry); + d_level_register_debugfs("wimax_dl_", stack, dentry); } void wimax_debugfs_rm(struct wimax_dev *wimax_dev) diff --git a/net/wimax/stack.c b/net/wimax/stack.c index 1ba99d65feca..4b9b1c5e8f3a 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -481,12 +481,7 @@ int wimax_dev_add(struct wimax_dev *wimax_dev, struct net_device *net_dev) /* Set up user-space interaction */ mutex_lock(&wimax_dev->mutex); wimax_id_table_add(wimax_dev); - result = wimax_debugfs_add(wimax_dev); - if (result < 0) { - dev_err(dev, "cannot initialize debugfs: %d\n", - result); - goto error_debugfs_add; - } + wimax_debugfs_add(wimax_dev); __wimax_state_set(wimax_dev, WIMAX_ST_DOWN); mutex_unlock(&wimax_dev->mutex); @@ -498,10 +493,6 @@ int wimax_dev_add(struct wimax_dev *wimax_dev, struct net_device *net_dev) d_fnend(3, dev, "(wimax_dev %p net_dev %p) = 0\n", wimax_dev, net_dev); return 0; -error_debugfs_add: - wimax_id_table_rm(wimax_dev); - mutex_unlock(&wimax_dev->mutex); - wimax_rfkill_rm(wimax_dev); error_rfkill_add: d_fnend(3, dev, "(wimax_dev %p net_dev %p) = %d\n", wimax_dev, net_dev, result); diff --git a/net/wimax/wimax-internal.h b/net/wimax/wimax-internal.h index e819a09337ee..40751207296c 100644 --- a/net/wimax/wimax-internal.h +++ b/net/wimax/wimax-internal.h @@ -57,13 +57,10 @@ void __wimax_state_set(struct wimax_dev *wimax_dev, enum wimax_st state) void __wimax_state_change(struct wimax_dev *, enum wimax_st); #ifdef CONFIG_DEBUG_FS -int wimax_debugfs_add(struct wimax_dev *); +void wimax_debugfs_add(struct wimax_dev *); void wimax_debugfs_rm(struct wimax_dev *); #else -static inline int wimax_debugfs_add(struct wimax_dev *wimax_dev) -{ - return 0; -} +static inline void wimax_debugfs_add(struct wimax_dev *wimax_dev) {} static inline void wimax_debugfs_rm(struct wimax_dev *wimax_dev) {} #endif diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 67f8360dfcee..63cf7131f601 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -217,6 +217,8 @@ config LIB80211_CRYPT_WEP config LIB80211_CRYPT_CCMP tristate + select CRYPTO_AES + select CRYPTO_CCM config LIB80211_CRYPT_TKIP tristate diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 7dc1bbd0888f..e851cafd8e2f 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -14,6 +14,11 @@ #include "core.h" #include "rdev-ops.h" +static bool cfg80211_valid_60g_freq(u32 freq) +{ + return freq >= 58320 && freq <= 70200; +} + void cfg80211_chandef_create(struct cfg80211_chan_def *chandef, struct ieee80211_channel *chan, enum nl80211_channel_type chan_type) @@ -23,6 +28,8 @@ void cfg80211_chandef_create(struct cfg80211_chan_def *chandef, chandef->chan = chan; chandef->center_freq2 = 0; + chandef->edmg.bw_config = 0; + chandef->edmg.channels = 0; switch (chan_type) { case NL80211_CHAN_NO_HT: @@ -47,6 +54,91 @@ void cfg80211_chandef_create(struct cfg80211_chan_def *chandef, } EXPORT_SYMBOL(cfg80211_chandef_create); +static bool cfg80211_edmg_chandef_valid(const struct cfg80211_chan_def *chandef) +{ + int max_contiguous = 0; + int num_of_enabled = 0; + int contiguous = 0; + int i; + + if (!chandef->edmg.channels || !chandef->edmg.bw_config) + return false; + + if (!cfg80211_valid_60g_freq(chandef->chan->center_freq)) + return false; + + for (i = 0; i < 6; i++) { + if (chandef->edmg.channels & BIT(i)) { + contiguous++; + num_of_enabled++; + } else { + contiguous = 0; + } + + max_contiguous = max(contiguous, 
max_contiguous); + } + /* basic verification of edmg configuration according to + * IEEE P802.11ay/D4.0 section 9.4.2.251 + */ + /* check bw_config against contiguous edmg channels */ + switch (chandef->edmg.bw_config) { + case IEEE80211_EDMG_BW_CONFIG_4: + case IEEE80211_EDMG_BW_CONFIG_8: + case IEEE80211_EDMG_BW_CONFIG_12: + if (max_contiguous < 1) + return false; + break; + case IEEE80211_EDMG_BW_CONFIG_5: + case IEEE80211_EDMG_BW_CONFIG_9: + case IEEE80211_EDMG_BW_CONFIG_13: + if (max_contiguous < 2) + return false; + break; + case IEEE80211_EDMG_BW_CONFIG_6: + case IEEE80211_EDMG_BW_CONFIG_10: + case IEEE80211_EDMG_BW_CONFIG_14: + if (max_contiguous < 3) + return false; + break; + case IEEE80211_EDMG_BW_CONFIG_7: + case IEEE80211_EDMG_BW_CONFIG_11: + case IEEE80211_EDMG_BW_CONFIG_15: + if (max_contiguous < 4) + return false; + break; + + default: + return false; + } + + /* check bw_config against aggregated (non contiguous) edmg channels */ + switch (chandef->edmg.bw_config) { + case IEEE80211_EDMG_BW_CONFIG_4: + case IEEE80211_EDMG_BW_CONFIG_5: + case IEEE80211_EDMG_BW_CONFIG_6: + case IEEE80211_EDMG_BW_CONFIG_7: + break; + case IEEE80211_EDMG_BW_CONFIG_8: + case IEEE80211_EDMG_BW_CONFIG_9: + case IEEE80211_EDMG_BW_CONFIG_10: + case IEEE80211_EDMG_BW_CONFIG_11: + if (num_of_enabled < 2) + return false; + break; + case IEEE80211_EDMG_BW_CONFIG_12: + case IEEE80211_EDMG_BW_CONFIG_13: + case IEEE80211_EDMG_BW_CONFIG_14: + case IEEE80211_EDMG_BW_CONFIG_15: + if (num_of_enabled < 4 || max_contiguous < 2) + return false; + break; + default: + return false; + } + + return true; +} + bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) { u32 control_freq; @@ -112,6 +204,10 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) return false; } + if (cfg80211_chandef_is_edmg(chandef) && + !cfg80211_edmg_chandef_valid(chandef)) + return false; + return true; } EXPORT_SYMBOL(cfg80211_chandef_valid); @@ -721,12 +817,66 @@ static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, return true; } +/* check if the operating channels are valid and supported */ +static bool cfg80211_edmg_usable(struct wiphy *wiphy, u8 edmg_channels, + enum ieee80211_edmg_bw_config edmg_bw_config, + int primary_channel, + struct ieee80211_edmg *edmg_cap) +{ + struct ieee80211_channel *chan; + int i, freq; + int channels_counter = 0; + + if (!edmg_channels && !edmg_bw_config) + return true; + + if ((!edmg_channels && edmg_bw_config) || + (edmg_channels && !edmg_bw_config)) + return false; + + if (!(edmg_channels & BIT(primary_channel - 1))) + return false; + + /* 60GHz channels 1..6 */ + for (i = 0; i < 6; i++) { + if (!(edmg_channels & BIT(i))) + continue; + + if (!(edmg_cap->channels & BIT(i))) + return false; + + channels_counter++; + + freq = ieee80211_channel_to_frequency(i + 1, + NL80211_BAND_60GHZ); + chan = ieee80211_get_channel(wiphy, freq); + if (!chan || chan->flags & IEEE80211_CHAN_DISABLED) + return false; + } + + /* IEEE802.11 allows max 4 channels */ + if (channels_counter > 4) + return false; + + /* check bw_config is a subset of what driver supports + * (see IEEE P802.11ay/D4.0 section 9.4.2.251, Table 13) + */ + if ((edmg_bw_config % 4) > (edmg_cap->bw_config % 4)) + return false; + + if (edmg_bw_config > edmg_cap->bw_config) + return false; + + return true; +} + bool cfg80211_chandef_usable(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, u32 prohibited_flags) { struct ieee80211_sta_ht_cap *ht_cap; struct ieee80211_sta_vht_cap *vht_cap; + struct 
ieee80211_edmg *edmg_cap; u32 width, control_freq, cap; if (WARN_ON(!cfg80211_chandef_valid(chandef))) @@ -734,6 +884,15 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy, ht_cap = &wiphy->bands[chandef->chan->band]->ht_cap; vht_cap = &wiphy->bands[chandef->chan->band]->vht_cap; + edmg_cap = &wiphy->bands[chandef->chan->band]->edmg_cap; + + if (edmg_cap->channels && + !cfg80211_edmg_usable(wiphy, + chandef->edmg.channels, + chandef->edmg.bw_config, + chandef->chan->hw_value, + edmg_cap)) + return false; control_freq = chandef->chan->center_freq; @@ -894,7 +1053,8 @@ static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy, if (chan == other_chan) return true; - if (chan->band != NL80211_BAND_5GHZ) + if (chan->band != NL80211_BAND_5GHZ && + chan->band != NL80211_BAND_6GHZ) continue; r1 = cfg80211_get_unii(chan->center_freq); diff --git a/net/wireless/core.c b/net/wireless/core.c index 32b3c719fdfc..350513744575 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -5,7 +5,7 @@ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018 Intel Corporation + * Copyright (C) 2018-2019 Intel Corporation */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -142,12 +142,10 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, if (result) return result; - if (rdev->wiphy.debugfsdir && - !debugfs_rename(rdev->wiphy.debugfsdir->d_parent, - rdev->wiphy.debugfsdir, - rdev->wiphy.debugfsdir->d_parent, - newname)) - pr_err("failed to rename debugfs dir to %s!\n", newname); + if (rdev->wiphy.debugfsdir) + debugfs_rename(rdev->wiphy.debugfsdir->d_parent, + rdev->wiphy.debugfsdir, + rdev->wiphy.debugfsdir->d_parent, newname); nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY); @@ -302,12 +300,13 @@ static int cfg80211_rfkill_set_block(void *data, bool blocked) return 0; } -static void cfg80211_rfkill_sync_work(struct work_struct *work) +static void cfg80211_rfkill_block_work(struct work_struct *work) { struct cfg80211_registered_device *rdev; - rdev = container_of(work, struct cfg80211_registered_device, rfkill_sync); - cfg80211_rfkill_set_block(rdev, rfkill_blocked(rdev->rfkill)); + rdev = container_of(work, struct cfg80211_registered_device, + rfkill_block); + cfg80211_rfkill_set_block(rdev, true); } static void cfg80211_event_work(struct work_struct *work) @@ -518,7 +517,7 @@ use_default_name: return NULL; } - INIT_WORK(&rdev->rfkill_sync, cfg80211_rfkill_sync_work); + INIT_WORK(&rdev->rfkill_block, cfg80211_rfkill_block_work); INIT_WORK(&rdev->conn_work, cfg80211_conn_work); INIT_WORK(&rdev->event_work, cfg80211_event_work); @@ -899,11 +898,8 @@ int wiphy_register(struct wiphy *wiphy) cfg80211_rdev_list_generation++; /* add to debugfs */ - rdev->wiphy.debugfsdir = - debugfs_create_dir(wiphy_name(&rdev->wiphy), - ieee80211_debugfs_dir); - if (IS_ERR(rdev->wiphy.debugfsdir)) - rdev->wiphy.debugfsdir = NULL; + rdev->wiphy.debugfsdir = debugfs_create_dir(wiphy_name(&rdev->wiphy), + ieee80211_debugfs_dir); cfg80211_debugfs_rdev_add(rdev); nl80211_notify_wiphy(rdev, NL80211_CMD_NEW_WIPHY); @@ -1066,7 +1062,7 @@ void wiphy_rfkill_set_hw_state(struct wiphy *wiphy, bool blocked) struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); if (rfkill_set_hw_state(rdev->rfkill, blocked)) - schedule_work(&rdev->rfkill_sync); + schedule_work(&rdev->rfkill_block); } EXPORT_SYMBOL(wiphy_rfkill_set_hw_state); diff --git a/net/wireless/core.h b/net/wireless/core.h index 
ee8388fe4a92..ed487e324571 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -28,7 +28,7 @@ struct cfg80211_registered_device { /* rfkill support */ struct rfkill_ops rfkill_ops; struct rfkill *rfkill; - struct work_struct rfkill_sync; + struct work_struct rfkill_block; /* ISO / IEC 3166 alpha2 for which this device is receiving * country IEs on, this can help disregard country IEs from APs @@ -306,6 +306,8 @@ void ieee80211_set_bitrate_flags(struct wiphy *wiphy); void cfg80211_bss_expire(struct cfg80211_registered_device *rdev); void cfg80211_bss_age(struct cfg80211_registered_device *rdev, unsigned long age_secs); +void cfg80211_update_assoc_bss_entry(struct wireless_dev *wdev, + struct ieee80211_channel *channel); /* IBSS */ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index d1743e6abc34..ae8fe66a9bb8 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -104,13 +104,19 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, * use the mandatory rate set for 11b or * 11a for maximum compatibility. */ - struct ieee80211_supported_band *sband = - rdev->wiphy.bands[params->chandef.chan->band]; + struct ieee80211_supported_band *sband; + enum nl80211_band band; + u32 flag; int j; - u32 flag = params->chandef.chan->band == NL80211_BAND_5GHZ ? - IEEE80211_RATE_MANDATORY_A : - IEEE80211_RATE_MANDATORY_B; + band = params->chandef.chan->band; + if (band == NL80211_BAND_5GHZ || + band == NL80211_BAND_6GHZ) + flag = IEEE80211_RATE_MANDATORY_A; + else + flag = IEEE80211_RATE_MANDATORY_B; + + sband = rdev->wiphy.bands[band]; for (j = 0; j < sband->n_bitrates; j++) { if (sband->bitrates[j].flags & flag) params->basic_rates |= BIT(j); diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c index 7e8ff9d7dcfa..6a5f08f7491e 100644 --- a/net/wireless/lib80211_crypt_ccmp.c +++ b/net/wireless/lib80211_crypt_ccmp.c @@ -22,6 +22,7 @@ #include <linux/ieee80211.h> #include <linux/crypto.h> +#include <crypto/aead.h> #include <net/lib80211.h> @@ -48,20 +49,13 @@ struct lib80211_ccmp_data { int key_idx; - struct crypto_cipher *tfm; + struct crypto_aead *tfm; /* scratch buffers for virt_to_page() (crypto API) */ - u8 tx_b0[AES_BLOCK_LEN], tx_b[AES_BLOCK_LEN], - tx_e[AES_BLOCK_LEN], tx_s0[AES_BLOCK_LEN]; - u8 rx_b0[AES_BLOCK_LEN], rx_b[AES_BLOCK_LEN], rx_a[AES_BLOCK_LEN]; + u8 tx_aad[2 * AES_BLOCK_LEN]; + u8 rx_aad[2 * AES_BLOCK_LEN]; }; -static inline void lib80211_ccmp_aes_encrypt(struct crypto_cipher *tfm, - const u8 pt[16], u8 ct[16]) -{ - crypto_cipher_encrypt_one(tfm, ct, pt); -} - static void *lib80211_ccmp_init(int key_idx) { struct lib80211_ccmp_data *priv; @@ -71,7 +65,7 @@ static void *lib80211_ccmp_init(int key_idx) goto fail; priv->key_idx = key_idx; - priv->tfm = crypto_alloc_cipher("aes", 0, 0); + priv->tfm = crypto_alloc_aead("ccm(aes)", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(priv->tfm)) { priv->tfm = NULL; goto fail; @@ -82,7 +76,7 @@ static void *lib80211_ccmp_init(int key_idx) fail: if (priv) { if (priv->tfm) - crypto_free_cipher(priv->tfm); + crypto_free_aead(priv->tfm); kfree(priv); } @@ -93,25 +87,16 @@ static void lib80211_ccmp_deinit(void *priv) { struct lib80211_ccmp_data *_priv = priv; if (_priv && _priv->tfm) - crypto_free_cipher(_priv->tfm); + crypto_free_aead(_priv->tfm); kfree(priv); } -static inline void xor_block(u8 * b, u8 * a, size_t len) -{ - int i; - for (i = 0; i < len; i++) - b[i] ^= a[i]; -} - -static void ccmp_init_blocks(struct crypto_cipher 
*tfm, - struct ieee80211_hdr *hdr, - u8 * pn, size_t dlen, u8 * b0, u8 * auth, u8 * s0) +static int ccmp_init_iv_and_aad(const struct ieee80211_hdr *hdr, + const u8 *pn, u8 *iv, u8 *aad) { u8 *pos, qc = 0; size_t aad_len; int a4_included, qc_included; - u8 aad[2 * AES_BLOCK_LEN]; a4_included = ieee80211_has_a4(hdr->frame_control); qc_included = ieee80211_is_data_qos(hdr->frame_control); @@ -127,17 +112,19 @@ static void ccmp_init_blocks(struct crypto_cipher *tfm, aad_len += 2; } - /* CCM Initial Block: - * Flag (Include authentication header, M=3 (8-octet MIC), - * L=1 (2-octet Dlen)) - * Nonce: 0x00 | A2 | PN - * Dlen */ - b0[0] = 0x59; - b0[1] = qc; - memcpy(b0 + 2, hdr->addr2, ETH_ALEN); - memcpy(b0 + 8, pn, CCMP_PN_LEN); - b0[14] = (dlen >> 8) & 0xff; - b0[15] = dlen & 0xff; + /* In CCM, the initial vectors (IV) used for CTR mode encryption and CBC + * mode authentication are not allowed to collide, yet both are derived + * from the same vector. We only set L := 1 here to indicate that the + * data size can be represented in (L+1) bytes. The CCM layer will take + * care of storing the data length in the top (L+1) bytes and setting + * and clearing the other bits as is required to derive the two IVs. + */ + iv[0] = 0x1; + + /* Nonce: QC | A2 | PN */ + iv[1] = qc; + memcpy(iv + 2, hdr->addr2, ETH_ALEN); + memcpy(iv + 8, pn, CCMP_PN_LEN); /* AAD: * FC with bits 4..6 and 11..13 masked to zero; 14 is always one @@ -147,31 +134,20 @@ static void ccmp_init_blocks(struct crypto_cipher *tfm, * QC (if present) */ pos = (u8 *) hdr; - aad[0] = 0; /* aad_len >> 8 */ - aad[1] = aad_len & 0xff; - aad[2] = pos[0] & 0x8f; - aad[3] = pos[1] & 0xc7; - memcpy(aad + 4, hdr->addr1, 3 * ETH_ALEN); + aad[0] = pos[0] & 0x8f; + aad[1] = pos[1] & 0xc7; + memcpy(aad + 2, hdr->addr1, 3 * ETH_ALEN); pos = (u8 *) & hdr->seq_ctrl; - aad[22] = pos[0] & 0x0f; - aad[23] = 0; /* all bits masked */ - memset(aad + 24, 0, 8); + aad[20] = pos[0] & 0x0f; + aad[21] = 0; /* all bits masked */ + memset(aad + 22, 0, 8); if (a4_included) - memcpy(aad + 24, hdr->addr4, ETH_ALEN); + memcpy(aad + 22, hdr->addr4, ETH_ALEN); if (qc_included) { - aad[a4_included ? 30 : 24] = qc; + aad[a4_included ? 
28 : 22] = qc; /* rest of QC masked */ } - - /* Start with the first block and AAD */ - lib80211_ccmp_aes_encrypt(tfm, b0, auth); - xor_block(auth, aad, AES_BLOCK_LEN); - lib80211_ccmp_aes_encrypt(tfm, auth, auth); - xor_block(auth, &aad[AES_BLOCK_LEN], AES_BLOCK_LEN); - lib80211_ccmp_aes_encrypt(tfm, auth, auth); - b0[0] &= 0x07; - b0[14] = b0[15] = 0; - lib80211_ccmp_aes_encrypt(tfm, b0, s0); + return aad_len; } static int lib80211_ccmp_hdr(struct sk_buff *skb, int hdr_len, @@ -214,13 +190,13 @@ static int lib80211_ccmp_hdr(struct sk_buff *skb, int hdr_len, static int lib80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv) { struct lib80211_ccmp_data *key = priv; - int data_len, i, blocks, last, len; - u8 *pos, *mic; struct ieee80211_hdr *hdr; - u8 *b0 = key->tx_b0; - u8 *b = key->tx_b; - u8 *e = key->tx_e; - u8 *s0 = key->tx_s0; + struct aead_request *req; + struct scatterlist sg[2]; + u8 *aad = key->tx_aad; + u8 iv[AES_BLOCK_LEN]; + int len, data_len, aad_len; + int ret; if (skb_tailroom(skb) < CCMP_MIC_LEN || skb->len < hdr_len) return -1; @@ -230,31 +206,28 @@ static int lib80211_ccmp_encrypt(struct sk_buff *skb, int hdr_len, void *priv) if (len < 0) return -1; - pos = skb->data + hdr_len + CCMP_HDR_LEN; + req = aead_request_alloc(key->tfm, GFP_ATOMIC); + if (!req) + return -ENOMEM; + hdr = (struct ieee80211_hdr *)skb->data; - ccmp_init_blocks(key->tfm, hdr, key->tx_pn, data_len, b0, b, s0); - - blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN); - last = data_len % AES_BLOCK_LEN; - - for (i = 1; i <= blocks; i++) { - len = (i == blocks && last) ? last : AES_BLOCK_LEN; - /* Authentication */ - xor_block(b, pos, len); - lib80211_ccmp_aes_encrypt(key->tfm, b, b); - /* Encryption, with counter */ - b0[14] = (i >> 8) & 0xff; - b0[15] = i & 0xff; - lib80211_ccmp_aes_encrypt(key->tfm, b0, e); - xor_block(pos, e, len); - pos += len; - } + aad_len = ccmp_init_iv_and_aad(hdr, key->tx_pn, iv, aad); - mic = skb_put(skb, CCMP_MIC_LEN); - for (i = 0; i < CCMP_MIC_LEN; i++) - mic[i] = b[i] ^ s0[i]; + skb_put(skb, CCMP_MIC_LEN); - return 0; + sg_init_table(sg, 2); + sg_set_buf(&sg[0], aad, aad_len); + sg_set_buf(&sg[1], skb->data + hdr_len + CCMP_HDR_LEN, + data_len + CCMP_MIC_LEN); + + aead_request_set_callback(req, 0, NULL, NULL); + aead_request_set_ad(req, aad_len); + aead_request_set_crypt(req, sg, sg, data_len, iv); + + ret = crypto_aead_encrypt(req); + aead_request_free(req); + + return ret; } /* @@ -283,13 +256,13 @@ static int lib80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) struct lib80211_ccmp_data *key = priv; u8 keyidx, *pos; struct ieee80211_hdr *hdr; - u8 *b0 = key->rx_b0; - u8 *b = key->rx_b; - u8 *a = key->rx_a; + struct aead_request *req; + struct scatterlist sg[2]; + u8 *aad = key->rx_aad; + u8 iv[AES_BLOCK_LEN]; u8 pn[6]; - int i, blocks, last, len; - size_t data_len = skb->len - hdr_len - CCMP_HDR_LEN - CCMP_MIC_LEN; - u8 *mic = skb->data + skb->len - CCMP_MIC_LEN; + int aad_len, ret; + size_t data_len = skb->len - hdr_len - CCMP_HDR_LEN; if (skb->len < hdr_len + CCMP_HDR_LEN + CCMP_MIC_LEN) { key->dot11RSNAStatsCCMPFormatErrors++; @@ -337,28 +310,26 @@ static int lib80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) return -4; } - ccmp_init_blocks(key->tfm, hdr, pn, data_len, b0, a, b); - xor_block(mic, b, CCMP_MIC_LEN); - - blocks = DIV_ROUND_UP(data_len, AES_BLOCK_LEN); - last = data_len % AES_BLOCK_LEN; - - for (i = 1; i <= blocks; i++) { - len = (i == blocks && last) ? 
last : AES_BLOCK_LEN; - /* Decrypt, with counter */ - b0[14] = (i >> 8) & 0xff; - b0[15] = i & 0xff; - lib80211_ccmp_aes_encrypt(key->tfm, b0, b); - xor_block(pos, b, len); - /* Authentication */ - xor_block(a, pos, len); - lib80211_ccmp_aes_encrypt(key->tfm, a, a); - pos += len; - } + req = aead_request_alloc(key->tfm, GFP_ATOMIC); + if (!req) + return -ENOMEM; - if (memcmp(mic, a, CCMP_MIC_LEN) != 0) { - net_dbg_ratelimited("CCMP: decrypt failed: STA=%pM\n", - hdr->addr2); + aad_len = ccmp_init_iv_and_aad(hdr, pn, iv, aad); + + sg_init_table(sg, 2); + sg_set_buf(&sg[0], aad, aad_len); + sg_set_buf(&sg[1], pos, data_len); + + aead_request_set_callback(req, 0, NULL, NULL); + aead_request_set_ad(req, aad_len); + aead_request_set_crypt(req, sg, sg, data_len, iv); + + ret = crypto_aead_decrypt(req); + aead_request_free(req); + + if (ret) { + net_dbg_ratelimited("CCMP: decrypt failed: STA=%pM (%d)\n", + hdr->addr2, ret); key->dot11RSNAStatsCCMPDecryptErrors++; return -5; } @@ -377,7 +348,7 @@ static int lib80211_ccmp_set_key(void *key, int len, u8 * seq, void *priv) { struct lib80211_ccmp_data *data = priv; int keyidx; - struct crypto_cipher *tfm = data->tfm; + struct crypto_aead *tfm = data->tfm; keyidx = data->key_idx; memset(data, 0, sizeof(*data)); @@ -394,7 +365,9 @@ static int lib80211_ccmp_set_key(void *key, int len, u8 * seq, void *priv) data->rx_pn[4] = seq[1]; data->rx_pn[5] = seq[0]; } - crypto_cipher_setkey(data->tfm, data->key, CCMP_TK_LEN); + if (crypto_aead_setauthsize(data->tfm, CCMP_MIC_LEN) || + crypto_aead_setkey(data->tfm, data->key, CCMP_TK_LEN)) + return -1; } else if (len == 0) data->key_set = 0; else diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index fd05ae1437a9..d21b1581a665 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -281,7 +281,16 @@ nl80211_pmsr_attr_policy[NL80211_PMSR_ATTR_MAX + 1] = { NLA_POLICY_NESTED_ARRAY(nl80211_psmr_peer_attr_policy), }; +static const struct nla_policy +he_obss_pd_policy[NL80211_HE_OBSS_PD_ATTR_MAX + 1] = { + [NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET] = + NLA_POLICY_RANGE(NLA_U8, 1, 20), + [NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET] = + NLA_POLICY_RANGE(NLA_U8, 1, 20), +}; + const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { + [0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD }, [NL80211_ATTR_WIPHY] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_NAME] = { .type = NLA_NUL_STRING, .len = 20-1 }, @@ -289,6 +298,13 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_WIPHY_FREQ] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_CHANNEL_TYPE] = { .type = NLA_U32 }, + [NL80211_ATTR_WIPHY_EDMG_CHANNELS] = NLA_POLICY_RANGE(NLA_U8, + NL80211_EDMG_CHANNELS_MIN, + NL80211_EDMG_CHANNELS_MAX), + [NL80211_ATTR_WIPHY_EDMG_BW_CONFIG] = NLA_POLICY_RANGE(NLA_U8, + NL80211_EDMG_BW_CONFIG_MIN, + NL80211_EDMG_BW_CONFIG_MAX), + [NL80211_ATTR_CHANNEL_WIDTH] = { .type = NLA_U32 }, [NL80211_ATTR_CENTER_FREQ1] = { .type = NLA_U32 }, [NL80211_ATTR_CENTER_FREQ2] = { .type = NLA_U32 }, @@ -574,6 +590,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_SAE_PASSWORD] = { .type = NLA_BINARY, .len = SAE_PASSWORD_MAX_LEN }, [NL80211_ATTR_TWT_RESPONDER] = { .type = NLA_FLAG }, + [NL80211_ATTR_HE_OBSS_PD] = NLA_POLICY_NESTED(he_obss_pd_policy), }; /* policy for the key attributes */ @@ -667,6 +684,7 @@ static const struct nla_policy nl80211_match_band_rssi_policy[NUM_NL80211_BANDS] = { [NL80211_BAND_2GHZ] = { .type = NLA_S32 }, [NL80211_BAND_5GHZ] = { .type = NLA_S32 }, + [NL80211_BAND_6GHZ] = { 
.type = NLA_S32 }, [NL80211_BAND_60GHZ] = { .type = NLA_S32 }, }; @@ -749,17 +767,25 @@ int nl80211_prepare_wdev_dump(struct netlink_callback *cb, int err; if (!cb->args[0]) { + struct nlattr **attrbuf; + + attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf), + GFP_KERNEL); + if (!attrbuf) + return -ENOMEM; + err = nlmsg_parse_deprecated(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, - genl_family_attrbuf(&nl80211_fam), - nl80211_fam.maxattr, + attrbuf, nl80211_fam.maxattr, nl80211_policy, NULL); - if (err) + if (err) { + kfree(attrbuf); return err; + } - *wdev = __cfg80211_wdev_from_attrs( - sock_net(cb->skb->sk), - genl_family_attrbuf(&nl80211_fam)); + *wdev = __cfg80211_wdev_from_attrs(sock_net(cb->skb->sk), + attrbuf); + kfree(attrbuf); if (IS_ERR(*wdev)) return PTR_ERR(*wdev); *rdev = wiphy_to_rdev((*wdev)->wiphy); @@ -1555,6 +1581,15 @@ static int nl80211_send_band_rateinfo(struct sk_buff *msg, nla_nest_end(msg, nl_iftype_data); } + /* add EDMG info */ + if (sband->edmg_cap.channels && + (nla_put_u8(msg, NL80211_BAND_ATTR_EDMG_CHANNELS, + sband->edmg_cap.channels) || + nla_put_u8(msg, NL80211_BAND_ATTR_EDMG_BW_CONFIG, + sband->edmg_cap.bw_config))) + + return -ENOBUFS; + /* add bitrates */ nl_rates = nla_nest_start_noflag(msg, NL80211_BAND_ATTR_RATES); if (!nl_rates) @@ -2065,6 +2100,7 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, CMD(add_tx_ts, ADD_TX_TS); CMD(set_multicast_to_unicast, SET_MULTICAST_TO_UNICAST); CMD(update_connect_params, UPDATE_CONNECT_PARAMS); + CMD(update_ft_ies, UPDATE_FT_IES); } #undef CMD @@ -2172,6 +2208,30 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, rdev->wiphy.vht_capa_mod_mask)) goto nla_put_failure; + if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, + rdev->wiphy.perm_addr)) + goto nla_put_failure; + + if (!is_zero_ether_addr(rdev->wiphy.addr_mask) && + nla_put(msg, NL80211_ATTR_MAC_MASK, ETH_ALEN, + rdev->wiphy.addr_mask)) + goto nla_put_failure; + + if (rdev->wiphy.n_addresses > 1) { + void *attr; + + attr = nla_nest_start(msg, NL80211_ATTR_MAC_ADDRS); + if (!attr) + goto nla_put_failure; + + for (i = 0; i < rdev->wiphy.n_addresses; i++) + if (nla_put(msg, i + 1, ETH_ALEN, + rdev->wiphy.addresses[i].addr)) + goto nla_put_failure; + + nla_nest_end(msg, attr); + } + state->split_start++; break; case 10: @@ -2366,14 +2426,21 @@ static int nl80211_dump_wiphy_parse(struct sk_buff *skb, struct netlink_callback *cb, struct nl80211_dump_wiphy_state *state) { - struct nlattr **tb = genl_family_attrbuf(&nl80211_fam); - int ret = nlmsg_parse_deprecated(cb->nlh, - GENL_HDRLEN + nl80211_fam.hdrsize, - tb, nl80211_fam.maxattr, - nl80211_policy, NULL); + struct nlattr **tb = kcalloc(NUM_NL80211_ATTR, sizeof(*tb), GFP_KERNEL); + int ret; + + if (!tb) + return -ENOMEM; + + ret = nlmsg_parse_deprecated(cb->nlh, + GENL_HDRLEN + nl80211_fam.hdrsize, + tb, nl80211_fam.maxattr, + nl80211_policy, NULL); /* ignore parse errors for backward compatibility */ - if (ret) - return 0; + if (ret) { + ret = 0; + goto out; + } state->split = tb[NL80211_ATTR_SPLIT_WIPHY_DUMP]; if (tb[NL80211_ATTR_WIPHY]) @@ -2386,8 +2453,10 @@ static int nl80211_dump_wiphy_parse(struct sk_buff *skb, int ifidx = nla_get_u32(tb[NL80211_ATTR_IFINDEX]); netdev = __dev_get_by_index(sock_net(skb->sk), ifidx); - if (!netdev) - return -ENODEV; + if (!netdev) { + ret = -ENODEV; + goto out; + } if (netdev->ieee80211_ptr) { rdev = wiphy_to_rdev( netdev->ieee80211_ptr->wiphy); @@ -2395,7 +2464,10 @@ static int nl80211_dump_wiphy_parse(struct sk_buff *skb, } } - 
return 0; + ret = 0; +out: + kfree(tb); + return ret; } static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) @@ -2622,6 +2694,18 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ2]); } + if (info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]) { + chandef->edmg.channels = + nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]); + + if (info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG]) + chandef->edmg.bw_config = + nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG]); + } else { + chandef->edmg.bw_config = 0; + chandef->edmg.channels = 0; + } + if (!cfg80211_chandef_valid(chandef)) { NL_SET_ERR_MSG(extack, "invalid channel definition"); return -EINVAL; @@ -4359,6 +4443,34 @@ static int nl80211_parse_beacon(struct cfg80211_registered_device *rdev, return 0; } +static int nl80211_parse_he_obss_pd(struct nlattr *attrs, + struct ieee80211_he_obss_pd *he_obss_pd) +{ + struct nlattr *tb[NL80211_HE_OBSS_PD_ATTR_MAX + 1]; + int err; + + err = nla_parse_nested(tb, NL80211_HE_OBSS_PD_ATTR_MAX, attrs, + he_obss_pd_policy, NULL); + if (err) + return err; + + if (!tb[NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET] || + !tb[NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET]) + return -EINVAL; + + he_obss_pd->min_offset = + nla_get_u32(tb[NL80211_HE_OBSS_PD_ATTR_MIN_OFFSET]); + he_obss_pd->max_offset = + nla_get_u32(tb[NL80211_HE_OBSS_PD_ATTR_MAX_OFFSET]); + + if (he_obss_pd->min_offset >= he_obss_pd->max_offset) + return -EINVAL; + + he_obss_pd->enable = true; + + return 0; +} + static void nl80211_check_ap_rate_selectors(struct cfg80211_ap_settings *params, const u8 *rates) { @@ -4643,6 +4755,14 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) params.twt_responder = nla_get_flag(info->attrs[NL80211_ATTR_TWT_RESPONDER]); + if (info->attrs[NL80211_ATTR_HE_OBSS_PD]) { + err = nl80211_parse_he_obss_pd( + info->attrs[NL80211_ATTR_HE_OBSS_PD], + ¶ms.he_obss_pd); + if (err) + return err; + } + nl80211_calculate_ap_params(¶ms); if (info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT]) @@ -4941,6 +5061,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, PUT_SINFO(CONNECTED_TIME, connected_time, u32); PUT_SINFO(INACTIVE_TIME, inactive_time, u32); + PUT_SINFO_U64(ASSOC_AT_BOOTTIME, assoc_at); if (sinfo->filled & (BIT_ULL(NL80211_STA_INFO_RX_BYTES) | BIT_ULL(NL80211_STA_INFO_RX_BYTES64)) && @@ -8685,6 +8806,10 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_SCAN, survey->time_scan, NL80211_SURVEY_INFO_PAD)) goto nla_put_failure; + if ((survey->filled & SURVEY_INFO_TIME_BSS_RX) && + nla_put_u64_64bit(msg, NL80211_SURVEY_INFO_TIME_BSS_RX, + survey->time_bss_rx, NL80211_SURVEY_INFO_PAD)) + goto nla_put_failure; nla_nest_end(msg, infoattr); @@ -8698,7 +8823,7 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) { - struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam); + struct nlattr **attrbuf; struct survey_info survey; struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; @@ -8706,6 +8831,10 @@ static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) int res; bool radio_stats; + attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf), GFP_KERNEL); + if (!attrbuf) + return -ENOMEM; + rtnl_lock(); res = nl80211_prepare_wdev_dump(cb, &rdev, &wdev); if (res) @@ -8750,6 +8879,7 @@ static int 
nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) cb->args[2] = survey_idx; res = skb->len; out_err: + kfree(attrbuf); rtnl_unlock(); return res; } @@ -9609,6 +9739,7 @@ static int nl80211_testmode_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct cfg80211_registered_device *rdev; + struct nlattr **attrbuf = NULL; int err; long phy_idx; void *data = NULL; @@ -9629,7 +9760,12 @@ static int nl80211_testmode_dump(struct sk_buff *skb, goto out_err; } } else { - struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam); + attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf), + GFP_KERNEL); + if (!attrbuf) { + err = -ENOMEM; + goto out_err; + } err = nlmsg_parse_deprecated(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, @@ -9696,6 +9832,7 @@ static int nl80211_testmode_dump(struct sk_buff *skb, /* see above */ cb->args[0] = phy_idx + 1; out_err: + kfree(attrbuf); rtnl_unlock(); return err; } @@ -9790,6 +9927,15 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } + if (info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]) { + connect.edmg.channels = + nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]); + + if (info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG]) + connect.edmg.bw_config = + nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG]); + } + if (connect.privacy && info->attrs[NL80211_ATTR_KEYS]) { connkeys = nl80211_parse_connkeys(rdev, info, NULL); if (IS_ERR(connkeys)) @@ -10659,9 +10805,11 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, hyst = wdev->cqm_config->rssi_hyst; n = wdev->cqm_config->n_rssi_thresholds; - for (i = 0; i < n; i++) + for (i = 0; i < n; i++) { + i = array_index_nospec(i, n); if (last < wdev->cqm_config->rssi_thresholds[i]) break; + } low_index = i - 1; if (low_index >= 0) { @@ -12789,7 +12937,7 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, struct cfg80211_registered_device **rdev, struct wireless_dev **wdev) { - struct nlattr **attrbuf = genl_family_attrbuf(&nl80211_fam); + struct nlattr **attrbuf; u32 vid, subcmd; unsigned int i; int vcmd_idx = -1; @@ -12820,24 +12968,32 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, return 0; } + attrbuf = kcalloc(NUM_NL80211_ATTR, sizeof(*attrbuf), GFP_KERNEL); + if (!attrbuf) + return -ENOMEM; + err = nlmsg_parse_deprecated(cb->nlh, GENL_HDRLEN + nl80211_fam.hdrsize, attrbuf, nl80211_fam.maxattr, nl80211_policy, NULL); if (err) - return err; + goto out; if (!attrbuf[NL80211_ATTR_VENDOR_ID] || - !attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) - return -EINVAL; + !attrbuf[NL80211_ATTR_VENDOR_SUBCMD]) { + err = -EINVAL; + goto out; + } *wdev = __cfg80211_wdev_from_attrs(sock_net(skb->sk), attrbuf); if (IS_ERR(*wdev)) *wdev = NULL; *rdev = __cfg80211_rdev_from_attrs(sock_net(skb->sk), attrbuf); - if (IS_ERR(*rdev)) - return PTR_ERR(*rdev); + if (IS_ERR(*rdev)) { + err = PTR_ERR(*rdev); + goto out; + } vid = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_ID]); subcmd = nla_get_u32(attrbuf[NL80211_ATTR_VENDOR_SUBCMD]); @@ -12850,15 +13006,19 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, if (vcmd->info.vendor_id != vid || vcmd->info.subcmd != subcmd) continue; - if (!vcmd->dumpit) - return -EOPNOTSUPP; + if (!vcmd->dumpit) { + err = -EOPNOTSUPP; + goto out; + } vcmd_idx = i; break; } - if (vcmd_idx < 0) - return -EOPNOTSUPP; + if (vcmd_idx < 0) { + err = -EOPNOTSUPP; + goto out; + } if (attrbuf[NL80211_ATTR_VENDOR_DATA]) { data = nla_data(attrbuf[NL80211_ATTR_VENDOR_DATA]); @@ -12869,7 +13029,7 @@ 
static int nl80211_prepare_vendor_dump(struct sk_buff *skb, attrbuf[NL80211_ATTR_VENDOR_DATA], cb->extack); if (err) - return err; + goto out; } /* 0 is the first index - add 1 to parse only once */ @@ -12881,7 +13041,10 @@ static int nl80211_prepare_vendor_dump(struct sk_buff *skb, cb->args[4] = data_len; /* keep rtnl locked in successful case */ - return 0; + err = 0; +out: + kfree(attrbuf); + return err; } static int nl80211_vendor_cmd_dump(struct sk_buff *skb, @@ -14559,6 +14722,7 @@ static struct genl_family nl80211_fam __ro_after_init = { .n_ops = ARRAY_SIZE(nl80211_ops), .mcgrps = nl80211_mcgrps, .n_mcgrps = ARRAY_SIZE(nl80211_mcgrps), + .parallel_ops = true, }; /* notification functions */ @@ -14835,12 +14999,10 @@ void nl80211_common_reg_change_event(enum nl80211_commands cmd_id, return; hdr = nl80211hdr_put(msg, 0, 0, 0, cmd_id); - if (!hdr) { - nlmsg_free(msg); - return; - } + if (!hdr) + goto nla_put_failure; - if (nl80211_reg_change_event_fill(msg, request) == false) + if (!nl80211_reg_change_event_fill(msg, request)) goto nla_put_failure; genlmsg_end(msg, hdr); @@ -16090,7 +16252,9 @@ void cfg80211_ch_switch_notify(struct net_device *dev, if (wdev->iftype == NL80211_IFTYPE_STATION && !WARN_ON(!wdev->current_bss)) - wdev->current_bss->pub.channel = chandef->chan; + cfg80211_update_assoc_bss_entry(wdev, chandef->chan); + + cfg80211_sched_dfs_chan_update(rdev); nl80211_ch_switch_notify(rdev, dev, chandef, GFP_KERNEL, NL80211_CMD_CH_SWITCH_NOTIFY, 0); diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 327479ce69f5..5311d0ae2454 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -3806,8 +3806,9 @@ void wiphy_regulatory_deregister(struct wiphy *wiphy) } /* - * See http://www.fcc.gov/document/5-ghz-unlicensed-spectrum-unii, for - * UNII band definitions + * See FCC notices for UNII band definitions + * 5GHz: https://www.fcc.gov/document/5-ghz-unlicensed-spectrum-unii + * 6GHz: https://www.fcc.gov/document/fcc-proposes-more-spectrum-unlicensed-use-0 */ int cfg80211_get_unii(int freq) { @@ -3831,6 +3832,22 @@ int cfg80211_get_unii(int freq) if (freq > 5725 && freq <= 5825) return 4; + /* UNII-5 */ + if (freq > 5925 && freq <= 6425) + return 5; + + /* UNII-6 */ + if (freq > 6425 && freq <= 6525) + return 6; + + /* UNII-7 */ + if (freq > 6525 && freq <= 6875) + return 7; + + /* UNII-8 */ + if (freq > 6875 && freq <= 7125) + return 8; + return -EINVAL; } diff --git a/net/wireless/scan.c b/net/wireless/scan.c index d66e6d4b7555..d313c9befa23 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1091,6 +1091,93 @@ struct cfg80211_non_tx_bss { u8 bssid_index; }; +static bool +cfg80211_update_known_bss(struct cfg80211_registered_device *rdev, + struct cfg80211_internal_bss *known, + struct cfg80211_internal_bss *new, + bool signal_valid) +{ + lockdep_assert_held(&rdev->bss_lock); + + /* Update IEs */ + if (rcu_access_pointer(new->pub.proberesp_ies)) { + const struct cfg80211_bss_ies *old; + + old = rcu_access_pointer(known->pub.proberesp_ies); + + rcu_assign_pointer(known->pub.proberesp_ies, + new->pub.proberesp_ies); + /* Override possible earlier Beacon frame IEs */ + rcu_assign_pointer(known->pub.ies, + new->pub.proberesp_ies); + if (old) + kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head); + } else if (rcu_access_pointer(new->pub.beacon_ies)) { + const struct cfg80211_bss_ies *old; + struct cfg80211_internal_bss *bss; + + if (known->pub.hidden_beacon_bss && + !list_empty(&known->hidden_list)) { + const struct cfg80211_bss_ies *f; + + /* The known BSS struct is 
one of the probe + * response members of a group, but we're + * receiving a beacon (beacon_ies in the new + * bss is used). This can only mean that the + * AP changed its beacon from not having an + * SSID to showing it, which is confusing so + * drop this information. + */ + + f = rcu_access_pointer(new->pub.beacon_ies); + kfree_rcu((struct cfg80211_bss_ies *)f, rcu_head); + return false; + } + + old = rcu_access_pointer(known->pub.beacon_ies); + + rcu_assign_pointer(known->pub.beacon_ies, new->pub.beacon_ies); + + /* Override IEs if they were from a beacon before */ + if (old == rcu_access_pointer(known->pub.ies)) + rcu_assign_pointer(known->pub.ies, new->pub.beacon_ies); + + /* Assign beacon IEs to all sub entries */ + list_for_each_entry(bss, &known->hidden_list, hidden_list) { + const struct cfg80211_bss_ies *ies; + + ies = rcu_access_pointer(bss->pub.beacon_ies); + WARN_ON(ies != old); + + rcu_assign_pointer(bss->pub.beacon_ies, + new->pub.beacon_ies); + } + + if (old) + kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head); + } + + known->pub.beacon_interval = new->pub.beacon_interval; + + /* don't update the signal if beacon was heard on + * adjacent channel. + */ + if (signal_valid) + known->pub.signal = new->pub.signal; + known->pub.capability = new->pub.capability; + known->ts = new->ts; + known->ts_boottime = new->ts_boottime; + known->parent_tsf = new->parent_tsf; + known->pub.chains = new->pub.chains; + memcpy(known->pub.chain_signal, new->pub.chain_signal, + IEEE80211_MAX_CHAINS); + ether_addr_copy(known->parent_bssid, new->parent_bssid); + known->pub.max_bssid_indicator = new->pub.max_bssid_indicator; + known->pub.bssid_index = new->pub.bssid_index; + + return true; +} + /* Returned bss is reference counted and must be cleaned up appropriately. */ struct cfg80211_internal_bss * cfg80211_bss_update(struct cfg80211_registered_device *rdev, @@ -1114,88 +1201,8 @@ cfg80211_bss_update(struct cfg80211_registered_device *rdev, found = rb_find_bss(rdev, tmp, BSS_CMP_REGULAR); if (found) { - /* Update IEs */ - if (rcu_access_pointer(tmp->pub.proberesp_ies)) { - const struct cfg80211_bss_ies *old; - - old = rcu_access_pointer(found->pub.proberesp_ies); - - rcu_assign_pointer(found->pub.proberesp_ies, - tmp->pub.proberesp_ies); - /* Override possible earlier Beacon frame IEs */ - rcu_assign_pointer(found->pub.ies, - tmp->pub.proberesp_ies); - if (old) - kfree_rcu((struct cfg80211_bss_ies *)old, - rcu_head); - } else if (rcu_access_pointer(tmp->pub.beacon_ies)) { - const struct cfg80211_bss_ies *old; - struct cfg80211_internal_bss *bss; - - if (found->pub.hidden_beacon_bss && - !list_empty(&found->hidden_list)) { - const struct cfg80211_bss_ies *f; - - /* - * The found BSS struct is one of the probe - * response members of a group, but we're - * receiving a beacon (beacon_ies in the tmp - * bss is used). This can only mean that the - * AP changed its beacon from not having an - * SSID to showing it, which is confusing so - * drop this information. 
- */ - - f = rcu_access_pointer(tmp->pub.beacon_ies); - kfree_rcu((struct cfg80211_bss_ies *)f, - rcu_head); - goto drop; - } - - old = rcu_access_pointer(found->pub.beacon_ies); - - rcu_assign_pointer(found->pub.beacon_ies, - tmp->pub.beacon_ies); - - /* Override IEs if they were from a beacon before */ - if (old == rcu_access_pointer(found->pub.ies)) - rcu_assign_pointer(found->pub.ies, - tmp->pub.beacon_ies); - - /* Assign beacon IEs to all sub entries */ - list_for_each_entry(bss, &found->hidden_list, - hidden_list) { - const struct cfg80211_bss_ies *ies; - - ies = rcu_access_pointer(bss->pub.beacon_ies); - WARN_ON(ies != old); - - rcu_assign_pointer(bss->pub.beacon_ies, - tmp->pub.beacon_ies); - } - - if (old) - kfree_rcu((struct cfg80211_bss_ies *)old, - rcu_head); - } - - found->pub.beacon_interval = tmp->pub.beacon_interval; - /* - * don't update the signal if beacon was heard on - * adjacent channel. - */ - if (signal_valid) - found->pub.signal = tmp->pub.signal; - found->pub.capability = tmp->pub.capability; - found->ts = tmp->ts; - found->ts_boottime = tmp->ts_boottime; - found->parent_tsf = tmp->parent_tsf; - found->pub.chains = tmp->pub.chains; - memcpy(found->pub.chain_signal, tmp->pub.chain_signal, - IEEE80211_MAX_CHAINS); - ether_addr_copy(found->parent_bssid, tmp->parent_bssid); - found->pub.max_bssid_indicator = tmp->pub.max_bssid_indicator; - found->pub.bssid_index = tmp->pub.bssid_index; + if (!cfg80211_update_known_bss(rdev, found, tmp, signal_valid)) + goto drop; } else { struct cfg80211_internal_bss *new; struct cfg80211_internal_bss *hidden; @@ -1368,6 +1375,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, struct cfg80211_internal_bss tmp = {}, *res; int bss_type; bool signal_valid; + unsigned long ts; if (WARN_ON(!wiphy)) return NULL; @@ -1390,8 +1398,11 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, tmp.ts_boottime = data->boottime_ns; if (non_tx_data) { tmp.pub.transmitted_bss = non_tx_data->tx_bss; + ts = bss_from_pub(non_tx_data->tx_bss)->ts; tmp.pub.bssid_index = non_tx_data->bssid_index; tmp.pub.max_bssid_indicator = non_tx_data->max_bssid_indicator; + } else { + ts = jiffies; } /* @@ -1425,8 +1436,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, signal_valid = abs(data->chan->center_freq - channel->center_freq) <= wiphy->max_adj_channel_rssi_comp; - res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid, - jiffies); + res = cfg80211_bss_update(wiphy_to_rdev(wiphy), &tmp, signal_valid, ts); if (!res) return NULL; @@ -1440,7 +1450,7 @@ cfg80211_inform_single_bss_data(struct wiphy *wiphy, regulatory_hint_found_beacon(wiphy, channel, gfp); } - if (non_tx_data && non_tx_data->tx_bss) { + if (non_tx_data) { /* this is a nontransmitting bss, we need to add it to * transmitting bss' list if it is not there */ @@ -1659,6 +1669,8 @@ cfg80211_inform_bss_data(struct wiphy *wiphy, res = cfg80211_inform_single_bss_data(wiphy, data, ftype, bssid, tsf, capability, beacon_interval, ie, ielen, NULL, gfp); + if (!res) + return NULL; non_tx_data.tx_bss = res; cfg80211_parse_mbssid_data(wiphy, data, ftype, bssid, tsf, beacon_interval, ie, ielen, &non_tx_data, @@ -1776,7 +1788,6 @@ static struct cfg80211_bss * cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy, struct cfg80211_inform_bss *data, struct ieee80211_mgmt *mgmt, size_t len, - struct cfg80211_non_tx_bss *non_tx_data, gfp_t gfp) { struct cfg80211_internal_bss tmp = {}, *res; @@ -1835,11 +1846,6 @@ cfg80211_inform_single_bss_frame_data(struct wiphy *wiphy, tmp.pub.chains = 
data->chains; memcpy(tmp.pub.chain_signal, data->chain_signal, IEEE80211_MAX_CHAINS); ether_addr_copy(tmp.parent_bssid, data->parent_bssid); - if (non_tx_data) { - tmp.pub.transmitted_bss = non_tx_data->tx_bss; - tmp.pub.bssid_index = non_tx_data->bssid_index; - tmp.pub.max_bssid_indicator = non_tx_data->max_bssid_indicator; - } signal_valid = abs(data->chan->center_freq - channel->center_freq) <= wiphy->max_adj_channel_rssi_comp; @@ -1877,7 +1883,7 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, struct cfg80211_non_tx_bss non_tx_data; res = cfg80211_inform_single_bss_frame_data(wiphy, data, mgmt, - len, NULL, gfp); + len, gfp); if (!res || !wiphy->support_mbssid || !cfg80211_find_ie(WLAN_EID_MULTIPLE_BSSID, ie, ielen)) return res; @@ -1995,6 +2001,85 @@ void cfg80211_bss_iter(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_bss_iter); +void cfg80211_update_assoc_bss_entry(struct wireless_dev *wdev, + struct ieee80211_channel *chan) +{ + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct cfg80211_internal_bss *cbss = wdev->current_bss; + struct cfg80211_internal_bss *new = NULL; + struct cfg80211_internal_bss *bss; + struct cfg80211_bss *nontrans_bss; + struct cfg80211_bss *tmp; + + spin_lock_bh(&rdev->bss_lock); + + if (WARN_ON(cbss->pub.channel == chan)) + goto done; + + /* use transmitting bss */ + if (cbss->pub.transmitted_bss) + cbss = container_of(cbss->pub.transmitted_bss, + struct cfg80211_internal_bss, + pub); + + cbss->pub.channel = chan; + + list_for_each_entry(bss, &rdev->bss_list, list) { + if (!cfg80211_bss_type_match(bss->pub.capability, + bss->pub.channel->band, + wdev->conn_bss_type)) + continue; + + if (bss == cbss) + continue; + + if (!cmp_bss(&bss->pub, &cbss->pub, BSS_CMP_REGULAR)) { + new = bss; + break; + } + } + + if (new) { + /* to save time, update IEs for transmitting bss only */ + if (cfg80211_update_known_bss(rdev, cbss, new, false)) { + new->pub.proberesp_ies = NULL; + new->pub.beacon_ies = NULL; + } + + list_for_each_entry_safe(nontrans_bss, tmp, + &new->pub.nontrans_list, + nontrans_list) { + bss = container_of(nontrans_bss, + struct cfg80211_internal_bss, pub); + if (__cfg80211_unlink_bss(rdev, bss)) + rdev->bss_generation++; + } + + WARN_ON(atomic_read(&new->hold)); + if (!WARN_ON(!__cfg80211_unlink_bss(rdev, new))) + rdev->bss_generation++; + } + + rb_erase(&cbss->rbn, &rdev->bss_tree); + rb_insert_bss(rdev, cbss); + rdev->bss_generation++; + + list_for_each_entry_safe(nontrans_bss, tmp, + &cbss->pub.nontrans_list, + nontrans_list) { + bss = container_of(nontrans_bss, + struct cfg80211_internal_bss, pub); + bss->pub.channel = chan; + rb_erase(&bss->rbn, &rdev->bss_tree); + rb_insert_bss(rdev, bss); + rdev->bss_generation++; + } + +done: + spin_unlock_bh(&rdev->bss_lock); +} + #ifdef CONFIG_CFG80211_WEXT static struct cfg80211_registered_device * cfg80211_get_dev_from_ifindex(struct net *net, int ifindex) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 4fbb91a511ae..d98ad2b3143b 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2446,10 +2446,11 @@ TRACE_EVENT(rdev_set_mcast_rate, sizeof(int) * NUM_NL80211_BANDS); ), TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " - "mcast_rates [2.4GHz=0x%x, 5.2GHz=0x%x, 60GHz=0x%x]", + "mcast_rates [2.4GHz=0x%x, 5.2GHz=0x%x, 6GHz=0x%x, 60GHz=0x%x]", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->mcast_rate[NL80211_BAND_2GHZ], __entry->mcast_rate[NL80211_BAND_5GHZ], + __entry->mcast_rate[NL80211_BAND_6GHZ], 
__entry->mcast_rate[NL80211_BAND_60GHZ]) ); diff --git a/net/wireless/util.c b/net/wireless/util.c index e74837824cea..419eb12c1e93 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -91,6 +91,11 @@ int ieee80211_channel_to_frequency(int chan, enum nl80211_band band) else return 5000 + chan * 5; break; + case NL80211_BAND_6GHZ: + /* see 802.11ax D4.1 27.3.22.2 */ + if (chan <= 253) + return 5940 + chan * 5; + break; case NL80211_BAND_60GHZ: if (chan < 7) return 56160 + chan * 2160; @@ -111,8 +116,11 @@ int ieee80211_frequency_to_channel(int freq) return (freq - 2407) / 5; else if (freq >= 4910 && freq <= 4980) return (freq - 4000) / 5; - else if (freq <= 45000) /* DMG band lower limit */ + else if (freq < 5945) return (freq - 5000) / 5; + else if (freq <= 45000) /* DMG band lower limit */ + /* see 802.11ax D4.1 27.3.22.2 */ + return (freq - 5940) / 5; else if (freq >= 58320 && freq <= 70200) return (freq - 56160) / 2160; else @@ -148,6 +156,7 @@ static void set_mandatory_flags_band(struct ieee80211_supported_band *sband) switch (sband->band) { case NL80211_BAND_5GHZ: + case NL80211_BAND_6GHZ: want = 3; for (i = 0; i < sband->n_bitrates; i++) { if (sband->bitrates[i].bitrate == 60 || @@ -960,6 +969,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, } cfg80211_process_rdev_events(rdev); + cfg80211_mlme_purge_registrations(dev->ieee80211_ptr); } err = rdev_change_virtual_intf(rdev, dev, ntype, params); @@ -1039,7 +1049,7 @@ static u32 cfg80211_calculate_bitrate_ht(struct rate_info *rate) return (bitrate + 50000) / 100000; } -static u32 cfg80211_calculate_bitrate_60g(struct rate_info *rate) +static u32 cfg80211_calculate_bitrate_dmg(struct rate_info *rate) { static const u32 __mcs2bitrate[] = { /* control PHY */ @@ -1086,6 +1096,40 @@ static u32 cfg80211_calculate_bitrate_60g(struct rate_info *rate) return __mcs2bitrate[rate->mcs]; } +static u32 cfg80211_calculate_bitrate_edmg(struct rate_info *rate) +{ + static const u32 __mcs2bitrate[] = { + /* control PHY */ + [0] = 275, + /* SC PHY */ + [1] = 3850, + [2] = 7700, + [3] = 9625, + [4] = 11550, + [5] = 12512, /* 1251.25 mbps */ + [6] = 13475, + [7] = 15400, + [8] = 19250, + [9] = 23100, + [10] = 25025, + [11] = 26950, + [12] = 30800, + [13] = 38500, + [14] = 46200, + [15] = 50050, + [16] = 53900, + [17] = 57750, + [18] = 69300, + [19] = 75075, + [20] = 80850, + }; + + if (WARN_ON_ONCE(rate->mcs >= ARRAY_SIZE(__mcs2bitrate))) + return 0; + + return __mcs2bitrate[rate->mcs] * rate->n_bonded_ch; +} + static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate) { static const u32 base[4][10] = { @@ -1258,8 +1302,10 @@ u32 cfg80211_calculate_bitrate(struct rate_info *rate) { if (rate->flags & RATE_INFO_FLAGS_MCS) return cfg80211_calculate_bitrate_ht(rate); - if (rate->flags & RATE_INFO_FLAGS_60G) - return cfg80211_calculate_bitrate_60g(rate); + if (rate->flags & RATE_INFO_FLAGS_DMG) + return cfg80211_calculate_bitrate_dmg(rate); + if (rate->flags & RATE_INFO_FLAGS_EDMG) + return cfg80211_calculate_bitrate_edmg(rate); if (rate->flags & RATE_INFO_FLAGS_VHT_MCS) return cfg80211_calculate_bitrate_vht(rate); if (rate->flags & RATE_INFO_FLAGS_HE_MCS) @@ -1471,6 +1517,9 @@ bool ieee80211_operating_class_to_band(u8 operating_class, case 128 ... 130: *band = NL80211_BAND_5GHZ; return true; + case 131 ... 
135: + *band = NL80211_BAND_6GHZ; + return true; case 81: case 82: case 83: diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 46e4d69db845..7b6529d81c61 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -7,6 +7,7 @@ * we directly assign the wireless handlers of wireless interfaces. * * Copyright 2008-2009 Johannes Berg <johannes@sipsolutions.net> + * Copyright (C) 2019 Intel Corporation */ #include <linux/export.h> @@ -864,8 +865,8 @@ static int cfg80211_wext_siwtxpower(struct net_device *dev, } } } else { - rfkill_set_sw_state(rdev->rfkill, true); - schedule_work(&rdev->rfkill_sync); + if (rfkill_set_sw_state(rdev->rfkill, true)) + schedule_work(&rdev->rfkill_block); return 0; } diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 688aac7a6943..947b8ff0227e 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -14,6 +14,7 @@ #include <linux/netdevice.h> #include <linux/rtnetlink.h> #include <linux/idr.h> +#include <linux/vmalloc.h> #include "xdp_umem.h" #include "xsk_queue.h" @@ -105,14 +106,22 @@ int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, umem->dev = dev; umem->queue_id = queue_id; + if (flags & XDP_USE_NEED_WAKEUP) { + umem->flags |= XDP_UMEM_USES_NEED_WAKEUP; + /* Tx needs to be explicitly woken up the first time. + * Also for supporting drivers that do not implement this + * feature. They will always have to call sendto(). + */ + xsk_set_tx_need_wakeup(umem); + } + dev_hold(dev); if (force_copy) /* For copy-mode, we are done. */ return 0; - if (!dev->netdev_ops->ndo_bpf || - !dev->netdev_ops->ndo_xsk_async_xmit) { + if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_wakeup) { err = -EOPNOTSUPP; goto err_unreg_umem; } @@ -164,6 +173,37 @@ void xdp_umem_clear_dev(struct xdp_umem *umem) umem->zc = false; } +static void xdp_umem_unmap_pages(struct xdp_umem *umem) +{ + unsigned int i; + + for (i = 0; i < umem->npgs; i++) + if (PageHighMem(umem->pgs[i])) + vunmap(umem->pages[i].addr); +} + +static int xdp_umem_map_pages(struct xdp_umem *umem) +{ + unsigned int i; + void *addr; + + for (i = 0; i < umem->npgs; i++) { + if (PageHighMem(umem->pgs[i])) + addr = vmap(&umem->pgs[i], 1, VM_MAP, PAGE_KERNEL); + else + addr = page_address(umem->pgs[i]); + + if (!addr) { + xdp_umem_unmap_pages(umem); + return -ENOMEM; + } + + umem->pages[i].addr = addr; + } + + return 0; +} + static void xdp_umem_unpin_pages(struct xdp_umem *umem) { unsigned int i; @@ -207,6 +247,7 @@ static void xdp_umem_release(struct xdp_umem *umem) xsk_reuseq_destroy(umem); + xdp_umem_unmap_pages(umem); xdp_umem_unpin_pages(umem); kfree(umem->pages); @@ -299,10 +340,11 @@ static int xdp_umem_account_pages(struct xdp_umem *umem) static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) { + bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; u32 chunk_size = mr->chunk_size, headroom = mr->headroom; unsigned int chunks, chunks_per_page; u64 addr = mr->addr, size = mr->len; - int size_chk, err, i; + int size_chk, err; if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) { /* Strictly speaking we could support this, if: @@ -314,7 +356,11 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) return -EINVAL; } - if (!is_power_of_2(chunk_size)) + if (mr->flags & ~(XDP_UMEM_UNALIGNED_CHUNK_FLAG | + XDP_UMEM_USES_NEED_WAKEUP)) + return -EINVAL; + + if (!unaligned_chunks && !is_power_of_2(chunk_size)) return -EINVAL; if (!PAGE_ALIGNED(addr)) { @@ -331,9 +377,11 @@ static int 
xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) if (chunks == 0) return -EINVAL; - chunks_per_page = PAGE_SIZE / chunk_size; - if (chunks < chunks_per_page || chunks % chunks_per_page) - return -EINVAL; + if (!unaligned_chunks) { + chunks_per_page = PAGE_SIZE / chunk_size; + if (chunks < chunks_per_page || chunks % chunks_per_page) + return -EINVAL; + } headroom = ALIGN(headroom, 64); @@ -342,13 +390,15 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) return -EINVAL; umem->address = (unsigned long)addr; - umem->chunk_mask = ~((u64)chunk_size - 1); + umem->chunk_mask = unaligned_chunks ? XSK_UNALIGNED_BUF_ADDR_MASK + : ~((u64)chunk_size - 1); umem->size = size; umem->headroom = headroom; umem->chunk_size_nohr = chunk_size - headroom; umem->npgs = size / PAGE_SIZE; umem->pgs = NULL; umem->user = NULL; + umem->flags = mr->flags; INIT_LIST_HEAD(&umem->xsk_list); spin_lock_init(&umem->xsk_list_lock); @@ -368,10 +418,11 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) goto out_pin; } - for (i = 0; i < umem->npgs; i++) - umem->pages[i].addr = page_address(umem->pgs[i]); + err = xdp_umem_map_pages(umem); + if (!err) + return 0; - return 0; + kfree(umem->pages); out_pin: xdp_umem_unpin_pages(umem); diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 59b57d708697..c2f1af3b6a7c 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -45,7 +45,7 @@ EXPORT_SYMBOL(xsk_umem_has_addrs); u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr) { - return xskq_peek_addr(umem->fq, addr); + return xskq_peek_addr(umem->fq, addr, umem); } EXPORT_SYMBOL(xsk_umem_peek_addr); @@ -55,21 +55,103 @@ void xsk_umem_discard_addr(struct xdp_umem *umem) } EXPORT_SYMBOL(xsk_umem_discard_addr); +void xsk_set_rx_need_wakeup(struct xdp_umem *umem) +{ + if (umem->need_wakeup & XDP_WAKEUP_RX) + return; + + umem->fq->ring->flags |= XDP_RING_NEED_WAKEUP; + umem->need_wakeup |= XDP_WAKEUP_RX; +} +EXPORT_SYMBOL(xsk_set_rx_need_wakeup); + +void xsk_set_tx_need_wakeup(struct xdp_umem *umem) +{ + struct xdp_sock *xs; + + if (umem->need_wakeup & XDP_WAKEUP_TX) + return; + + rcu_read_lock(); + list_for_each_entry_rcu(xs, &umem->xsk_list, list) { + xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP; + } + rcu_read_unlock(); + + umem->need_wakeup |= XDP_WAKEUP_TX; +} +EXPORT_SYMBOL(xsk_set_tx_need_wakeup); + +void xsk_clear_rx_need_wakeup(struct xdp_umem *umem) +{ + if (!(umem->need_wakeup & XDP_WAKEUP_RX)) + return; + + umem->fq->ring->flags &= ~XDP_RING_NEED_WAKEUP; + umem->need_wakeup &= ~XDP_WAKEUP_RX; +} +EXPORT_SYMBOL(xsk_clear_rx_need_wakeup); + +void xsk_clear_tx_need_wakeup(struct xdp_umem *umem) +{ + struct xdp_sock *xs; + + if (!(umem->need_wakeup & XDP_WAKEUP_TX)) + return; + + rcu_read_lock(); + list_for_each_entry_rcu(xs, &umem->xsk_list, list) { + xs->tx->ring->flags &= ~XDP_RING_NEED_WAKEUP; + } + rcu_read_unlock(); + + umem->need_wakeup &= ~XDP_WAKEUP_TX; +} +EXPORT_SYMBOL(xsk_clear_tx_need_wakeup); + +bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem) +{ + return umem->flags & XDP_UMEM_USES_NEED_WAKEUP; +} +EXPORT_SYMBOL(xsk_umem_uses_need_wakeup); + +/* If a buffer crosses a page boundary, we need to do 2 memcpy's, one for + * each page. This is only required in copy mode. 
+ */ +static void __xsk_rcv_memcpy(struct xdp_umem *umem, u64 addr, void *from_buf, + u32 len, u32 metalen) +{ + void *to_buf = xdp_umem_get_data(umem, addr); + + addr = xsk_umem_add_offset_to_addr(addr); + if (xskq_crosses_non_contig_pg(umem, addr, len + metalen)) { + void *next_pg_addr = umem->pages[(addr >> PAGE_SHIFT) + 1].addr; + u64 page_start = addr & ~(PAGE_SIZE - 1); + u64 first_len = PAGE_SIZE - (addr - page_start); + + memcpy(to_buf, from_buf, first_len + metalen); + memcpy(next_pg_addr, from_buf + first_len, len - first_len); + + return; + } + + memcpy(to_buf, from_buf, len + metalen); +} + static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) { - void *to_buf, *from_buf; + u64 offset = xs->umem->headroom; + u64 addr, memcpy_addr; + void *from_buf; u32 metalen; - u64 addr; int err; - if (!xskq_peek_addr(xs->umem->fq, &addr) || + if (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) || len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) { xs->rx_dropped++; return -ENOSPC; } - addr += xs->umem->headroom; - if (unlikely(xdp_data_meta_unsupported(xdp))) { from_buf = xdp->data; metalen = 0; @@ -78,9 +160,11 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) metalen = xdp->data - xdp->data_meta; } - to_buf = xdp_umem_get_data(xs->umem, addr); - memcpy(to_buf, from_buf, len + metalen); - addr += metalen; + memcpy_addr = xsk_umem_adjust_offset(xs->umem, addr, offset); + __xsk_rcv_memcpy(xs->umem, memcpy_addr, from_buf, len, metalen); + + offset += metalen; + addr = xsk_umem_adjust_offset(xs->umem, addr, offset); err = xskq_produce_batch_desc(xs->rx, addr, len); if (!err) { xskq_discard_addr(xs->umem->fq); @@ -102,10 +186,23 @@ static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) return err; } +static bool xsk_is_bound(struct xdp_sock *xs) +{ + if (READ_ONCE(xs->state) == XSK_BOUND) { + /* Matches smp_wmb() in bind(). 
*/ + smp_rmb(); + return true; + } + return false; +} + int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) { u32 len; + if (!xsk_is_bound(xs)) + return -EINVAL; + if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index) return -EINVAL; @@ -125,6 +222,7 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) { u32 metalen = xdp->data - xdp->data_meta; u32 len = xdp->data_end - xdp->data; + u64 offset = xs->umem->headroom; void *buffer; u64 addr; int err; @@ -136,17 +234,17 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) goto out_unlock; } - if (!xskq_peek_addr(xs->umem->fq, &addr) || + if (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) || len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) { err = -ENOSPC; goto out_drop; } - addr += xs->umem->headroom; - + addr = xsk_umem_adjust_offset(xs->umem, addr, offset); buffer = xdp_umem_get_data(xs->umem, addr); memcpy(buffer, xdp->data_meta, len + metalen); - addr += metalen; + + addr = xsk_umem_adjust_offset(xs->umem, addr, metalen); err = xskq_produce_batch_desc(xs->rx, addr, len); if (err) goto out_drop; @@ -190,7 +288,7 @@ bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc) rcu_read_lock(); list_for_each_entry_rcu(xs, &umem->xsk_list, list) { - if (!xskq_peek_desc(xs->tx, desc)) + if (!xskq_peek_desc(xs->tx, desc, umem)) continue; if (xskq_produce_addr_lazy(umem->cq, desc->addr)) @@ -212,7 +310,8 @@ static int xsk_zc_xmit(struct sock *sk) struct xdp_sock *xs = xdp_sk(sk); struct net_device *dev = xs->dev; - return dev->netdev_ops->ndo_xsk_async_xmit(dev, xs->queue_id); + return dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, + XDP_WAKEUP_TX); } static void xsk_destruct_skb(struct sk_buff *skb) @@ -243,7 +342,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m, if (xs->queue_id >= xs->dev->real_num_tx_queues) goto out; - while (xskq_peek_desc(xs->tx, &desc)) { + while (xskq_peek_desc(xs->tx, &desc, xs->umem)) { char *buffer; u64 addr; u32 len; @@ -272,7 +371,7 @@ static int xsk_generic_xmit(struct sock *sk, struct msghdr *m, skb->dev = xs->dev; skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb_shinfo(skb)->destructor_arg = (void *)(long)addr; + skb_shinfo(skb)->destructor_arg = (void *)(long)desc.addr; skb->destructor = xsk_destruct_skb; err = dev_direct_xmit(skb, xs->queue_id); @@ -301,7 +400,7 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) struct sock *sk = sock->sk; struct xdp_sock *xs = xdp_sk(sk); - if (unlikely(!xs->dev)) + if (unlikely(!xsk_is_bound(xs))) return -ENXIO; if (unlikely(!(xs->dev->flags & IFF_UP))) return -ENETDOWN; @@ -317,8 +416,19 @@ static unsigned int xsk_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait) { unsigned int mask = datagram_poll(file, sock, wait); - struct sock *sk = sock->sk; - struct xdp_sock *xs = xdp_sk(sk); + struct xdp_sock *xs = xdp_sk(sock->sk); + struct net_device *dev; + struct xdp_umem *umem; + + if (unlikely(!xsk_is_bound(xs))) + return mask; + + dev = xs->dev; + umem = xs->umem; + + if (umem->need_wakeup) + dev->netdev_ops->ndo_xsk_wakeup(dev, xs->queue_id, + umem->need_wakeup); if (xs->rx && !xskq_empty_desc(xs->rx)) mask |= POLLIN | POLLRDNORM; @@ -342,7 +452,7 @@ static int xsk_init_queue(u32 entries, struct xsk_queue **queue, /* Make sure queue is ready before it can be seen by others */ smp_wmb(); - *queue = q; + WRITE_ONCE(*queue, q); return 0; } @@ -350,10 +460,9 @@ static void xsk_unbind_dev(struct xdp_sock *xs) { struct 
net_device *dev = xs->dev; - if (!dev || xs->state != XSK_BOUND) + if (xs->state != XSK_BOUND) return; - - xs->state = XSK_UNBOUND; + WRITE_ONCE(xs->state, XSK_UNBOUND); /* Wait for driver to stop using the xdp socket. */ xdp_del_sk_umem(xs->umem, xs); @@ -362,6 +471,52 @@ static void xsk_unbind_dev(struct xdp_sock *xs) dev_put(dev); } +static struct xsk_map *xsk_get_map_list_entry(struct xdp_sock *xs, + struct xdp_sock ***map_entry) +{ + struct xsk_map *map = NULL; + struct xsk_map_node *node; + + *map_entry = NULL; + + spin_lock_bh(&xs->map_list_lock); + node = list_first_entry_or_null(&xs->map_list, struct xsk_map_node, + node); + if (node) { + WARN_ON(xsk_map_inc(node->map)); + map = node->map; + *map_entry = node->map_entry; + } + spin_unlock_bh(&xs->map_list_lock); + return map; +} + +static void xsk_delete_from_maps(struct xdp_sock *xs) +{ + /* This function removes the current XDP socket from all the + * maps it resides in. We need to take extra care here, due to + * the two locks involved. Each map has a lock synchronizing + * updates to the entries, and each socket has a lock that + * synchronizes access to the list of maps (map_list). For + * deadlock avoidance the locks need to be taken in the order + * "map lock"->"socket map list lock". We start off by + * accessing the socket map list, and take a reference to the + * map to guarantee existence between the + * xsk_get_map_list_entry() and xsk_map_try_sock_delete() + * calls. Then we ask the map to remove the socket, which + * tries to remove the socket from the map. Note that there + * might be updates to the map between + * xsk_get_map_list_entry() and xsk_map_try_sock_delete(). + */ + struct xdp_sock **map_entry = NULL; + struct xsk_map *map; + + while ((map = xsk_get_map_list_entry(xs, &map_entry))) { + xsk_map_try_sock_delete(map, xs, map_entry); + xsk_map_put(map); + } +} + static int xsk_release(struct socket *sock) { struct sock *sk = sock->sk; @@ -381,7 +536,10 @@ static int xsk_release(struct socket *sock) sock_prot_inuse_add(net, sk->sk_prot, -1); local_bh_enable(); + xsk_delete_from_maps(xs); + mutex_lock(&xs->mutex); xsk_unbind_dev(xs); + mutex_unlock(&xs->mutex); xskq_destroy(xs->rx); xskq_destroy(xs->tx); @@ -412,6 +570,24 @@ static struct socket *xsk_lookup_xsk_from_fd(int fd) return sock; } +/* Check if umem pages are contiguous. + * If zero-copy mode, use the DMA address to do the page contiguity check + * For all other modes we use addr (kernel virtual address) + * Store the result in the low bits of addr. + */ +static void xsk_check_page_contiguity(struct xdp_umem *umem, u32 flags) +{ + struct xdp_umem_page *pgs = umem->pages; + int i, is_contig; + + for (i = 0; i < umem->npgs - 1; i++) { + is_contig = (flags & XDP_ZEROCOPY) ? 
+ (pgs[i].dma + PAGE_SIZE == pgs[i + 1].dma) : + (pgs[i].addr + PAGE_SIZE == pgs[i + 1].addr); + pgs[i].addr += is_contig << XSK_NEXT_PG_CONTIG_SHIFT; + } +} + static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr; @@ -427,7 +603,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) return -EINVAL; flags = sxdp->sxdp_flags; - if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY)) + if (flags & ~(XDP_SHARED_UMEM | XDP_COPY | XDP_ZEROCOPY | + XDP_USE_NEED_WAKEUP)) return -EINVAL; rtnl_lock(); @@ -454,7 +631,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) struct xdp_sock *umem_xs; struct socket *sock; - if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY)) { + if ((flags & XDP_COPY) || (flags & XDP_ZEROCOPY) || + (flags & XDP_USE_NEED_WAKEUP)) { /* Cannot specify flags for shared sockets. */ err = -EINVAL; goto out_unlock; @@ -473,19 +651,19 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) } umem_xs = xdp_sk(sock->sk); - if (!umem_xs->umem) { - /* No umem to inherit. */ + if (!xsk_is_bound(umem_xs)) { err = -EBADF; sockfd_put(sock); goto out_unlock; - } else if (umem_xs->dev != dev || umem_xs->queue_id != qid) { + } + if (umem_xs->dev != dev || umem_xs->queue_id != qid) { err = -EINVAL; sockfd_put(sock); goto out_unlock; } xdp_get_umem(umem_xs->umem); - xs->umem = umem_xs->umem; + WRITE_ONCE(xs->umem, umem_xs->umem); sockfd_put(sock); } else if (!xs->umem || !xdp_umem_validate_queues(xs->umem)) { err = -EINVAL; @@ -500,6 +678,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) err = xdp_umem_assign_dev(xs->umem, dev, qid, flags); if (err) goto out_unlock; + + xsk_check_page_contiguity(xs->umem, flags); } xs->dev = dev; @@ -510,16 +690,28 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) xdp_add_sk_umem(xs->umem, xs); out_unlock: - if (err) + if (err) { dev_put(dev); - else - xs->state = XSK_BOUND; + } else { + /* Matches smp_rmb() in bind() for shared umem + * sockets, and xsk_is_bound(). + */ + smp_wmb(); + WRITE_ONCE(xs->state, XSK_BOUND); + } out_release: mutex_unlock(&xs->mutex); rtnl_unlock(); return err; } +struct xdp_umem_reg_v1 { + __u64 addr; /* Start of packet data area */ + __u64 len; /* Length of packet data area */ + __u32 chunk_size; + __u32 headroom; +}; + static int xsk_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) { @@ -549,15 +741,24 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, } q = (optname == XDP_TX_RING) ? 
&xs->tx : &xs->rx; err = xsk_init_queue(entries, q, false); + if (!err && optname == XDP_TX_RING) + /* Tx needs to be explicitly woken up the first time */ + xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP; mutex_unlock(&xs->mutex); return err; } case XDP_UMEM_REG: { - struct xdp_umem_reg mr; + size_t mr_size = sizeof(struct xdp_umem_reg); + struct xdp_umem_reg mr = {}; struct xdp_umem *umem; - if (copy_from_user(&mr, optval, sizeof(mr))) + if (optlen < sizeof(struct xdp_umem_reg_v1)) + return -EINVAL; + else if (optlen < sizeof(mr)) + mr_size = sizeof(struct xdp_umem_reg_v1); + + if (copy_from_user(&mr, optval, mr_size)) return -EFAULT; mutex_lock(&xs->mutex); @@ -574,7 +775,7 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, /* Make sure umem is ready before it can be seen by others */ smp_wmb(); - xs->umem = umem; + WRITE_ONCE(xs->umem, umem); mutex_unlock(&xs->mutex); return 0; } @@ -610,6 +811,20 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname, return -ENOPROTOOPT; } +static void xsk_enter_rxtx_offsets(struct xdp_ring_offset_v1 *ring) +{ + ring->producer = offsetof(struct xdp_rxtx_ring, ptrs.producer); + ring->consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer); + ring->desc = offsetof(struct xdp_rxtx_ring, desc); +} + +static void xsk_enter_umem_offsets(struct xdp_ring_offset_v1 *ring) +{ + ring->producer = offsetof(struct xdp_umem_ring, ptrs.producer); + ring->consumer = offsetof(struct xdp_umem_ring, ptrs.consumer); + ring->desc = offsetof(struct xdp_umem_ring, desc); +} + static int xsk_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { @@ -649,26 +864,49 @@ static int xsk_getsockopt(struct socket *sock, int level, int optname, case XDP_MMAP_OFFSETS: { struct xdp_mmap_offsets off; + struct xdp_mmap_offsets_v1 off_v1; + bool flags_supported = true; + void *to_copy; - if (len < sizeof(off)) + if (len < sizeof(off_v1)) return -EINVAL; + else if (len < sizeof(off)) + flags_supported = false; + + if (flags_supported) { + /* xdp_ring_offset is identical to xdp_ring_offset_v1 + * except for the flags field added to the end. 
+ */ + xsk_enter_rxtx_offsets((struct xdp_ring_offset_v1 *) + &off.rx); + xsk_enter_rxtx_offsets((struct xdp_ring_offset_v1 *) + &off.tx); + xsk_enter_umem_offsets((struct xdp_ring_offset_v1 *) + &off.fr); + xsk_enter_umem_offsets((struct xdp_ring_offset_v1 *) + &off.cr); + off.rx.flags = offsetof(struct xdp_rxtx_ring, + ptrs.flags); + off.tx.flags = offsetof(struct xdp_rxtx_ring, + ptrs.flags); + off.fr.flags = offsetof(struct xdp_umem_ring, + ptrs.flags); + off.cr.flags = offsetof(struct xdp_umem_ring, + ptrs.flags); + + len = sizeof(off); + to_copy = &off; + } else { + xsk_enter_rxtx_offsets(&off_v1.rx); + xsk_enter_rxtx_offsets(&off_v1.tx); + xsk_enter_umem_offsets(&off_v1.fr); + xsk_enter_umem_offsets(&off_v1.cr); + + len = sizeof(off_v1); + to_copy = &off_v1; + } - off.rx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer); - off.rx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer); - off.rx.desc = offsetof(struct xdp_rxtx_ring, desc); - off.tx.producer = offsetof(struct xdp_rxtx_ring, ptrs.producer); - off.tx.consumer = offsetof(struct xdp_rxtx_ring, ptrs.consumer); - off.tx.desc = offsetof(struct xdp_rxtx_ring, desc); - - off.fr.producer = offsetof(struct xdp_umem_ring, ptrs.producer); - off.fr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer); - off.fr.desc = offsetof(struct xdp_umem_ring, desc); - off.cr.producer = offsetof(struct xdp_umem_ring, ptrs.producer); - off.cr.consumer = offsetof(struct xdp_umem_ring, ptrs.consumer); - off.cr.desc = offsetof(struct xdp_umem_ring, desc); - - len = sizeof(off); - if (copy_to_user(optval, &off, len)) + if (copy_to_user(optval, to_copy, len)) return -EFAULT; if (put_user(len, optlen)) return -EFAULT; @@ -713,7 +951,7 @@ static int xsk_mmap(struct file *file, struct socket *sock, unsigned long pfn; struct page *qpg; - if (xs->state != XSK_READY) + if (READ_ONCE(xs->state) != XSK_READY) return -EBUSY; if (offset == XDP_PGOFF_RX_RING) { @@ -855,6 +1093,9 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol, spin_lock_init(&xs->rx_lock); spin_lock_init(&xs->tx_completion_lock); + INIT_LIST_HEAD(&xs->map_list); + spin_lock_init(&xs->map_list_lock); + mutex_lock(&net->xdp.lock); sk_add_node_rcu(sk, &net->xdp.list); mutex_unlock(&net->xdp.lock); diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h index ba8120610426..4cfd106bdb53 100644 --- a/net/xdp/xsk.h +++ b/net/xdp/xsk.h @@ -4,6 +4,19 @@ #ifndef XSK_H_ #define XSK_H_ +struct xdp_ring_offset_v1 { + __u64 producer; + __u64 consumer; + __u64 desc; +}; + +struct xdp_mmap_offsets_v1 { + struct xdp_ring_offset_v1 rx; + struct xdp_ring_offset_v1 tx; + struct xdp_ring_offset_v1 fr; + struct xdp_ring_offset_v1 cr; +}; + static inline struct xdp_sock *xdp_sk(struct sock *sk) { return (struct xdp_sock *)sk; diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c index d5e06c8e0cbf..f59791ba43a0 100644 --- a/net/xdp/xsk_diag.c +++ b/net/xdp/xsk_diag.c @@ -56,7 +56,7 @@ static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb) du.id = umem->id; du.size = umem->size; du.num_pages = umem->npgs; - du.chunk_size = (__u32)(~umem->chunk_mask + 1); + du.chunk_size = umem->chunk_size_nohr + umem->headroom; du.headroom = umem->headroom; du.ifindex = umem->dev ? 
umem->dev->ifindex : 0; du.queue_id = umem->queue_id; @@ -97,6 +97,7 @@ static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb, msg->xdiag_ino = sk_ino; sock_diag_save_cookie(sk, msg->xdiag_cookie); + mutex_lock(&xs->mutex); if ((req->xdiag_show & XDP_SHOW_INFO) && xsk_diag_put_info(xs, nlskb)) goto out_nlmsg_trim; @@ -117,10 +118,12 @@ static int xsk_diag_fill(struct sock *sk, struct sk_buff *nlskb, sock_diag_put_meminfo(sk, nlskb, XDP_DIAG_MEMINFO)) goto out_nlmsg_trim; + mutex_unlock(&xs->mutex); nlmsg_end(nlskb, nlh); return 0; out_nlmsg_trim: + mutex_unlock(&xs->mutex); nlmsg_cancel(nlskb, nlh); return -EMSGSIZE; } diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 909c5168ed0f..eddae4688862 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -16,6 +16,7 @@ struct xdp_ring { u32 producer ____cacheline_aligned_in_smp; u32 consumer ____cacheline_aligned_in_smp; + u32 flags; }; /* Used for the RX and TX queues for packets */ @@ -133,6 +134,17 @@ static inline bool xskq_has_addrs(struct xsk_queue *q, u32 cnt) /* UMEM queue */ +static inline bool xskq_crosses_non_contig_pg(struct xdp_umem *umem, u64 addr, + u64 length) +{ + bool cross_pg = (addr & (PAGE_SIZE - 1)) + length > PAGE_SIZE; + bool next_pg_contig = + (unsigned long)umem->pages[(addr >> PAGE_SHIFT)].addr & + XSK_NEXT_PG_CONTIG_MASK; + + return cross_pg && !next_pg_contig; +} + static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr) { if (addr >= q->size) { @@ -143,23 +155,51 @@ static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr) return true; } -static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr) +static inline bool xskq_is_valid_addr_unaligned(struct xsk_queue *q, u64 addr, + u64 length, + struct xdp_umem *umem) +{ + u64 base_addr = xsk_umem_extract_addr(addr); + + addr = xsk_umem_add_offset_to_addr(addr); + if (base_addr >= q->size || addr >= q->size || + xskq_crosses_non_contig_pg(umem, addr, length)) { + q->invalid_descs++; + return false; + } + + return true; +} + +static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr, + struct xdp_umem *umem) { while (q->cons_tail != q->cons_head) { struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; unsigned int idx = q->cons_tail & q->ring_mask; *addr = READ_ONCE(ring->desc[idx]) & q->chunk_mask; + + if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) { + if (xskq_is_valid_addr_unaligned(q, *addr, + umem->chunk_size_nohr, + umem)) + return addr; + goto out; + } + if (xskq_is_valid_addr(q, *addr)) return addr; +out: q->cons_tail++; } return NULL; } -static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr) +static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr, + struct xdp_umem *umem) { if (q->cons_tail == q->cons_head) { smp_mb(); /* D, matches A */ @@ -170,7 +210,7 @@ static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr) smp_rmb(); } - return xskq_validate_addr(q, addr); + return xskq_validate_addr(q, addr, umem); } static inline void xskq_discard_addr(struct xsk_queue *q) @@ -229,8 +269,21 @@ static inline int xskq_reserve_addr(struct xsk_queue *q) /* Rx/Tx queue */ -static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d) +static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d, + struct xdp_umem *umem) { + if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) { + if (!xskq_is_valid_addr_unaligned(q, d->addr, d->len, umem)) + return false; + + if (d->len > umem->chunk_size_nohr || d->options) { + q->invalid_descs++; + 
return false; + } + + return true; + } + if (!xskq_is_valid_addr(q, d->addr)) return false; @@ -244,14 +297,15 @@ static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d) } static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q, - struct xdp_desc *desc) + struct xdp_desc *desc, + struct xdp_umem *umem) { while (q->cons_tail != q->cons_head) { struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; unsigned int idx = q->cons_tail & q->ring_mask; *desc = READ_ONCE(ring->desc[idx]); - if (xskq_is_valid_desc(q, desc)) + if (xskq_is_valid_desc(q, desc, umem)) return desc; q->cons_tail++; @@ -261,7 +315,8 @@ static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q, } static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q, - struct xdp_desc *desc) + struct xdp_desc *desc, + struct xdp_umem *umem) { if (q->cons_tail == q->cons_head) { smp_mb(); /* D, matches A */ @@ -272,7 +327,7 @@ static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q, smp_rmb(); /* C, matches B */ } - return xskq_validate_desc(q, desc); + return xskq_validate_desc(q, desc, umem); } static inline void xskq_discard_desc(struct xsk_queue *q) diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index 32c364d3bfb3..4d422447aadc 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -85,7 +85,7 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) if (dlen < len) len = dlen; - frag->page_offset = 0; + skb_frag_off_set(frag, 0); skb_frag_size_set(frag, len); memcpy(skb_frag_address(frag), scratch, len); |
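
The final hunks convert open-coded frag->page_offset accesses to the skb_frag_off()/skb_frag_off_set() helpers. As a minimal sketch of why that accessor pattern is worth the churn, the fragment below (plain user-space C with an invented struct and helper names, not the kernel's actual skbuff definitions) funnels every read and write of the offset through two helpers, so the backing field can later change without touching any caller:

/*
 * Minimal, self-contained sketch of the accessor pattern behind the
 * skb_frag_off()/skb_frag_off_set() conversions in the hunks above
 * (e.g. the ipcomp_decompress() change). The struct layout and helper
 * names here are stand-ins invented for illustration; they are not the
 * kernel's real skbuff API.
 */
#include <stdio.h>

struct frag {				/* hypothetical stand-in for skb_frag_t */
	unsigned int page_offset;	/* offset into the backing page */
	unsigned int size;		/* length of the fragment */
};

/* All readers go through one helper instead of touching the field... */
static inline unsigned int frag_off(const struct frag *f)
{
	return f->page_offset;
}

/* ...and all writers go through another. */
static inline void frag_off_set(struct frag *f, unsigned int off)
{
	f->page_offset = off;
}

int main(void)
{
	struct frag f = { .page_offset = 64, .size = 1400 };

	frag_off_set(&f, 0);		/* mirrors: skb_frag_off_set(frag, 0) */
	printf("offset=%u size=%u\n", frag_off(&f), f.size);
	return 0;
}

With call sites written this way, a later layout change (for example, folding the offset into a different member) only requires updating the two helpers; conversions like the ipcomp_decompress() hunk above stay mechanical one-line changes.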