summaryrefslogtreecommitdiffstats
path: root/net/netfilter
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-11-21 08:28:08 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2024-11-21 08:28:08 -0800
commitfcc79e1714e8c2b8e216dc3149812edd37884eef (patch)
tree17a51d29db810b81412be040aaf380936b3261b4 /net/netfilter
parent6e95ef0258ff4ee23ae3b06bf6b00b33dbbd5ef7 (diff)
parentdd7207838d38780b51e4690ee508ab2d5057e099 (diff)
downloadlinux-stable-master.tar.gz
linux-stable-master.tar.bz2
linux-stable-master.zip
Merge tag 'net-next-6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-nextHEADmaster
Pull networking updates from Paolo Abeni: "The most significant set of changes is the per netns RTNL. The new behavior is disabled by default, regression risk should be contained. Notably the new config knob PTP_1588_CLOCK_VMCLOCK will inherit its default value from PTP_1588_CLOCK_KVM, as the first is intended to be a more reliable replacement for the latter. Core: - Started a very large, in-progress, effort to make the RTNL lock scope per network-namespace, thus reducing the lock contention significantly in the containerized use-case, comprising: - RCU-ified some relevant slices of the FIB control path - introduce basic per netns locking helpers - namespacified the IPv4 address hash table - remove rtnl_register{,_module}() in favour of rtnl_register_many() - refactor rtnl_{new,del,set}link() moving as much validation as possible out of RTNL lock - convert all phonet doit() and dumpit() handlers to RCU - convert IPv4 addresses manipulation to per-netns RTNL - convert virtual interface creation to per-netns RTNL the per-netns lock infrastructure is guarded by the CONFIG_DEBUG_NET_SMALL_RTNL knob, disabled by default ad interim. - Introduce NAPI suspension, to efficiently switching between busy polling (NAPI processing suspended) and normal processing. - Migrate the IPv4 routing input, output and control path from direct ToS usage to DSCP macros. This is a work in progress to make ECN handling consistent and reliable. - Add drop reasons support to the IPv4 rotue input path, allowing better introspection in case of packets drop. - Make FIB seqnum lockless, dropping RTNL protection for read access. - Make inet{,v6} addresses hashing less predicable. - Allow providing timestamp OPT_ID via cmsg, to correlate TX packets and timestamps Things we sprinkled into general kernel code: - Add small file operations for debugfs, to reduce the struct ops size. - Refactoring and optimization for the implementation of page_frag API, This is a preparatory work to consolidate the page_frag implementation. Netfilter: - Optimize set element transactions to reduce memory consumption - Extended netlink error reporting for attribute parser failure. - Make legacy xtables configs user selectable, giving users the option to configure iptables without enabling any other config. - Address a lot of false-positive RCU issues, pointed by recent CI improvements. BPF: - Put xsk sockets on a struct diet and add various cleanups. Overall, this helps to bump performance by 12% for some workloads. - Extend BPF selftests to increase coverage of XDP features in combination with BPF cpumap. - Optimize and homogenize bpf_csum_diff helper for all archs and also add a batch of new BPF selftests for it. - Extend netkit with an option to delegate skb->{mark,priority} scrubbing to its BPF program. - Make the bpf_get_netns_cookie() helper available also to tc(x) BPF programs. Protocols: - Introduces 4-tuple hash for connected udp sockets, speeding-up significantly connected sockets lookup. - Add a fastpath for some TCP timers that usually expires after close, the socket lock contention. - Add inbound and outbound xfrm state caches to speed up state lookups. - Avoid sending MPTCP advertisements on stale subflows, reducing risks on loosing them. - Make neighbours table flushing more scalable, maintaining per device neigh lists. Driver API: - Introduce a unified interface to configure transmission H/W shaping, and expose it to user-space via generic-netlink. - Add support for per-NAPI config via netlink. This makes napi configuration persistent across queues removal and re-creation. Requires driver updates, currently supported drivers are: nVidia/Mellanox mlx4 and mlx5, Broadcom brcm and Intel ice. - Add ethtool support for writing SFP / PHY firmware blocks. - Track RSS context allocation from ethtool core. - Implement support for mirroring to DSA CPU port, via TC mirror offload. - Consolidate FDB updates notification, to avoid duplicates on device-specific entries. - Expose DPLL clock quality level to the user-space. - Support master-slave PHY config via device tree. Tests and tooling: - forwarding: introduce deferred commands, to simplify the cleanup phase Drivers: - Updated several drivers - Amazon vNic, Google vNic, Microsoft vNic, Intel e1000e and Broadcom Tigon3 - to use netdev-genl to link the IRQs and queues to NAPI IDs, allowing busy polling and better introspection. - Ethernet high-speed NICs: - nVidia/Mellanox: - mlx5: - a large refactor to implement support for cross E-Switch scheduling - refactor H/W conter management to let it scale better - H/W GRO cleanups - Intel (100G, ice):: - add support for ethtool reset - implement support for per TX queue H/W shaping - AMD/Solarflare: - implement per device queue stats support - Broadcom (bnxt): - improve wildcard l4proto on IPv4/IPv6 ntuple rules - Marvell Octeon: - Add representor support for each Resource Virtualization Unit (RVU) device. - Hisilicon: - add support for the BMC Gigabit Ethernet - IBM (EMAC): - driver cleanup and modernization - Cisco (VIC): - raise the queues number limit to 256 - Ethernet virtual: - Google vNIC: - implement page pool support - macsec: - inherit lower device's features and TSO limits when offloading - virtio_net: - enable premapped mode by default - support for XDP socket(AF_XDP) zerocopy TX - wireguard: - set the TSO max size to be GSO_MAX_SIZE, to aggregate larger packets. - Ethernet NICs embedded and virtual: - Broadcom ASP: - enable software timestamping - Freescale: - add enetc4 PF driver - MediaTek: Airoha SoC: - implement BQL support - RealTek r8169: - enable TSO by default on r8168/r8125 - implement extended ethtool stats - Renesas AVB: - enable TX checksum offload - Synopsys (stmmac): - support header splitting for vlan tagged packets - move common code for DWMAC4 and DWXGMAC into a separate FPE module. - add dwmac driver support for T-HEAD TH1520 SoC - Synopsys (xpcs): - driver refactor and cleanup - TI: - icssg_prueth: add VLAN offload support - Xilinx emaclite: - add clock support - Ethernet switches: - Microchip: - implement support for the lan969x Ethernet switch family - add LAN9646 switch support to KSZ DSA driver - Ethernet PHYs: - Marvel: 88q2x: enable auto negotiation - Microchip: add support for LAN865X Rev B1 and LAN867X Rev C1/C2 - PTP: - Add support for the Amazon virtual clock device - Add PtP driver for s390 clocks - WiFi: - mac80211 - EHT 1024 aggregation size for transmissions - new operation to indicate that a new interface is to be added - support radio separation of multi-band devices - move wireless extension spy implementation to libiw - Broadcom: - brcmfmac: optional LPO clock support - Microchip: - add support for Atmel WILC3000 - Qualcomm (ath12k): - firmware coredump collection support - add debugfs support for a multitude of statistics - Qualcomm (ath5k): - Arcadyan ARV45XX AR2417 & Gigaset SX76[23] AR241[34]A support - Realtek: - rtw88: 8821au and 8812au USB adapters support - rtw89: add thermal protection - rtw89: fine tune BT-coexsitence to improve user experience - rtw89: firmware secure boot for WiFi 6 chip - Bluetooth - add Qualcomm WCN785x support for ids Foxconn 0xe0fc/0xe0f3 and 0x13d3:0x3623 - add Realtek RTL8852BE support for id Foxconn 0xe123 - add MediaTek MT7920 support for wireless module ids - btintel_pcie: add handshake between driver and firmware - btintel_pcie: add recovery mechanism - btnxpuart: add GPIO support to power save feature" * tag 'net-next-6.13' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1475 commits) mm: page_frag: fix a compile error when kernel is not compiled Documentation: tipc: fix formatting issue in tipc.rst selftests: nic_performance: Add selftest for performance of NIC driver selftests: nic_link_layer: Add selftest case for speed and duplex states selftests: nic_link_layer: Add link layer selftest for NIC driver bnxt_en: Add FW trace coredump segments to the coredump bnxt_en: Add a new ethtool -W dump flag bnxt_en: Add 2 parameters to bnxt_fill_coredump_seg_hdr() bnxt_en: Add functions to copy host context memory bnxt_en: Do not free FW log context memory bnxt_en: Manage the FW trace context memory bnxt_en: Allocate backing store memory for FW trace logs bnxt_en: Add a 'force' parameter to bnxt_free_ctx_mem() bnxt_en: Refactor bnxt_free_ctx_mem() bnxt_en: Add mem_valid bit to struct bnxt_ctx_mem_type bnxt_en: Update firmware interface spec to 1.10.3.85 selftests/bpf: Add some tests with sockmap SK_PASS bpf: fix recursive lock when verdict program return SK_PASS wireguard: device: support big tcp GSO wireguard: selftests: load nf_conntrack if not present ...
Diffstat (limited to 'net/netfilter')
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c7
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c5
-rw-r--r--net/netfilter/nf_bpf_link.c2
-rw-r--r--net/netfilter/nf_conntrack_netlink.c2
-rw-r--r--net/netfilter/nf_nat_core.c6
-rw-r--r--net/netfilter/nf_tables_api.c517
-rw-r--r--net/netfilter/nfnetlink.c2
-rw-r--r--net/netfilter/nft_bitwise.c166
-rw-r--r--net/netfilter/nft_flow_offload.c8
-rw-r--r--net/netfilter/nft_set_bitmap.c10
-rw-r--r--net/netfilter/nft_set_hash.c3
-rw-r--r--net/netfilter/nft_tunnel.c5
12 files changed, 545 insertions, 188 deletions
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index e4fa00abde6a..5988b9bb9029 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -163,11 +163,8 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to);
if (ret)
return ret;
- if (ip > ip_to) {
+ if (ip > ip_to)
swap(ip, ip_to);
- if (ip < map->first_ip)
- return -IPSET_ERR_BITMAP_RANGE;
- }
} else if (tb[IPSET_ATTR_CIDR]) {
u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]);
@@ -178,7 +175,7 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
ip_to = ip;
}
- if (ip_to > map->last_ip)
+ if (ip < map->first_ip || ip_to > map->last_ip)
return -IPSET_ERR_BITMAP_RANGE;
for (; !before(ip_to, ip); ip += map->hosts) {
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index dc6ddc4abbe2..7d13110ce188 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -3662,10 +3662,7 @@ static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
udest->port = nla_get_be16(nla_port);
- if (nla_addr_family)
- udest->af = nla_get_u16(nla_addr_family);
- else
- udest->af = 0;
+ udest->af = nla_get_u16_default(nla_addr_family, 0);
/* If a full entry was requested, check for the additional fields */
if (full_entry) {
diff --git a/net/netfilter/nf_bpf_link.c b/net/netfilter/nf_bpf_link.c
index 3d64a4511fcf..06b084844700 100644
--- a/net/netfilter/nf_bpf_link.c
+++ b/net/netfilter/nf_bpf_link.c
@@ -43,7 +43,7 @@ get_proto_defrag_hook(struct bpf_nf_link *link,
hook = rcu_dereference(*ptr_global_hook);
if (!hook) {
rcu_read_unlock();
- err = request_module(mod);
+ err = request_module("%s", mod);
if (err)
return ERR_PTR(err < 0 ? err : -EINVAL);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 6a1239433830..36168f8b6efa 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -3870,7 +3870,7 @@ static int __init ctnetlink_init(void)
{
int ret;
- NL_ASSERT_DUMP_CTX_FITS(struct ctnetlink_list_dump_ctx);
+ NL_ASSERT_CTX_FITS(struct ctnetlink_list_dump_ctx);
ret = nfnetlink_subsys_register(&ctnl_subsys);
if (ret < 0) {
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 4085c436e306..aad84aabd7f1 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -1090,10 +1090,8 @@ static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[],
range->flags |= NF_NAT_RANGE_MAP_IPS;
}
- if (tb[CTA_NAT_V4_MAXIP])
- range->max_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MAXIP]);
- else
- range->max_addr.ip = range->min_addr.ip;
+ range->max_addr.ip = nla_get_be32_default(tb[CTA_NAT_V4_MAXIP],
+ range->min_addr.ip);
return 0;
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 588a2757986c..21b6f7410a1f 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -26,6 +26,9 @@
#define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-"))
#define NFT_SET_MAX_ANONLEN 16
+/* limit compaction to avoid huge kmalloc/krealloc sizes. */
+#define NFT_MAX_SET_NELEMS ((2048 - sizeof(struct nft_trans_elem)) / sizeof(struct nft_trans_one_elem))
+
unsigned int nf_tables_net_id __read_mostly;
static LIST_HEAD(nf_tables_expressions);
@@ -391,6 +394,86 @@ static void nf_tables_unregister_hook(struct net *net,
return __nf_tables_unregister_hook(net, table, chain, false);
}
+static bool nft_trans_collapse_set_elem_allowed(const struct nft_trans_elem *a, const struct nft_trans_elem *b)
+{
+ /* NB: the ->bound equality check is defensive, at this time we only merge
+ * a new nft_trans_elem transaction request with the transaction tail
+ * element, but a->bound != b->bound would imply a NEWRULE transaction
+ * is queued in-between.
+ *
+ * The set check is mandatory, the NFT_MAX_SET_NELEMS check prevents
+ * huge krealloc() requests.
+ */
+ return a->set == b->set && a->bound == b->bound && a->nelems < NFT_MAX_SET_NELEMS;
+}
+
+static bool nft_trans_collapse_set_elem(struct nftables_pernet *nft_net,
+ struct nft_trans_elem *tail,
+ struct nft_trans_elem *trans,
+ gfp_t gfp)
+{
+ unsigned int nelems, old_nelems = tail->nelems;
+ struct nft_trans_elem *new_trans;
+
+ if (!nft_trans_collapse_set_elem_allowed(tail, trans))
+ return false;
+
+ /* "cannot happen", at this time userspace element add
+ * requests always allocate a new transaction element.
+ *
+ * This serves as a reminder to adjust the list_add_tail
+ * logic below in case this ever changes.
+ */
+ if (WARN_ON_ONCE(trans->nelems != 1))
+ return false;
+
+ if (check_add_overflow(old_nelems, trans->nelems, &nelems))
+ return false;
+
+ /* krealloc might free tail which invalidates list pointers */
+ list_del_init(&tail->nft_trans.list);
+
+ new_trans = krealloc(tail, struct_size(tail, elems, nelems), gfp);
+ if (!new_trans) {
+ list_add_tail(&tail->nft_trans.list, &nft_net->commit_list);
+ return false;
+ }
+
+ /*
+ * new_trans->nft_trans.list contains garbage, but
+ * list_add_tail() doesn't care.
+ */
+ new_trans->nelems = nelems;
+ new_trans->elems[old_nelems] = trans->elems[0];
+ list_add_tail(&new_trans->nft_trans.list, &nft_net->commit_list);
+
+ return true;
+}
+
+static bool nft_trans_try_collapse(struct nftables_pernet *nft_net,
+ struct nft_trans *trans, gfp_t gfp)
+{
+ struct nft_trans *tail;
+
+ if (list_empty(&nft_net->commit_list))
+ return false;
+
+ tail = list_last_entry(&nft_net->commit_list, struct nft_trans, list);
+
+ if (tail->msg_type != trans->msg_type)
+ return false;
+
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWSETELEM:
+ case NFT_MSG_DELSETELEM:
+ return nft_trans_collapse_set_elem(nft_net,
+ nft_trans_container_elem(tail),
+ nft_trans_container_elem(trans), gfp);
+ }
+
+ return false;
+}
+
static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans)
{
struct nftables_pernet *nft_net = nft_pernet(net);
@@ -421,6 +504,24 @@ static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *tr
}
}
+static void nft_trans_commit_list_add_elem(struct net *net, struct nft_trans *trans,
+ gfp_t gfp)
+{
+ struct nftables_pernet *nft_net = nft_pernet(net);
+
+ WARN_ON_ONCE(trans->msg_type != NFT_MSG_NEWSETELEM &&
+ trans->msg_type != NFT_MSG_DELSETELEM);
+
+ might_alloc(gfp);
+
+ if (nft_trans_try_collapse(nft_net, trans, gfp)) {
+ kfree(trans);
+ return;
+ }
+
+ nft_trans_commit_list_add_tail(net, trans);
+}
+
static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type)
{
struct nft_trans *trans;
@@ -1825,7 +1926,8 @@ nla_put_failure:
return -ENOSPC;
}
-static int nft_dump_basechain_hook(struct sk_buff *skb, int family,
+static int nft_dump_basechain_hook(struct sk_buff *skb,
+ const struct net *net, int family,
const struct nft_base_chain *basechain,
const struct list_head *hook_list)
{
@@ -1850,7 +1952,8 @@ static int nft_dump_basechain_hook(struct sk_buff *skb, int family,
if (!hook_list)
hook_list = &basechain->hook_list;
- list_for_each_entry_rcu(hook, hook_list, list) {
+ list_for_each_entry_rcu(hook, hook_list, list,
+ lockdep_commit_lock_is_held(net)) {
if (!first)
first = hook;
@@ -1901,7 +2004,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
const struct nft_base_chain *basechain = nft_base_chain(chain);
struct nft_stats __percpu *stats;
- if (nft_dump_basechain_hook(skb, family, basechain, hook_list))
+ if (nft_dump_basechain_hook(skb, net, family, basechain, hook_list))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_CHAIN_POLICY,
@@ -2083,14 +2186,14 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr)
err = nla_parse_nested_deprecated(tb, NFTA_COUNTER_MAX, attr,
nft_counter_policy, NULL);
if (err < 0)
- return ERR_PTR(err);
+ return ERR_PTR_PCPU(err);
if (!tb[NFTA_COUNTER_BYTES] || !tb[NFTA_COUNTER_PACKETS])
- return ERR_PTR(-EINVAL);
+ return ERR_PTR_PCPU(-EINVAL);
newstats = netdev_alloc_pcpu_stats(struct nft_stats);
if (newstats == NULL)
- return ERR_PTR(-ENOMEM);
+ return ERR_PTR_PCPU(-ENOMEM);
/* Restore old counters on this cpu, no problem. Per-cpu statistics
* are not exposed to userspace.
@@ -2534,10 +2637,10 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
if (nla[NFTA_CHAIN_COUNTERS]) {
stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
- if (IS_ERR(stats)) {
+ if (IS_ERR_PCPU(stats)) {
nft_chain_release_hook(&hook);
kfree(basechain);
- return PTR_ERR(stats);
+ return PTR_ERR_PCPU(stats);
}
rcu_assign_pointer(basechain->stats, stats);
}
@@ -2651,7 +2754,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
struct nft_table *table = ctx->table;
struct nft_chain *chain = ctx->chain;
struct nft_chain_hook hook = {};
- struct nft_stats *stats = NULL;
+ struct nft_stats __percpu *stats = NULL;
struct nft_hook *h, *next;
struct nf_hook_ops *ops;
struct nft_trans *trans;
@@ -2747,8 +2850,8 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
}
stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
- if (IS_ERR(stats)) {
- err = PTR_ERR(stats);
+ if (IS_ERR_PCPU(stats)) {
+ err = PTR_ERR_PCPU(stats);
goto err_hooks;
}
}
@@ -3295,25 +3398,37 @@ int nft_expr_inner_parse(const struct nft_ctx *ctx, const struct nlattr *nla,
if (!tb[NFTA_EXPR_DATA] || !tb[NFTA_EXPR_NAME])
return -EINVAL;
+ rcu_read_lock();
+
type = __nft_expr_type_get(ctx->family, tb[NFTA_EXPR_NAME]);
- if (!type)
- return -ENOENT;
+ if (!type) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
- if (!type->inner_ops)
- return -EOPNOTSUPP;
+ if (!type->inner_ops) {
+ err = -EOPNOTSUPP;
+ goto out_unlock;
+ }
err = nla_parse_nested_deprecated(info->tb, type->maxattr,
tb[NFTA_EXPR_DATA],
type->policy, NULL);
if (err < 0)
- goto err_nla_parse;
+ goto out_unlock;
info->attr = nla;
info->ops = type->inner_ops;
+ /* No module reference will be taken on type->owner.
+ * Presence of type->inner_ops implies that the expression
+ * is builtin, so it cannot go away.
+ */
+ rcu_read_unlock();
return 0;
-err_nla_parse:
+out_unlock:
+ rcu_read_unlock();
return err;
}
@@ -3412,13 +3527,15 @@ void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr)
* Rules
*/
-static struct nft_rule *__nft_rule_lookup(const struct nft_chain *chain,
+static struct nft_rule *__nft_rule_lookup(const struct net *net,
+ const struct nft_chain *chain,
u64 handle)
{
struct nft_rule *rule;
// FIXME: this sucks
- list_for_each_entry_rcu(rule, &chain->rules, list) {
+ list_for_each_entry_rcu(rule, &chain->rules, list,
+ lockdep_commit_lock_is_held(net)) {
if (handle == rule->handle)
return rule;
}
@@ -3426,13 +3543,14 @@ static struct nft_rule *__nft_rule_lookup(const struct nft_chain *chain,
return ERR_PTR(-ENOENT);
}
-static struct nft_rule *nft_rule_lookup(const struct nft_chain *chain,
+static struct nft_rule *nft_rule_lookup(const struct net *net,
+ const struct nft_chain *chain,
const struct nlattr *nla)
{
if (nla == NULL)
return ERR_PTR(-EINVAL);
- return __nft_rule_lookup(chain, be64_to_cpu(nla_get_be64(nla)));
+ return __nft_rule_lookup(net, chain, be64_to_cpu(nla_get_be64(nla)));
}
static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
@@ -3733,7 +3851,7 @@ static int nf_tables_dump_rules_done(struct netlink_callback *cb)
return 0;
}
-/* called with rcu_read_lock held */
+/* Caller must hold rcu read lock or transaction mutex */
static struct sk_buff *
nf_tables_getrule_single(u32 portid, const struct nfnl_info *info,
const struct nlattr * const nla[], bool reset)
@@ -3760,7 +3878,7 @@ nf_tables_getrule_single(u32 portid, const struct nfnl_info *info,
return ERR_CAST(chain);
}
- rule = nft_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
+ rule = nft_rule_lookup(net, chain, nla[NFTA_RULE_HANDLE]);
if (IS_ERR(rule)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
return ERR_CAST(rule);
@@ -3984,7 +4102,8 @@ int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set)
struct nft_set_ext *ext;
int ret = 0;
- list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+ list_for_each_entry_rcu(catchall, &set->catchall_list, list,
+ lockdep_commit_lock_is_held(ctx->net)) {
ext = nft_set_elem_ext(set, catchall->elem);
if (!nft_set_elem_active(ext, dummy_iter.genmask))
continue;
@@ -4058,7 +4177,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
if (nla[NFTA_RULE_HANDLE]) {
handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_HANDLE]));
- rule = __nft_rule_lookup(chain, handle);
+ rule = __nft_rule_lookup(net, chain, handle);
if (IS_ERR(rule)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
return PTR_ERR(rule);
@@ -4080,7 +4199,7 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
if (nla[NFTA_RULE_POSITION]) {
pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
- old_rule = __nft_rule_lookup(chain, pos_handle);
+ old_rule = __nft_rule_lookup(net, chain, pos_handle);
if (IS_ERR(old_rule)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_POSITION]);
return PTR_ERR(old_rule);
@@ -4297,7 +4416,7 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info,
if (chain) {
if (nla[NFTA_RULE_HANDLE]) {
- rule = nft_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
+ rule = nft_rule_lookup(info->net, chain, nla[NFTA_RULE_HANDLE]);
if (IS_ERR(rule)) {
if (PTR_ERR(rule) == -ENOENT &&
NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYRULE)
@@ -4457,7 +4576,8 @@ static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
[NFTA_SET_DESC_CONCAT] = NLA_POLICY_NESTED_ARRAY(nft_concat_policy),
};
-static struct nft_set *nft_set_lookup(const struct nft_table *table,
+static struct nft_set *nft_set_lookup(const struct net *net,
+ const struct nft_table *table,
const struct nlattr *nla, u8 genmask)
{
struct nft_set *set;
@@ -4465,7 +4585,8 @@ static struct nft_set *nft_set_lookup(const struct nft_table *table,
if (nla == NULL)
return ERR_PTR(-EINVAL);
- list_for_each_entry_rcu(set, &table->sets, list) {
+ list_for_each_entry_rcu(set, &table->sets, list,
+ lockdep_commit_lock_is_held(net)) {
if (!nla_strcmp(nla, set->name) &&
nft_active_genmask(set, genmask))
return set;
@@ -4515,7 +4636,7 @@ struct nft_set *nft_set_lookup_global(const struct net *net,
{
struct nft_set *set;
- set = nft_set_lookup(table, nla_set_name, genmask);
+ set = nft_set_lookup(net, table, nla_set_name, genmask);
if (IS_ERR(set)) {
if (!nla_set_id)
return set;
@@ -4891,7 +5012,7 @@ static int nf_tables_getset(struct sk_buff *skb, const struct nfnl_info *info,
if (!nla[NFTA_SET_TABLE])
return -EINVAL;
- set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask);
+ set = nft_set_lookup(net, table, nla[NFTA_SET_NAME], genmask);
if (IS_ERR(set)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
return PTR_ERR(set);
@@ -5227,7 +5348,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
- set = nft_set_lookup(table, nla[NFTA_SET_NAME], genmask);
+ set = nft_set_lookup(net, table, nla[NFTA_SET_NAME], genmask);
if (IS_ERR(set)) {
if (PTR_ERR(set) != -ENOENT) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
@@ -5429,7 +5550,7 @@ static int nf_tables_delset(struct sk_buff *skb, const struct nfnl_info *info,
set = nft_set_lookup_byhandle(table, attr, genmask);
} else {
attr = nla[NFTA_SET_NAME];
- set = nft_set_lookup(table, attr, genmask);
+ set = nft_set_lookup(net, table, attr, genmask);
}
if (IS_ERR(set)) {
@@ -5493,7 +5614,8 @@ static int nft_set_catchall_bind_check(const struct nft_ctx *ctx,
struct nft_set_ext *ext;
int ret = 0;
- list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+ list_for_each_entry_rcu(catchall, &set->catchall_list, list,
+ lockdep_commit_lock_is_held(ctx->net)) {
ext = nft_set_elem_ext(set, catchall->elem);
if (!nft_set_elem_active(ext, genmask))
continue;
@@ -6259,7 +6381,7 @@ static int nft_set_dump_ctx_init(struct nft_set_dump_ctx *dump_ctx,
return PTR_ERR(table);
}
- set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
+ set = nft_set_lookup(net, table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
if (IS_ERR(set)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]);
return PTR_ERR(set);
@@ -6410,17 +6532,21 @@ err:
nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS);
}
-static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx,
+static struct nft_trans *nft_trans_elem_alloc(const struct nft_ctx *ctx,
int msg_type,
struct nft_set *set)
{
+ struct nft_trans_elem *te;
struct nft_trans *trans;
- trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_elem));
+ trans = nft_trans_alloc(ctx, msg_type, struct_size(te, elems, 1));
if (trans == NULL)
return NULL;
- nft_trans_elem_set(trans) = set;
+ te = nft_trans_container_elem(trans);
+ te->nelems = 1;
+ te->set = set;
+
return trans;
}
@@ -6542,28 +6668,52 @@ static void nft_set_elem_expr_destroy(const struct nft_ctx *ctx,
}
/* Drop references and destroy. Called from gc, dynset and abort path. */
-void nft_set_elem_destroy(const struct nft_set *set,
- const struct nft_elem_priv *elem_priv,
- bool destroy_expr)
+static void __nft_set_elem_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set,
+ const struct nft_elem_priv *elem_priv,
+ bool destroy_expr)
{
struct nft_set_ext *ext = nft_set_elem_ext(set, elem_priv);
- struct nft_ctx ctx = {
- .net = read_pnet(&set->net),
- .family = set->table->family,
- };
nft_data_release(nft_set_ext_key(ext), NFT_DATA_VALUE);
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
nft_data_release(nft_set_ext_data(ext), set->dtype);
if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS))
- nft_set_elem_expr_destroy(&ctx, nft_set_ext_expr(ext));
+ nft_set_elem_expr_destroy(ctx, nft_set_ext_expr(ext));
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
nft_use_dec(&(*nft_set_ext_obj(ext))->use);
kfree(elem_priv);
}
+
+/* Drop references and destroy. Called from gc and dynset. */
+void nft_set_elem_destroy(const struct nft_set *set,
+ const struct nft_elem_priv *elem_priv,
+ bool destroy_expr)
+{
+ struct nft_ctx ctx = {
+ .net = read_pnet(&set->net),
+ .family = set->table->family,
+ };
+
+ __nft_set_elem_destroy(&ctx, set, elem_priv, destroy_expr);
+}
EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
+/* Drop references and destroy. Called from abort path. */
+static void nft_trans_set_elem_destroy(const struct nft_ctx *ctx, struct nft_trans_elem *te)
+{
+ int i;
+
+ for (i = 0; i < te->nelems; i++) {
+ /* skip update request, see nft_trans_elems_new_abort() */
+ if (!te->elems[i].priv)
+ continue;
+
+ __nft_set_elem_destroy(ctx, te->set, te->elems[i].priv, true);
+ }
+}
+
/* Destroy element. References have been already dropped in the preparation
* path via nft_setelem_data_deactivate().
*/
@@ -6579,6 +6729,15 @@ void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
kfree(elem_priv);
}
+static void nft_trans_elems_destroy(const struct nft_ctx *ctx,
+ const struct nft_trans_elem *te)
+{
+ int i;
+
+ for (i = 0; i < te->nelems; i++)
+ nf_tables_set_elem_destroy(ctx, te->set, te->elems[i].priv);
+}
+
int nft_set_elem_expr_clone(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_expr *expr_array[])
{
@@ -6735,6 +6894,38 @@ static void nft_setelem_activate(struct net *net, struct nft_set *set,
}
}
+static void nft_trans_elem_update(const struct nft_set *set,
+ const struct nft_trans_one_elem *elem)
+{
+ const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
+ const struct nft_elem_update *update = elem->update;
+
+ if (update->flags & NFT_TRANS_UPD_TIMEOUT)
+ WRITE_ONCE(nft_set_ext_timeout(ext)->timeout, update->timeout);
+
+ if (update->flags & NFT_TRANS_UPD_EXPIRATION)
+ WRITE_ONCE(nft_set_ext_timeout(ext)->expiration, get_jiffies_64() + update->expiration);
+}
+
+static void nft_trans_elems_add(const struct nft_ctx *ctx,
+ struct nft_trans_elem *te)
+{
+ int i;
+
+ for (i = 0; i < te->nelems; i++) {
+ struct nft_trans_one_elem *elem = &te->elems[i];
+
+ if (elem->update)
+ nft_trans_elem_update(te->set, elem);
+ else
+ nft_setelem_activate(ctx->net, te->set, elem->priv);
+
+ nf_tables_setelem_notify(ctx, te->set, elem->priv,
+ NFT_MSG_NEWSETELEM);
+ kfree(elem->update);
+ }
+}
+
static int nft_setelem_catchall_deactivate(const struct net *net,
struct nft_set *set,
struct nft_set_elem *elem)
@@ -6817,6 +7008,26 @@ static void nft_setelem_remove(const struct net *net,
set->ops->remove(net, set, elem_priv);
}
+static void nft_trans_elems_remove(const struct nft_ctx *ctx,
+ const struct nft_trans_elem *te)
+{
+ int i;
+
+ for (i = 0; i < te->nelems; i++) {
+ WARN_ON_ONCE(te->elems[i].update);
+
+ nf_tables_setelem_notify(ctx, te->set,
+ te->elems[i].priv,
+ te->nft_trans.msg_type);
+
+ nft_setelem_remove(ctx->net, te->set, te->elems[i].priv);
+ if (!nft_setelem_is_catchall(te->set, te->elems[i].priv)) {
+ atomic_dec(&te->set->nelems);
+ te->set->ndeact--;
+ }
+ }
+}
+
static bool nft_setelem_valid_key_end(const struct nft_set *set,
struct nlattr **nla, u32 flags)
{
@@ -6853,7 +7064,6 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
struct nft_data_desc desc;
enum nft_registers dreg;
struct nft_trans *trans;
- u8 update_flags;
u64 expiration;
u64 timeout;
int err, i;
@@ -7168,23 +7378,33 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
else if (!(nlmsg_flags & NLM_F_EXCL)) {
err = 0;
if (nft_set_ext_exists(ext2, NFT_SET_EXT_TIMEOUT)) {
- update_flags = 0;
+ struct nft_elem_update update = { };
+
if (timeout != nft_set_ext_timeout(ext2)->timeout) {
- nft_trans_elem_timeout(trans) = timeout;
+ update.timeout = timeout;
if (expiration == 0)
expiration = timeout;
- update_flags |= NFT_TRANS_UPD_TIMEOUT;
+ update.flags |= NFT_TRANS_UPD_TIMEOUT;
}
if (expiration) {
- nft_trans_elem_expiration(trans) = expiration;
- update_flags |= NFT_TRANS_UPD_EXPIRATION;
+ update.expiration = expiration;
+ update.flags |= NFT_TRANS_UPD_EXPIRATION;
}
- if (update_flags) {
- nft_trans_elem_priv(trans) = elem_priv;
- nft_trans_elem_update_flags(trans) = update_flags;
- nft_trans_commit_list_add_tail(ctx->net, trans);
+ if (update.flags) {
+ struct nft_trans_one_elem *ue;
+
+ ue = &nft_trans_container_elem(trans)->elems[0];
+
+ ue->update = kmemdup(&update, sizeof(update), GFP_KERNEL);
+ if (!ue->update) {
+ err = -ENOMEM;
+ goto err_element_clash;
+ }
+
+ ue->priv = elem_priv;
+ nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL);
goto err_elem_free;
}
}
@@ -7207,8 +7427,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
}
}
- nft_trans_elem_priv(trans) = elem.priv;
- nft_trans_commit_list_add_tail(ctx->net, trans);
+ nft_trans_container_elem(trans)->elems[0].priv = elem.priv;
+ nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL);
return 0;
err_set_full:
@@ -7345,6 +7565,55 @@ void nft_setelem_data_deactivate(const struct net *net,
nft_use_dec(&(*nft_set_ext_obj(ext))->use);
}
+/* similar to nft_trans_elems_remove, but called from abort path to undo newsetelem.
+ * No notifications and no ndeact changes.
+ *
+ * Returns true if set had been added to (i.e., elements need to be removed again).
+ */
+static bool nft_trans_elems_new_abort(const struct nft_ctx *ctx,
+ struct nft_trans_elem *te)
+{
+ bool removed = false;
+ int i;
+
+ for (i = 0; i < te->nelems; i++) {
+ if (te->elems[i].update) {
+ kfree(te->elems[i].update);
+ te->elems[i].update = NULL;
+ /* Update request, so do not release this element */
+ te->elems[i].priv = NULL;
+ continue;
+ }
+
+ if (!te->set->ops->abort || nft_setelem_is_catchall(te->set, te->elems[i].priv))
+ nft_setelem_remove(ctx->net, te->set, te->elems[i].priv);
+
+ if (!nft_setelem_is_catchall(te->set, te->elems[i].priv))
+ atomic_dec(&te->set->nelems);
+
+ removed = true;
+ }
+
+ return removed;
+}
+
+/* Called from abort path to undo DELSETELEM/DESTROYSETELEM. */
+static void nft_trans_elems_destroy_abort(const struct nft_ctx *ctx,
+ const struct nft_trans_elem *te)
+{
+ int i;
+
+ for (i = 0; i < te->nelems; i++) {
+ if (!nft_setelem_active_next(ctx->net, te->set, te->elems[i].priv)) {
+ nft_setelem_data_activate(ctx->net, te->set, te->elems[i].priv);
+ nft_setelem_activate(ctx->net, te->set, te->elems[i].priv);
+ }
+
+ if (!nft_setelem_is_catchall(te->set, te->elems[i].priv))
+ te->set->ndeact--;
+ }
+}
+
static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
const struct nlattr *attr)
{
@@ -7424,8 +7693,8 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
nft_setelem_data_deactivate(ctx->net, set, elem.priv);
- nft_trans_elem_priv(trans) = elem.priv;
- nft_trans_commit_list_add_tail(ctx->net, trans);
+ nft_trans_container_elem(trans)->elems[0].priv = elem.priv;
+ nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL);
return 0;
fail_ops:
@@ -7451,7 +7720,8 @@ static int nft_setelem_flush(const struct nft_ctx *ctx,
return 0;
trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM,
- sizeof(struct nft_trans_elem), GFP_ATOMIC);
+ struct_size_t(struct nft_trans_elem, elems, 1),
+ GFP_ATOMIC);
if (!trans)
return -ENOMEM;
@@ -7460,8 +7730,9 @@ static int nft_setelem_flush(const struct nft_ctx *ctx,
nft_setelem_data_deactivate(ctx->net, set, elem_priv);
nft_trans_elem_set(trans) = set;
- nft_trans_elem_priv(trans) = elem_priv;
- nft_trans_commit_list_add_tail(ctx->net, trans);
+ nft_trans_container_elem(trans)->nelems = 1;
+ nft_trans_container_elem(trans)->elems[0].priv = elem_priv;
+ nft_trans_commit_list_add_elem(ctx->net, trans, GFP_ATOMIC);
return 0;
}
@@ -7472,15 +7743,13 @@ static int __nft_set_catchall_flush(const struct nft_ctx *ctx,
{
struct nft_trans *trans;
- trans = nft_trans_alloc_gfp(ctx, NFT_MSG_DELSETELEM,
- sizeof(struct nft_trans_elem), GFP_KERNEL);
+ trans = nft_trans_elem_alloc(ctx, NFT_MSG_DELSETELEM, set);
if (!trans)
return -ENOMEM;
nft_setelem_data_deactivate(ctx->net, set, elem_priv);
- nft_trans_elem_set(trans) = set;
- nft_trans_elem_priv(trans) = elem_priv;
- nft_trans_commit_list_add_tail(ctx->net, trans);
+ nft_trans_container_elem(trans)->elems[0].priv = elem_priv;
+ nft_trans_commit_list_add_elem(ctx->net, trans, GFP_KERNEL);
return 0;
}
@@ -7493,7 +7762,8 @@ static int nft_set_catchall_flush(const struct nft_ctx *ctx,
struct nft_set_ext *ext;
int ret = 0;
- list_for_each_entry_rcu(catchall, &set->catchall_list, list) {
+ list_for_each_entry_rcu(catchall, &set->catchall_list, list,
+ lockdep_commit_lock_is_held(ctx->net)) {
ext = nft_set_elem_ext(set, catchall->elem);
if (!nft_set_elem_active(ext, genmask))
continue;
@@ -7543,7 +7813,7 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
return PTR_ERR(table);
}
- set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
+ set = nft_set_lookup(net, table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
if (IS_ERR(set)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_SET]);
return PTR_ERR(set);
@@ -7790,9 +8060,7 @@ static int nf_tables_updobj(const struct nft_ctx *ctx,
struct nft_trans *trans;
int err = -ENOMEM;
- if (!try_module_get(type->owner))
- return -ENOENT;
-
+ /* caller must have obtained type->owner reference. */
trans = nft_trans_alloc(ctx, NFT_MSG_NEWOBJ,
sizeof(struct nft_trans_obj));
if (!trans)
@@ -7860,15 +8128,16 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- type = __nft_obj_type_get(objtype, family);
- if (WARN_ON_ONCE(!type))
- return -ENOENT;
-
if (!obj->ops->update)
return 0;
+ type = nft_obj_type_get(net, objtype, family);
+ if (WARN_ON_ONCE(IS_ERR(type)))
+ return PTR_ERR(type);
+
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+ /* type->owner reference is put when transaction object is released. */
return nf_tables_updobj(&ctx, type, nla[NFTA_OBJ_DATA], obj);
}
@@ -8104,7 +8373,7 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb)
return 0;
}
-/* called with rcu_read_lock held */
+/* Caller must hold rcu read lock or transaction mutex */
static struct sk_buff *
nf_tables_getobj_single(u32 portid, const struct nfnl_info *info,
const struct nlattr * const nla[], bool reset)
@@ -8373,12 +8642,14 @@ static const struct nla_policy nft_flowtable_policy[NFTA_FLOWTABLE_MAX + 1] = {
[NFTA_FLOWTABLE_FLAGS] = { .type = NLA_U32 },
};
-struct nft_flowtable *nft_flowtable_lookup(const struct nft_table *table,
+struct nft_flowtable *nft_flowtable_lookup(const struct net *net,
+ const struct nft_table *table,
const struct nlattr *nla, u8 genmask)
{
struct nft_flowtable *flowtable;
- list_for_each_entry_rcu(flowtable, &table->flowtables, list) {
+ list_for_each_entry_rcu(flowtable, &table->flowtables, list,
+ lockdep_commit_lock_is_held(net)) {
if (!nla_strcmp(nla, flowtable->name) &&
nft_active_genmask(flowtable, genmask))
return flowtable;
@@ -8734,7 +9005,7 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
return PTR_ERR(table);
}
- flowtable = nft_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
+ flowtable = nft_flowtable_lookup(net, table, nla[NFTA_FLOWTABLE_NAME],
genmask);
if (IS_ERR(flowtable)) {
err = PTR_ERR(flowtable);
@@ -8928,7 +9199,7 @@ static int nf_tables_delflowtable(struct sk_buff *skb,
flowtable = nft_flowtable_lookup_byhandle(table, attr, genmask);
} else {
attr = nla[NFTA_FLOWTABLE_NAME];
- flowtable = nft_flowtable_lookup(table, attr, genmask);
+ flowtable = nft_flowtable_lookup(net, table, attr, genmask);
}
if (IS_ERR(flowtable)) {
@@ -8998,7 +9269,8 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
if (!hook_list)
hook_list = &flowtable->hook_list;
- list_for_each_entry_rcu(hook, hook_list, list) {
+ list_for_each_entry_rcu(hook, hook_list, list,
+ lockdep_commit_lock_is_held(net)) {
if (nla_put_string(skb, NFTA_DEVICE_NAME, hook->ops.dev->name))
goto nla_put_failure;
}
@@ -9140,7 +9412,7 @@ static int nf_tables_getflowtable(struct sk_buff *skb,
return PTR_ERR(table);
}
- flowtable = nft_flowtable_lookup(table, nla[NFTA_FLOWTABLE_NAME],
+ flowtable = nft_flowtable_lookup(net, table, nla[NFTA_FLOWTABLE_NAME],
genmask);
if (IS_ERR(flowtable)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_FLOWTABLE_NAME]);
@@ -9658,9 +9930,7 @@ static void nft_commit_release(struct nft_trans *trans)
break;
case NFT_MSG_DELSETELEM:
case NFT_MSG_DESTROYSETELEM:
- nf_tables_set_elem_destroy(&ctx,
- nft_trans_elem_set(trans),
- nft_trans_elem_priv(trans));
+ nft_trans_elems_destroy(&ctx, nft_trans_container_elem(trans));
break;
case NFT_MSG_DELOBJ:
case NFT_MSG_DESTROYOBJ:
@@ -10233,9 +10503,24 @@ static void nf_tables_commit_audit_free(struct list_head *adl)
}
}
+/* nft audit emits the number of elements that get added/removed/updated,
+ * so NEW/DELSETELEM needs to increment based on the total elem count.
+ */
+static unsigned int nf_tables_commit_audit_entrycount(const struct nft_trans *trans)
+{
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWSETELEM:
+ case NFT_MSG_DELSETELEM:
+ return nft_trans_container_elem(trans)->nelems;
+ }
+
+ return 1;
+}
+
static void nf_tables_commit_audit_collect(struct list_head *adl,
- struct nft_table *table, u32 op)
+ const struct nft_trans *trans, u32 op)
{
+ const struct nft_table *table = trans->table;
struct nft_audit_data *adp;
list_for_each_entry(adp, adl, list) {
@@ -10245,7 +10530,7 @@ static void nf_tables_commit_audit_collect(struct list_head *adl,
WARN_ONCE(1, "table=%s not expected in commit list", table->name);
return;
found:
- adp->entries++;
+ adp->entries += nf_tables_commit_audit_entrycount(trans);
if (!adp->op || adp->op > op)
adp->op = op;
}
@@ -10404,7 +10689,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_ctx_update(&ctx, trans);
- nf_tables_commit_audit_collect(&adl, table, trans->msg_type);
+ nf_tables_commit_audit_collect(&adl, trans, trans->msg_type);
switch (trans->msg_type) {
case NFT_MSG_NEWTABLE:
if (nft_trans_table_update(trans)) {
@@ -10513,25 +10798,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
case NFT_MSG_NEWSETELEM:
te = nft_trans_container_elem(trans);
- if (te->update_flags) {
- const struct nft_set_ext *ext =
- nft_set_elem_ext(te->set, te->elem_priv);
+ nft_trans_elems_add(&ctx, te);
- if (te->update_flags & NFT_TRANS_UPD_TIMEOUT) {
- WRITE_ONCE(nft_set_ext_timeout(ext)->timeout,
- te->timeout);
- }
- if (te->update_flags & NFT_TRANS_UPD_EXPIRATION) {
- WRITE_ONCE(nft_set_ext_timeout(ext)->expiration,
- get_jiffies_64() + te->expiration);
- }
- } else {
- nft_setelem_activate(net, te->set, te->elem_priv);
- }
-
- nf_tables_setelem_notify(&ctx, te->set,
- te->elem_priv,
- NFT_MSG_NEWSETELEM);
if (te->set->ops->commit &&
list_empty(&te->set->pending_update)) {
list_add_tail(&te->set->pending_update,
@@ -10543,14 +10811,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
case NFT_MSG_DESTROYSETELEM:
te = nft_trans_container_elem(trans);
- nf_tables_setelem_notify(&ctx, te->set,
- te->elem_priv,
- trans->msg_type);
- nft_setelem_remove(net, te->set, te->elem_priv);
- if (!nft_setelem_is_catchall(te->set, te->elem_priv)) {
- atomic_dec(&te->set->nelems);
- te->set->ndeact--;
- }
+ nft_trans_elems_remove(&ctx, te);
+
if (te->set->ops->commit &&
list_empty(&te->set->pending_update)) {
list_add_tail(&te->set->pending_update,
@@ -10670,8 +10932,7 @@ static void nf_tables_abort_release(struct nft_trans *trans)
nft_set_destroy(&ctx, nft_trans_set(trans));
break;
case NFT_MSG_NEWSETELEM:
- nft_set_elem_destroy(nft_trans_elem_set(trans),
- nft_trans_elem_priv(trans), true);
+ nft_trans_set_elem_destroy(&ctx, nft_trans_container_elem(trans));
break;
case NFT_MSG_NEWOBJ:
nft_obj_destroy(&ctx, nft_trans_obj(trans));
@@ -10828,18 +11089,15 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSETELEM:
- if (nft_trans_elem_update_flags(trans) ||
- nft_trans_elem_set_bound(trans)) {
+ if (nft_trans_elem_set_bound(trans)) {
nft_trans_destroy(trans);
break;
}
te = nft_trans_container_elem(trans);
- if (!te->set->ops->abort ||
- nft_setelem_is_catchall(te->set, te->elem_priv))
- nft_setelem_remove(net, te->set, te->elem_priv);
-
- if (!nft_setelem_is_catchall(te->set, te->elem_priv))
- atomic_dec(&te->set->nelems);
+ if (!nft_trans_elems_new_abort(&ctx, te)) {
+ nft_trans_destroy(trans);
+ break;
+ }
if (te->set->ops->abort &&
list_empty(&te->set->pending_update)) {
@@ -10851,12 +11109,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
case NFT_MSG_DESTROYSETELEM:
te = nft_trans_container_elem(trans);
- if (!nft_setelem_active_next(net, te->set, te->elem_priv)) {
- nft_setelem_data_activate(net, te->set, te->elem_priv);
- nft_setelem_activate(net, te->set, te->elem_priv);
- }
- if (!nft_setelem_is_catchall(te->set, te->elem_priv))
- te->set->ndeact--;
+ nft_trans_elems_destroy_abort(&ctx, te);
if (te->set->ops->abort &&
list_empty(&te->set->pending_update)) {
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 7784ec094097..e598a2a252b0 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -517,7 +517,7 @@ replay_abort:
err = nla_parse_deprecated(cda,
ss->cb[cb_id].attr_count,
attr, attrlen,
- ss->cb[cb_id].policy, NULL);
+ ss->cb[cb_id].policy, &extack);
if (err < 0)
goto ack;
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index 7de95674fd8c..d550910aabec 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -17,6 +17,7 @@
struct nft_bitwise {
u8 sreg;
+ u8 sreg2;
u8 dreg;
enum nft_bitwise_ops op:8;
u8 len;
@@ -25,8 +26,8 @@ struct nft_bitwise {
struct nft_data data;
};
-static void nft_bitwise_eval_bool(u32 *dst, const u32 *src,
- const struct nft_bitwise *priv)
+static void nft_bitwise_eval_mask_xor(u32 *dst, const u32 *src,
+ const struct nft_bitwise *priv)
{
unsigned int i;
@@ -60,28 +61,72 @@ static void nft_bitwise_eval_rshift(u32 *dst, const u32 *src,
}
}
+static void nft_bitwise_eval_and(u32 *dst, const u32 *src, const u32 *src2,
+ const struct nft_bitwise *priv)
+{
+ unsigned int i, n;
+
+ for (i = 0, n = DIV_ROUND_UP(priv->len, sizeof(u32)); i < n; i++)
+ dst[i] = src[i] & src2[i];
+}
+
+static void nft_bitwise_eval_or(u32 *dst, const u32 *src, const u32 *src2,
+ const struct nft_bitwise *priv)
+{
+ unsigned int i, n;
+
+ for (i = 0, n = DIV_ROUND_UP(priv->len, sizeof(u32)); i < n; i++)
+ dst[i] = src[i] | src2[i];
+}
+
+static void nft_bitwise_eval_xor(u32 *dst, const u32 *src, const u32 *src2,
+ const struct nft_bitwise *priv)
+{
+ unsigned int i, n;
+
+ for (i = 0, n = DIV_ROUND_UP(priv->len, sizeof(u32)); i < n; i++)
+ dst[i] = src[i] ^ src2[i];
+}
+
void nft_bitwise_eval(const struct nft_expr *expr,
struct nft_regs *regs, const struct nft_pktinfo *pkt)
{
const struct nft_bitwise *priv = nft_expr_priv(expr);
- const u32 *src = &regs->data[priv->sreg];
+ const u32 *src = &regs->data[priv->sreg], *src2;
u32 *dst = &regs->data[priv->dreg];
- switch (priv->op) {
- case NFT_BITWISE_BOOL:
- nft_bitwise_eval_bool(dst, src, priv);
- break;
- case NFT_BITWISE_LSHIFT:
+ if (priv->op == NFT_BITWISE_MASK_XOR) {
+ nft_bitwise_eval_mask_xor(dst, src, priv);
+ return;
+ }
+ if (priv->op == NFT_BITWISE_LSHIFT) {
nft_bitwise_eval_lshift(dst, src, priv);
- break;
- case NFT_BITWISE_RSHIFT:
+ return;
+ }
+ if (priv->op == NFT_BITWISE_RSHIFT) {
nft_bitwise_eval_rshift(dst, src, priv);
- break;
+ return;
+ }
+
+ src2 = priv->sreg2 ? &regs->data[priv->sreg2] : priv->data.data;
+
+ if (priv->op == NFT_BITWISE_AND) {
+ nft_bitwise_eval_and(dst, src, src2, priv);
+ return;
+ }
+ if (priv->op == NFT_BITWISE_OR) {
+ nft_bitwise_eval_or(dst, src, src2, priv);
+ return;
+ }
+ if (priv->op == NFT_BITWISE_XOR) {
+ nft_bitwise_eval_xor(dst, src, src2, priv);
+ return;
}
}
static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = {
[NFTA_BITWISE_SREG] = { .type = NLA_U32 },
+ [NFTA_BITWISE_SREG2] = { .type = NLA_U32 },
[NFTA_BITWISE_DREG] = { .type = NLA_U32 },
[NFTA_BITWISE_LEN] = { .type = NLA_U32 },
[NFTA_BITWISE_MASK] = { .type = NLA_NESTED },
@@ -90,8 +135,8 @@ static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = {
[NFTA_BITWISE_DATA] = { .type = NLA_NESTED },
};
-static int nft_bitwise_init_bool(struct nft_bitwise *priv,
- const struct nlattr *const tb[])
+static int nft_bitwise_init_mask_xor(struct nft_bitwise *priv,
+ const struct nlattr *const tb[])
{
struct nft_data_desc mask = {
.type = NFT_DATA_VALUE,
@@ -105,7 +150,8 @@ static int nft_bitwise_init_bool(struct nft_bitwise *priv,
};
int err;
- if (tb[NFTA_BITWISE_DATA])
+ if (tb[NFTA_BITWISE_DATA] ||
+ tb[NFTA_BITWISE_SREG2])
return -EINVAL;
if (!tb[NFTA_BITWISE_MASK] ||
@@ -139,7 +185,8 @@ static int nft_bitwise_init_shift(struct nft_bitwise *priv,
int err;
if (tb[NFTA_BITWISE_MASK] ||
- tb[NFTA_BITWISE_XOR])
+ tb[NFTA_BITWISE_XOR] ||
+ tb[NFTA_BITWISE_SREG2])
return -EINVAL;
if (!tb[NFTA_BITWISE_DATA])
@@ -157,6 +204,41 @@ static int nft_bitwise_init_shift(struct nft_bitwise *priv,
return 0;
}
+static int nft_bitwise_init_bool(const struct nft_ctx *ctx,
+ struct nft_bitwise *priv,
+ const struct nlattr *const tb[])
+{
+ int err;
+
+ if (tb[NFTA_BITWISE_MASK] ||
+ tb[NFTA_BITWISE_XOR])
+ return -EINVAL;
+
+ if ((!tb[NFTA_BITWISE_DATA] && !tb[NFTA_BITWISE_SREG2]) ||
+ (tb[NFTA_BITWISE_DATA] && tb[NFTA_BITWISE_SREG2]))
+ return -EINVAL;
+
+ if (tb[NFTA_BITWISE_DATA]) {
+ struct nft_data_desc desc = {
+ .type = NFT_DATA_VALUE,
+ .size = sizeof(priv->data),
+ .len = priv->len,
+ };
+
+ err = nft_data_init(NULL, &priv->data, &desc,
+ tb[NFTA_BITWISE_DATA]);
+ if (err < 0)
+ return err;
+ } else {
+ err = nft_parse_register_load(ctx, tb[NFTA_BITWISE_SREG2],
+ &priv->sreg2, priv->len);
+ if (err < 0)
+ return err;
+ }
+
+ return 0;
+}
+
static int nft_bitwise_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
@@ -185,32 +267,40 @@ static int nft_bitwise_init(const struct nft_ctx *ctx,
if (tb[NFTA_BITWISE_OP]) {
priv->op = ntohl(nla_get_be32(tb[NFTA_BITWISE_OP]));
switch (priv->op) {
- case NFT_BITWISE_BOOL:
+ case NFT_BITWISE_MASK_XOR:
case NFT_BITWISE_LSHIFT:
case NFT_BITWISE_RSHIFT:
+ case NFT_BITWISE_AND:
+ case NFT_BITWISE_OR:
+ case NFT_BITWISE_XOR:
break;
default:
return -EOPNOTSUPP;
}
} else {
- priv->op = NFT_BITWISE_BOOL;
+ priv->op = NFT_BITWISE_MASK_XOR;
}
switch(priv->op) {
- case NFT_BITWISE_BOOL:
- err = nft_bitwise_init_bool(priv, tb);
+ case NFT_BITWISE_MASK_XOR:
+ err = nft_bitwise_init_mask_xor(priv, tb);
break;
case NFT_BITWISE_LSHIFT:
case NFT_BITWISE_RSHIFT:
err = nft_bitwise_init_shift(priv, tb);
break;
+ case NFT_BITWISE_AND:
+ case NFT_BITWISE_OR:
+ case NFT_BITWISE_XOR:
+ err = nft_bitwise_init_bool(ctx, priv, tb);
+ break;
}
return err;
}
-static int nft_bitwise_dump_bool(struct sk_buff *skb,
- const struct nft_bitwise *priv)
+static int nft_bitwise_dump_mask_xor(struct sk_buff *skb,
+ const struct nft_bitwise *priv)
{
if (nft_data_dump(skb, NFTA_BITWISE_MASK, &priv->mask,
NFT_DATA_VALUE, priv->len) < 0)
@@ -232,6 +322,21 @@ static int nft_bitwise_dump_shift(struct sk_buff *skb,
return 0;
}
+static int nft_bitwise_dump_bool(struct sk_buff *skb,
+ const struct nft_bitwise *priv)
+{
+ if (priv->sreg2) {
+ if (nft_dump_register(skb, NFTA_BITWISE_SREG2, priv->sreg2))
+ return -1;
+ } else {
+ if (nft_data_dump(skb, NFTA_BITWISE_DATA, &priv->data,
+ NFT_DATA_VALUE, sizeof(u32)) < 0)
+ return -1;
+ }
+
+ return 0;
+}
+
static int nft_bitwise_dump(struct sk_buff *skb,
const struct nft_expr *expr, bool reset)
{
@@ -248,13 +353,18 @@ static int nft_bitwise_dump(struct sk_buff *skb,
return -1;
switch (priv->op) {
- case NFT_BITWISE_BOOL:
- err = nft_bitwise_dump_bool(skb, priv);
+ case NFT_BITWISE_MASK_XOR:
+ err = nft_bitwise_dump_mask_xor(skb, priv);
break;
case NFT_BITWISE_LSHIFT:
case NFT_BITWISE_RSHIFT:
err = nft_bitwise_dump_shift(skb, priv);
break;
+ case NFT_BITWISE_AND:
+ case NFT_BITWISE_OR:
+ case NFT_BITWISE_XOR:
+ err = nft_bitwise_dump_bool(skb, priv);
+ break;
}
return err;
@@ -269,7 +379,7 @@ static int nft_bitwise_offload(struct nft_offload_ctx *ctx,
const struct nft_bitwise *priv = nft_expr_priv(expr);
struct nft_offload_reg *reg = &ctx->regs[priv->dreg];
- if (priv->op != NFT_BITWISE_BOOL)
+ if (priv->op != NFT_BITWISE_MASK_XOR)
return -EOPNOTSUPP;
if (memcmp(&priv->xor, &zero, sizeof(priv->xor)) ||
@@ -299,6 +409,7 @@ static bool nft_bitwise_reduce(struct nft_regs_track *track,
track->regs[priv->dreg].bitwise &&
track->regs[priv->dreg].bitwise->ops == expr->ops &&
priv->sreg == bitwise->sreg &&
+ priv->sreg2 == bitwise->sreg2 &&
priv->dreg == bitwise->dreg &&
priv->op == bitwise->op &&
priv->len == bitwise->len &&
@@ -375,7 +486,8 @@ static int nft_bitwise_fast_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
- if (tb[NFTA_BITWISE_DATA])
+ if (tb[NFTA_BITWISE_DATA] ||
+ tb[NFTA_BITWISE_SREG2])
return -EINVAL;
if (!tb[NFTA_BITWISE_MASK] ||
@@ -406,7 +518,7 @@ nft_bitwise_fast_dump(struct sk_buff *skb,
return -1;
if (nla_put_be32(skb, NFTA_BITWISE_LEN, htonl(sizeof(u32))))
return -1;
- if (nla_put_be32(skb, NFTA_BITWISE_OP, htonl(NFT_BITWISE_BOOL)))
+ if (nla_put_be32(skb, NFTA_BITWISE_OP, htonl(NFT_BITWISE_MASK_XOR)))
return -1;
data.data[0] = priv->mask;
@@ -501,7 +613,7 @@ nft_bitwise_select_ops(const struct nft_ctx *ctx,
return &nft_bitwise_ops;
if (tb[NFTA_BITWISE_OP] &&
- ntohl(nla_get_be32(tb[NFTA_BITWISE_OP])) != NFT_BITWISE_BOOL)
+ ntohl(nla_get_be32(tb[NFTA_BITWISE_OP])) != NFT_BITWISE_MASK_XOR)
return &nft_bitwise_ops;
return &nft_bitwise_fast_ops;
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index 2f732fae5a83..3b474d235663 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -8,7 +8,7 @@
#include <linux/spinlock.h>
#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/netfilter/nf_tables.h>
-#include <net/ip.h> /* for ipv4 options. */
+#include <net/ip.h>
#include <net/inet_dscp.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
@@ -236,7 +236,7 @@ static int nft_flow_route(const struct nft_pktinfo *pkt,
fl.u.ip4.saddr = ct->tuplehash[!dir].tuple.src.u3.ip;
fl.u.ip4.flowi4_oif = nft_in(pkt)->ifindex;
fl.u.ip4.flowi4_iif = this_dst->dev->ifindex;
- fl.u.ip4.flowi4_tos = ip_hdr(pkt->skb)->tos & INET_DSCP_MASK;
+ fl.u.ip4.flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(ip_hdr(pkt->skb)));
fl.u.ip4.flowi4_mark = pkt->skb->mark;
fl.u.ip4.flowi4_flags = FLOWI_FLAG_ANYSRC;
break;
@@ -409,8 +409,8 @@ static int nft_flow_offload_init(const struct nft_ctx *ctx,
if (!tb[NFTA_FLOW_TABLE_NAME])
return -EINVAL;
- flowtable = nft_flowtable_lookup(ctx->table, tb[NFTA_FLOW_TABLE_NAME],
- genmask);
+ flowtable = nft_flowtable_lookup(ctx->net, ctx->table,
+ tb[NFTA_FLOW_TABLE_NAME], genmask);
if (IS_ERR(flowtable))
return PTR_ERR(flowtable);
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 1caa04619dc6..12390d2e994f 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -88,13 +88,15 @@ bool nft_bitmap_lookup(const struct net *net, const struct nft_set *set,
}
static struct nft_bitmap_elem *
-nft_bitmap_elem_find(const struct nft_set *set, struct nft_bitmap_elem *this,
+nft_bitmap_elem_find(const struct net *net,
+ const struct nft_set *set, struct nft_bitmap_elem *this,
u8 genmask)
{
const struct nft_bitmap *priv = nft_set_priv(set);
struct nft_bitmap_elem *be;
- list_for_each_entry_rcu(be, &priv->list, head) {
+ list_for_each_entry_rcu(be, &priv->list, head,
+ lockdep_is_held(&nft_pernet(net)->commit_mutex)) {
if (memcmp(nft_set_ext_key(&be->ext),
nft_set_ext_key(&this->ext), set->klen) ||
!nft_set_elem_active(&be->ext, genmask))
@@ -132,7 +134,7 @@ static int nft_bitmap_insert(const struct net *net, const struct nft_set *set,
u8 genmask = nft_genmask_next(net);
u32 idx, off;
- be = nft_bitmap_elem_find(set, new, genmask);
+ be = nft_bitmap_elem_find(net, set, new, genmask);
if (be) {
*elem_priv = &be->priv;
return -EEXIST;
@@ -201,7 +203,7 @@ nft_bitmap_deactivate(const struct net *net, const struct nft_set *set,
nft_bitmap_location(set, elem->key.val.data, &idx, &off);
- be = nft_bitmap_elem_find(set, this, genmask);
+ be = nft_bitmap_elem_find(net, set, this, genmask);
if (!be)
return NULL;
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index daa56dda737a..65bd291318f2 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -647,7 +647,8 @@ static void nft_hash_walk(const struct nft_ctx *ctx, struct nft_set *set,
int i;
for (i = 0; i < priv->buckets; i++) {
- hlist_for_each_entry_rcu(he, &priv->table[i], node) {
+ hlist_for_each_entry_rcu(he, &priv->table[i], node,
+ lockdep_is_held(&nft_pernet(ctx->net)->commit_mutex)) {
if (iter->count < iter->skip)
goto cont;
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index 5c6ed68cc6e0..681301b46aa4 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -497,10 +497,7 @@ static int nft_tunnel_obj_init(const struct nft_ctx *ctx,
}
if (tb[NFTA_TUNNEL_KEY_TOS])
info.key.tos = nla_get_u8(tb[NFTA_TUNNEL_KEY_TOS]);
- if (tb[NFTA_TUNNEL_KEY_TTL])
- info.key.ttl = nla_get_u8(tb[NFTA_TUNNEL_KEY_TTL]);
- else
- info.key.ttl = U8_MAX;
+ info.key.ttl = nla_get_u8_default(tb[NFTA_TUNNEL_KEY_TTL], U8_MAX);
if (tb[NFTA_TUNNEL_KEY_OPTS]) {
err = nft_tunnel_obj_opts_init(ctx, tb[NFTA_TUNNEL_KEY_OPTS],