summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS1
-rw-r--r--include/net/netfilter/nf_tables.h1
-rw-r--r--include/net/netfilter/nf_tables_core.h10
-rw-r--r--include/net/netns/nftables.h1
-rw-r--r--net/netfilter/nf_tables_api.c66
-rw-r--r--net/netfilter/nft_lookup.c46
-rw-r--r--net/netfilter/nft_set_bitmap.c3
-rw-r--r--net/netfilter/nft_set_pipapo.c20
-rw-r--r--net/netfilter/nft_set_pipapo_avx2.c4
-rw-r--r--net/netfilter/nft_set_rbtree.c6
10 files changed, 103 insertions, 55 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 2df02e4374ed..ba11421c33e5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17480,6 +17480,7 @@ NETFILTER
M: Pablo Neira Ayuso <pablo@netfilter.org>
M: Jozsef Kadlecsik <kadlec@netfilter.org>
M: Florian Westphal <fw@strlen.de>
+R: Phil Sutter <phil@nwl.cc>
L: netfilter-devel@vger.kernel.org
L: coreteam@netfilter.org
S: Maintained
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 891e43a01bdc..3faa80f5d801 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1912,7 +1912,6 @@ struct nftables_pernet {
struct mutex commit_mutex;
u64 table_handle;
u64 tstamp;
- unsigned int base_seq;
unsigned int gc_seq;
u8 validate_state;
struct work_struct destroy_work;
diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index 6c2f483d9828..656e784714f3 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -109,17 +109,11 @@ nft_hash_lookup_fast(const struct net *net, const struct nft_set *set,
const struct nft_set_ext *
nft_hash_lookup(const struct net *net, const struct nft_set *set,
const u32 *key);
+#endif
+
const struct nft_set_ext *
nft_set_do_lookup(const struct net *net, const struct nft_set *set,
const u32 *key);
-#else
-static inline const struct nft_set_ext *
-nft_set_do_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key)
-{
- return set->ops->lookup(net, set, key);
-}
-#endif
/* called from nft_pipapo_avx2.c */
const struct nft_set_ext *
diff --git a/include/net/netns/nftables.h b/include/net/netns/nftables.h
index cc8060c017d5..99dd166c5d07 100644
--- a/include/net/netns/nftables.h
+++ b/include/net/netns/nftables.h
@@ -3,6 +3,7 @@
#define _NETNS_NFTABLES_H_
struct netns_nftables {
+ unsigned int base_seq;
u8 gencursor;
};
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index c1082de09656..c3c73411c40c 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1131,11 +1131,14 @@ nf_tables_chain_type_lookup(struct net *net, const struct nlattr *nla,
return ERR_PTR(-ENOENT);
}
-static __be16 nft_base_seq(const struct net *net)
+static unsigned int nft_base_seq(const struct net *net)
{
- struct nftables_pernet *nft_net = nft_pernet(net);
+ return READ_ONCE(net->nft.base_seq);
+}
- return htons(nft_net->base_seq & 0xffff);
+static __be16 nft_base_seq_be16(const struct net *net)
+{
+ return htons(nft_base_seq(net) & 0xffff);
}
static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
@@ -1155,7 +1158,7 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
nlh = nfnl_msg_put(skb, portid, seq,
nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event),
- flags, family, NFNETLINK_V0, nft_base_seq(net));
+ flags, family, NFNETLINK_V0, nft_base_seq_be16(net));
if (!nlh)
goto nla_put_failure;
@@ -1248,7 +1251,7 @@ static int nf_tables_dump_tables(struct sk_buff *skb,
rcu_read_lock();
nft_net = nft_pernet(net);
- cb->seq = READ_ONCE(nft_net->base_seq);
+ cb->seq = nft_base_seq(net);
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (family != NFPROTO_UNSPEC && family != table->family)
@@ -2030,7 +2033,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
nlh = nfnl_msg_put(skb, portid, seq,
nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event),
- flags, family, NFNETLINK_V0, nft_base_seq(net));
+ flags, family, NFNETLINK_V0, nft_base_seq_be16(net));
if (!nlh)
goto nla_put_failure;
@@ -2133,7 +2136,7 @@ static int nf_tables_dump_chains(struct sk_buff *skb,
rcu_read_lock();
nft_net = nft_pernet(net);
- cb->seq = READ_ONCE(nft_net->base_seq);
+ cb->seq = nft_base_seq(net);
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (family != NFPROTO_UNSPEC && family != table->family)
@@ -3671,7 +3674,7 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
u16 type = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
nlh = nfnl_msg_put(skb, portid, seq, type, flags, family, NFNETLINK_V0,
- nft_base_seq(net));
+ nft_base_seq_be16(net));
if (!nlh)
goto nla_put_failure;
@@ -3839,7 +3842,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
rcu_read_lock();
nft_net = nft_pernet(net);
- cb->seq = READ_ONCE(nft_net->base_seq);
+ cb->seq = nft_base_seq(net);
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (family != NFPROTO_UNSPEC && family != table->family)
@@ -4050,7 +4053,7 @@ static int nf_tables_getrule_reset(struct sk_buff *skb,
buf = kasprintf(GFP_ATOMIC, "%.*s:%u",
nla_len(nla[NFTA_RULE_TABLE]),
(char *)nla_data(nla[NFTA_RULE_TABLE]),
- nft_net->base_seq);
+ nft_base_seq(net));
audit_log_nfcfg(buf, info->nfmsg->nfgen_family, 1,
AUDIT_NFT_OP_RULE_RESET, GFP_ATOMIC);
kfree(buf);
@@ -4887,7 +4890,7 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
nlh = nfnl_msg_put(skb, portid, seq,
nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event),
flags, ctx->family, NFNETLINK_V0,
- nft_base_seq(ctx->net));
+ nft_base_seq_be16(ctx->net));
if (!nlh)
goto nla_put_failure;
@@ -5032,7 +5035,7 @@ static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_lock();
nft_net = nft_pernet(net);
- cb->seq = READ_ONCE(nft_net->base_seq);
+ cb->seq = nft_base_seq(net);
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (ctx->family != NFPROTO_UNSPEC &&
@@ -6209,7 +6212,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_lock();
nft_net = nft_pernet(net);
- cb->seq = READ_ONCE(nft_net->base_seq);
+ cb->seq = nft_base_seq(net);
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (dump_ctx->ctx.family != NFPROTO_UNSPEC &&
@@ -6238,7 +6241,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
seq = cb->nlh->nlmsg_seq;
nlh = nfnl_msg_put(skb, portid, seq, event, NLM_F_MULTI,
- table->family, NFNETLINK_V0, nft_base_seq(net));
+ table->family, NFNETLINK_V0, nft_base_seq_be16(net));
if (!nlh)
goto nla_put_failure;
@@ -6331,7 +6334,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb,
event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event);
nlh = nfnl_msg_put(skb, portid, seq, event, flags, ctx->family,
- NFNETLINK_V0, nft_base_seq(ctx->net));
+ NFNETLINK_V0, nft_base_seq_be16(ctx->net));
if (!nlh)
goto nla_put_failure;
@@ -6630,7 +6633,7 @@ static int nf_tables_getsetelem_reset(struct sk_buff *skb,
}
nelems++;
}
- audit_log_nft_set_reset(dump_ctx.ctx.table, nft_net->base_seq, nelems);
+ audit_log_nft_set_reset(dump_ctx.ctx.table, nft_base_seq(info->net), nelems);
out_unlock:
rcu_read_unlock();
@@ -8381,7 +8384,7 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net,
nlh = nfnl_msg_put(skb, portid, seq,
nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event),
- flags, family, NFNETLINK_V0, nft_base_seq(net));
+ flags, family, NFNETLINK_V0, nft_base_seq_be16(net));
if (!nlh)
goto nla_put_failure;
@@ -8446,7 +8449,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_lock();
nft_net = nft_pernet(net);
- cb->seq = READ_ONCE(nft_net->base_seq);
+ cb->seq = nft_base_seq(net);
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (family != NFPROTO_UNSPEC && family != table->family)
@@ -8480,7 +8483,7 @@ cont:
idx++;
}
if (ctx->reset && entries)
- audit_log_obj_reset(table, nft_net->base_seq, entries);
+ audit_log_obj_reset(table, nft_base_seq(net), entries);
if (rc < 0)
break;
}
@@ -8649,7 +8652,7 @@ static int nf_tables_getobj_reset(struct sk_buff *skb,
buf = kasprintf(GFP_ATOMIC, "%.*s:%u",
nla_len(nla[NFTA_OBJ_TABLE]),
(char *)nla_data(nla[NFTA_OBJ_TABLE]),
- nft_net->base_seq);
+ nft_base_seq(net));
audit_log_nfcfg(buf, info->nfmsg->nfgen_family, 1,
AUDIT_NFT_OP_OBJ_RESET, GFP_ATOMIC);
kfree(buf);
@@ -8754,9 +8757,8 @@ void nft_obj_notify(struct net *net, const struct nft_table *table,
struct nft_object *obj, u32 portid, u32 seq, int event,
u16 flags, int family, int report, gfp_t gfp)
{
- struct nftables_pernet *nft_net = nft_pernet(net);
char *buf = kasprintf(gfp, "%s:%u",
- table->name, nft_net->base_seq);
+ table->name, nft_base_seq(net));
audit_log_nfcfg(buf,
family,
@@ -9442,7 +9444,7 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
nlh = nfnl_msg_put(skb, portid, seq,
nfnl_msg_type(NFNL_SUBSYS_NFTABLES, event),
- flags, family, NFNETLINK_V0, nft_base_seq(net));
+ flags, family, NFNETLINK_V0, nft_base_seq_be16(net));
if (!nlh)
goto nla_put_failure;
@@ -9511,7 +9513,7 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb,
rcu_read_lock();
nft_net = nft_pernet(net);
- cb->seq = READ_ONCE(nft_net->base_seq);
+ cb->seq = nft_base_seq(net);
list_for_each_entry_rcu(table, &nft_net->tables, list) {
if (family != NFPROTO_UNSPEC && family != table->family)
@@ -9696,17 +9698,16 @@ static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
u32 portid, u32 seq)
{
- struct nftables_pernet *nft_net = nft_pernet(net);
struct nlmsghdr *nlh;
char buf[TASK_COMM_LEN];
int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN);
nlh = nfnl_msg_put(skb, portid, seq, event, 0, AF_UNSPEC,
- NFNETLINK_V0, nft_base_seq(net));
+ NFNETLINK_V0, nft_base_seq_be16(net));
if (!nlh)
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_GEN_ID, htonl(nft_net->base_seq)) ||
+ if (nla_put_be32(skb, NFTA_GEN_ID, htonl(nft_base_seq(net))) ||
nla_put_be32(skb, NFTA_GEN_PROC_PID, htonl(task_pid_nr(current))) ||
nla_put_string(skb, NFTA_GEN_PROC_NAME, get_task_comm(buf, current)))
goto nla_put_failure;
@@ -10968,11 +10969,12 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
* Bump generation counter, invalidate any dump in progress.
* Cannot fail after this point.
*/
- base_seq = READ_ONCE(nft_net->base_seq);
+ base_seq = nft_base_seq(net);
while (++base_seq == 0)
;
- WRITE_ONCE(nft_net->base_seq, base_seq);
+ /* pairs with smp_load_acquire in nft_lookup_eval */
+ smp_store_release(&net->nft.base_seq, base_seq);
gc_seq = nft_gc_seq_begin(nft_net);
@@ -11181,7 +11183,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_commit_notify(net, NETLINK_CB(skb).portid);
nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
- nf_tables_commit_audit_log(&adl, nft_net->base_seq);
+ nf_tables_commit_audit_log(&adl, nft_base_seq(net));
nft_gc_seq_end(nft_net, gc_seq);
nft_net->validate_state = NFT_VALIDATE_SKIP;
@@ -11506,7 +11508,7 @@ static bool nf_tables_valid_genid(struct net *net, u32 genid)
mutex_lock(&nft_net->commit_mutex);
nft_net->tstamp = get_jiffies_64();
- genid_ok = genid == 0 || nft_net->base_seq == genid;
+ genid_ok = genid == 0 || nft_base_seq(net) == genid;
if (!genid_ok)
mutex_unlock(&nft_net->commit_mutex);
@@ -12143,7 +12145,7 @@ static int __net_init nf_tables_init_net(struct net *net)
INIT_LIST_HEAD(&nft_net->module_list);
INIT_LIST_HEAD(&nft_net->notify_list);
mutex_init(&nft_net->commit_mutex);
- nft_net->base_seq = 1;
+ net->nft.base_seq = 1;
nft_net->gc_seq = 0;
nft_net->validate_state = NFT_VALIDATE_SKIP;
INIT_WORK(&nft_net->destroy_work, nf_tables_trans_destroy_work);
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 40c602ffbcba..58c5b14889c4 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -24,11 +24,11 @@ struct nft_lookup {
struct nft_set_binding binding;
};
-#ifdef CONFIG_MITIGATION_RETPOLINE
-const struct nft_set_ext *
-nft_set_do_lookup(const struct net *net, const struct nft_set *set,
- const u32 *key)
+static const struct nft_set_ext *
+__nft_set_do_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key)
{
+#ifdef CONFIG_MITIGATION_RETPOLINE
if (set->ops == &nft_set_hash_fast_type.ops)
return nft_hash_lookup_fast(net, set, key);
if (set->ops == &nft_set_hash_type.ops)
@@ -51,10 +51,46 @@ nft_set_do_lookup(const struct net *net, const struct nft_set *set,
return nft_rbtree_lookup(net, set, key);
WARN_ON_ONCE(1);
+#endif
return set->ops->lookup(net, set, key);
}
+
+static unsigned int nft_base_seq(const struct net *net)
+{
+ /* pairs with smp_store_release() in nf_tables_commit() */
+ return smp_load_acquire(&net->nft.base_seq);
+}
+
+static bool nft_lookup_should_retry(const struct net *net, unsigned int seq)
+{
+ return unlikely(seq != nft_base_seq(net));
+}
+
+const struct nft_set_ext *
+nft_set_do_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key)
+{
+ const struct nft_set_ext *ext;
+ unsigned int base_seq;
+
+ do {
+ base_seq = nft_base_seq(net);
+
+ ext = __nft_set_do_lookup(net, set, key);
+ if (ext)
+ break;
+ /* No match? There is a small chance that lookup was
+ * performed in the old generation, but nf_tables_commit()
+ * already unlinked a (matching) element.
+ *
+ * We need to repeat the lookup to make sure that we didn't
+ * miss a matching element in the new generation.
+ */
+ } while (nft_lookup_should_retry(net, base_seq));
+
+ return ext;
+}
EXPORT_SYMBOL_GPL(nft_set_do_lookup);
-#endif
void nft_lookup_eval(const struct nft_expr *expr,
struct nft_regs *regs,
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index c24c922f895d..8d3f040a904a 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -226,7 +226,8 @@ static void nft_bitmap_walk(const struct nft_ctx *ctx,
const struct nft_bitmap *priv = nft_set_priv(set);
struct nft_bitmap_elem *be;
- list_for_each_entry_rcu(be, &priv->list, head) {
+ list_for_each_entry_rcu(be, &priv->list, head,
+ lockdep_is_held(&nft_pernet(ctx->net)->commit_mutex)) {
if (iter->count < iter->skip)
goto cont;
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 9a10251228fd..793790d79d13 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -510,6 +510,23 @@ out:
*
* This function is called from the data path. It will search for
* an element matching the given key in the current active copy.
+ * Unlike other set types, this uses NFT_GENMASK_ANY instead of
+ * nft_genmask_cur().
+ *
+ * This is because new (future) elements are not reachable from
+ * priv->match, they get added to priv->clone instead.
+ * When the commit phase flips the generation bitmask, the
+ * 'now old' entries are skipped but without the 'now current'
+ * elements becoming visible. Using nft_genmask_cur() thus creates
+ * inconsistent state: matching old entries get skipped but thew
+ * newly matching entries are unreachable.
+ *
+ * GENMASK will still find the 'now old' entries which ensures consistent
+ * priv->match view.
+ *
+ * nft_pipapo_commit swaps ->clone and ->match shortly after the
+ * genbit flip. As ->clone doesn't contain the old entries in the first
+ * place, lookup will only find the now-current ones.
*
* Return: ntables API extension pointer or NULL if no match.
*/
@@ -518,12 +535,11 @@ nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
const u32 *key)
{
struct nft_pipapo *priv = nft_set_priv(set);
- u8 genmask = nft_genmask_cur(net);
const struct nft_pipapo_match *m;
const struct nft_pipapo_elem *e;
m = rcu_dereference(priv->match);
- e = pipapo_get(m, (const u8 *)key, genmask, get_jiffies_64());
+ e = pipapo_get(m, (const u8 *)key, NFT_GENMASK_ANY, get_jiffies_64());
return e ? &e->ext : NULL;
}
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index 2f090e253caf..c0884fa68c79 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -1152,7 +1152,6 @@ nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
struct nft_pipapo *priv = nft_set_priv(set);
const struct nft_set_ext *ext = NULL;
struct nft_pipapo_scratch *scratch;
- u8 genmask = nft_genmask_cur(net);
const struct nft_pipapo_match *m;
const struct nft_pipapo_field *f;
const u8 *rp = (const u8 *)key;
@@ -1248,8 +1247,7 @@ next_match:
if (last) {
const struct nft_set_ext *e = &f->mt[ret].e->ext;
- if (unlikely(nft_set_elem_expired(e) ||
- !nft_set_elem_active(e, genmask)))
+ if (unlikely(nft_set_elem_expired(e)))
goto next_match;
ext = e;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 938a257c069e..b1f04168ec93 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -77,7 +77,9 @@ __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
nft_rbtree_interval_end(rbe) &&
nft_rbtree_interval_start(interval))
continue;
- interval = rbe;
+ if (nft_set_elem_active(&rbe->ext, genmask) &&
+ !nft_rbtree_elem_expired(rbe))
+ interval = rbe;
} else if (d > 0)
parent = rcu_dereference_raw(parent->rb_right);
else {
@@ -102,8 +104,6 @@ __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
}
if (set->flags & NFT_SET_INTERVAL && interval != NULL &&
- nft_set_elem_active(&interval->ext, genmask) &&
- !nft_rbtree_elem_expired(interval) &&
nft_rbtree_interval_start(interval))
return &interval->ext;