summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/net/vrf.c450
-rw-r--r--include/net/l3mdev.h39
-rw-r--r--net/l3mdev/l3mdev.c93
-rwxr-xr-xtools/testing/selftests/net/vrf_strict_mode_test.sh390
4 files changed, 963 insertions, 9 deletions
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 43928a1c2f2a..46599606ff10 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -21,6 +21,7 @@
#include <net/rtnetlink.h>
#include <linux/u64_stats_sync.h>
#include <linux/hashtable.h>
+#include <linux/spinlock_types.h>
#include <linux/inetdevice.h>
#include <net/arp.h>
@@ -35,12 +36,76 @@
#include <net/netns/generic.h>
#define DRV_NAME "vrf"
-#define DRV_VERSION "1.0"
+#define DRV_VERSION "1.1"
#define FIB_RULE_PREF 1000 /* default preference for FIB rules */
+/* size, in bits, given to DECLARE_HASHTABLE() for the vrf map hash table */
+#define HT_MAP_BITS 4
+/* initval passed to jhash_1word() when a table id is hashed */
+#define HASH_INITVAL ((u32)0xcafef00d)
+
+/* Per network namespace map from routing table id to the VRF devices that
+ * are bound to that table, plus the state needed to enforce strict mode.
+ */
+struct vrf_map {
+ /* hash table of struct vrf_map_elem, keyed by table id */
+ DECLARE_HASHTABLE(ht, HT_MAP_BITS);
+ /* taken through vrf_map_lock()/vrf_map_unlock() around every access
+ * to the fields of this structure
+ */
+ spinlock_t vmap_lock;
+
+ /* shared_tables:
+ * counts how many distinct tables do not comply with the strict mode
+ * requirement, i.e. tables with more than one vrf bound to them.
+ * shared_tables must be 0 in order to enable the strict mode.
+ *
+ * example of the evolution of shared_tables:
+ *
+ * add vrf0 --> table 100 : shared_tables = 0 (time t0)
+ * add vrf1 --> table 101 : shared_tables = 0 (time t1)
+ * add vrf2 --> table 100 : shared_tables = 1 (time t2)
+ * add vrf3 --> table 100 : shared_tables = 1 (time t3)
+ * add vrf4 --> table 101 : shared_tables = 2 (time t4)
+ *
+ * shared_tables is a "step function" (or "staircase function")
+ * and it is increased by one when the second vrf is associated with
+ * a table.
+ *
+ * at t2, vrf0 and vrf2 are bound to table 100: shared_tables = 1.
+ *
+ * at t3, another dev (vrf3) is bound to the same table 100 but the
+ * value of shared_tables is still 1.
+ * This means that no matter how many new vrfs register on table 100,
+ * shared_tables will not increase (considering only table 100).
+ *
+ * at t4, vrf4 is bound to table 101, and shared_tables = 2.
+ *
+ * Looking at the value of shared_tables we can immediately know
+ * whether the strict_mode can be enforced. Indeed, strict_mode
+ * can be enforced iff shared_tables = 0.
+ *
+ * Conversely, shared_tables is decreased when a vrf is de-associated
+ * from a table with exactly two associated vrfs.
+ */
+ u32 shared_tables;
+
+ /* when true, vrf_map_register_dev() rejects a vrf whose table is
+ * already in the map and vrf_ifindex_lookup_by_table_id() resolves
+ * table ids; when false that lookup returns -EPERM.
+ */
+ bool strict_mode;
+};
+
+/* One entry of the vrf map: a table id together with the VRFs bound to it. */
+struct vrf_map_elem {
+ /* link in the vrf_map hash table */
+ struct hlist_node hnode;
+ struct list_head vrf_list; /* VRFs registered to this table */
+
+ /* key of the entry: compared against the searched id on lookup */
+ u32 table_id;
+ /* number of VRFs currently linked in vrf_list */
+ int users;
+ /* value handed to vrf_map_elem_init();
+ * NOTE(review): no reader of this field is visible in this diff.
+ */
+ int ifindex;
+};
+
static unsigned int vrf_net_id;
+/* per netns vrf data, obtained with net_generic(net, vrf_net_id) */
+struct netns_vrf {
+ /* protected by rtnl lock */
+ bool add_fib_rules;
+
+ /* table id <-> vrf map and strict mode state of this netns */
+ struct vrf_map vmap;
+ /* header returned by register_net_sysctl() for the net/vrf sysctls */
+ struct ctl_table_header *ctl_hdr;
+};
+
struct net_vrf {
struct rtable __rcu *rth;
struct rt6_info __rcu *rt6;
@@ -48,6 +113,9 @@ struct net_vrf {
struct fib6_table *fib6_table;
#endif
u32 tb_id;
+
+ struct list_head me_list; /* entry in vrf_map_elem */
+ int ifindex;
};
struct pcpu_dstats {
@@ -103,6 +171,260 @@ static void vrf_get_stats64(struct net_device *dev,
}
}
+/* Return the vrf map embedded in the per-netns vrf data of @net. */
+static struct vrf_map *netns_vrf_map(struct net *net)
+{
+	struct netns_vrf *pernet;
+
+	pernet = net_generic(net, vrf_net_id);
+	return &pernet->vmap;
+}
+
+/* Return the vrf map of the network namespace that @dev belongs to. */
+static struct vrf_map *netns_vrf_map_by_dev(struct net_device *dev)
+{
+	struct net *net = dev_net(dev);
+
+	return netns_vrf_map(net);
+}
+
+/* Return the ifindex recorded in the first VRF linked to @me, or -ENODEV
+ * when no VRF is linked to the element.
+ */
+static int vrf_map_elem_get_vrf_ifindex(struct vrf_map_elem *me)
+{
+	struct net_vrf *first;
+
+	if (!list_empty(&me->vrf_list)) {
+		first = list_first_entry(&me->vrf_list, struct net_vrf,
+					 me_list);
+		return first->ifindex;
+	}
+
+	return -ENODEV;
+}
+
+/* Allocate an uninitialized map element with the given gfp @flags.
+ * Returns NULL when the allocation fails.
+ */
+static struct vrf_map_elem *vrf_map_elem_alloc(gfp_t flags)
+{
+	return kmalloc(sizeof(struct vrf_map_elem), flags);
+}
+
+/* Release a map element with kfree(); the element is not unlinked here. */
+static void vrf_map_elem_free(struct vrf_map_elem *me)
+{
+ kfree(me);
+}
+
+/* Fill in a freshly allocated map element.
+ * @me: element to initialize
+ * @table_id: routing table id, used as the key of the element
+ * @ifindex: interface index stored in the element
+ * @users: initial number of VRFs bound to the table
+ *
+ * The element is not inserted in the hash table here and its list of VRFs
+ * starts out empty.
+ *
+ * @table_id is a u32, like vrf_map_elem::table_id and the ids passed by the
+ * callers, so that no signed/unsigned conversion happens on the way in.
+ */
+static void vrf_map_elem_init(struct vrf_map_elem *me, u32 table_id,
+			      int ifindex, int users)
+{
+	me->table_id = table_id;
+	me->ifindex = ifindex;
+	me->users = users;
+	INIT_LIST_HEAD(&me->vrf_list);
+}
+
+/* Find the element of @vmap whose table id equals @table_id.
+ * Returns the element, or NULL when the table id is not in the map.
+ */
+static struct vrf_map_elem *vrf_map_lookup_elem(struct vrf_map *vmap,
+						u32 table_id)
+{
+	u32 hash = jhash_1word(table_id, HASH_INITVAL);
+	struct vrf_map_elem *cur;
+
+	/* several table ids may share a bucket: compare the ids */
+	hash_for_each_possible(vmap->ht, cur, hnode, hash)
+		if (cur->table_id == table_id)
+			return cur;
+
+	return NULL;
+}
+
+/* Insert @me in the hash table of @vmap, hashed on the table id of @me. */
+static void vrf_map_add_elem(struct vrf_map *vmap, struct vrf_map_elem *me)
+{
+	u32 hash = jhash_1word(me->table_id, HASH_INITVAL);
+
+	hash_add(vmap->ht, &me->hnode, hash);
+}
+
+/* Unlink @me from the hash table it was added to; @me is not freed here. */
+static void vrf_map_del_elem(struct vrf_map_elem *me)
+{
+ hash_del(&me->hnode);
+}
+
+/* Take the spinlock of @vmap. */
+static void vrf_map_lock(struct vrf_map *vmap) __acquires(&vmap->vmap_lock)
+{
+ spin_lock(&vmap->vmap_lock);
+}
+
+/* Release the spinlock of @vmap taken with vrf_map_lock(). */
+static void vrf_map_unlock(struct vrf_map *vmap) __releases(&vmap->vmap_lock)
+{
+ spin_unlock(&vmap->vmap_lock);
+}
+
+/* Return the current strict mode setting of @vmap, read under the map lock. */
+static bool vrf_strict_mode(struct vrf_map *vmap)
+{
+	bool enabled;
+
+	vrf_map_lock(vmap);
+	enabled = vmap->strict_mode;
+	vrf_map_unlock(vmap);
+
+	return enabled;
+}
+
+/* Switch the strict mode of @vmap to @new_mode.
+ *
+ * Returns 0 when the mode is (or already was) @new_mode, and -EBUSY when
+ * strict mode is requested while at least one table is shared among vrfs.
+ */
+static int vrf_strict_mode_change(struct vrf_map *vmap, bool new_mode)
+{
+	int err = 0;
+
+	vrf_map_lock(vmap);
+
+	/* nothing to do when the requested mode is already in place */
+	if (vmap->strict_mode == new_mode)
+		goto out;
+
+	/* strict mode can only be turned on when every table is bound to
+	 * at most one vrf; turning it off is always allowed.
+	 */
+	if (new_mode && vmap->shared_tables) {
+		err = -EBUSY;
+		goto out;
+	}
+
+	vmap->strict_mode = new_mode;
+
+out:
+	vrf_map_unlock(vmap);
+
+	return err;
+}
+
+/* Bind the vrf device @dev to the map element of its table id, creating the
+ * element when the table is not in the map yet.
+ *
+ * Returns 0 on success, -ENOMEM when the element cannot be allocated and
+ * -EBUSY (with an extack message) when strict mode is on and the table is
+ * already used by another vrf.
+ *
+ * called with rtnl lock held
+ */
+static int
+vrf_map_register_dev(struct net_device *dev, struct netlink_ext_ack *extack)
+{
+	struct vrf_map *vmap = netns_vrf_map_by_dev(dev);
+	struct net_vrf *vrf = netdev_priv(dev);
+	struct vrf_map_elem *spare, *me;
+	u32 table_id = vrf->tb_id;
+	int err = 0;
+
+	/* the element is allocated before entering the spin-locked section,
+	 * so that the spinlock is held for as short as possible.
+	 */
+	spare = vrf_map_elem_alloc(GFP_KERNEL);
+	if (!spare)
+		return -ENOMEM;
+
+	vrf_map_elem_init(spare, table_id, dev->ifindex, 0);
+
+	vrf_map_lock(vmap);
+
+	me = vrf_map_lookup_elem(vmap, table_id);
+	if (me) {
+		/* an entry exists already: at least one vrf is registered
+		 * on this table, which strict mode does not tolerate.
+		 */
+		if (vmap->strict_mode) {
+			NL_SET_ERR_MSG(extack, "Table is used by another VRF");
+			err = -EBUSY;
+			goto unlock;
+		}
+	} else {
+		/* first vrf on this table: the pre-allocated element is
+		 * consumed by the map.
+		 */
+		me = spare;
+		spare = NULL;
+		vrf_map_add_elem(vmap, me);
+	}
+
+	/* the table becomes shared when its second vrf shows up */
+	if (++me->users == 2)
+		++vmap->shared_tables;
+
+	list_add(&vrf->me_list, &me->vrf_list);
+
+unlock:
+	vrf_map_unlock(vmap);
+
+	/* drop the pre-allocated element when the map did not take it */
+	if (spare)
+		vrf_map_elem_free(spare);
+
+	return err;
+}
+
+/* Unbind the vrf device @dev from the map element of its table id; the
+ * element is removed and freed when its last vrf goes away. Nothing is done
+ * when the table id is not in the map.
+ *
+ * called with rtnl lock held
+ */
+static void vrf_map_unregister_dev(struct net_device *dev)
+{
+	struct vrf_map *vmap = netns_vrf_map_by_dev(dev);
+	struct net_vrf *vrf = netdev_priv(dev);
+	struct vrf_map_elem *me;
+
+	vrf_map_lock(vmap);
+
+	me = vrf_map_lookup_elem(vmap, vrf->tb_id);
+	if (me) {
+		list_del(&vrf->me_list);
+
+		switch (--me->users) {
+		case 1:
+			/* only one vrf left: the table is not shared anymore */
+			--vmap->shared_tables;
+			break;
+		case 0:
+			/* no one will refer to this element anymore */
+			vrf_map_del_elem(me);
+			vrf_map_elem_free(me);
+			break;
+		default:
+			break;
+		}
+	}
+
+	vrf_map_unlock(vmap);
+}
+
+/* Return the vrf device index associated with @table_id in @net.
+ *
+ * Returns -EPERM when strict mode is off and -ENODEV when no vrf is bound
+ * to the table.
+ */
+static int vrf_ifindex_lookup_by_table_id(struct net *net, u32 table_id)
+{
+	struct vrf_map *vmap = netns_vrf_map(net);
+	struct vrf_map_elem *me;
+	int ifindex;
+
+	vrf_map_lock(vmap);
+
+	if (!vmap->strict_mode) {
+		/* the lookup is answered only while strict mode is on */
+		ifindex = -EPERM;
+	} else {
+		me = vrf_map_lookup_elem(vmap, table_id);
+		ifindex = me ? vrf_map_elem_get_vrf_ifindex(me) : -ENODEV;
+	}
+
+	vrf_map_unlock(vmap);
+
+	return ifindex;
+}
+
/* by default VRF devices do not have a qdisc and are expected
* to be created with only a single queue.
*/
@@ -1319,6 +1641,8 @@ static void vrf_dellink(struct net_device *dev, struct list_head *head)
netdev_for_each_lower_dev(dev, port_dev, iter)
vrf_del_slave(dev, port_dev);
+ vrf_map_unregister_dev(dev);
+
unregister_netdevice_queue(dev, head);
}
@@ -1327,6 +1651,7 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev,
struct netlink_ext_ack *extack)
{
struct net_vrf *vrf = netdev_priv(dev);
+ struct netns_vrf *nn_vrf;
bool *add_fib_rules;
struct net *net;
int err;
@@ -1349,11 +1674,26 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev,
if (err)
goto out;
+ /* mapping between table_id and vrf;
+ * note: such binding could not be done in the dev init function
+ * because dev->ifindex id is not available yet.
+ */
+ vrf->ifindex = dev->ifindex;
+
+ err = vrf_map_register_dev(dev, extack);
+ if (err) {
+ unregister_netdevice(dev);
+ goto out;
+ }
+
net = dev_net(dev);
- add_fib_rules = net_generic(net, vrf_net_id);
+ nn_vrf = net_generic(net, vrf_net_id);
+
+ add_fib_rules = &nn_vrf->add_fib_rules;
if (*add_fib_rules) {
err = vrf_add_fib_rules(dev);
if (err) {
+ vrf_map_unregister_dev(dev);
unregister_netdevice(dev);
goto out;
}
@@ -1440,20 +1780,102 @@ static struct notifier_block vrf_notifier_block __read_mostly = {
.notifier_call = vrf_device_event,
};
+/* Prepare @vmap for use: strict mode off, empty hash table, lock initialized.
+ * Always returns 0.
+ */
+static int vrf_map_init(struct vrf_map *vmap)
+{
+	vmap->strict_mode = false;
+
+	hash_init(vmap->ht);
+	spin_lock_init(&vmap->vmap_lock);
+
+	return 0;
+}
+
+/* proc handler of the strict_mode sysctl.
+ *
+ * The netns is taken from table->extra1. The value is parsed or printed by
+ * proc_dointvec_minmax() through a temporary table that points to a local
+ * int bounded to [0, 1]: on read the int is loaded with the current strict
+ * mode, on a successful write it is applied with vrf_strict_mode_change().
+ *
+ * Returns the result of proc_dointvec_minmax(), or of the mode change for
+ * a successfully parsed write.
+ */
+static int vrf_shared_table_handler(struct ctl_table *table, int write,
+				    void *buffer, size_t *lenp, loff_t *ppos)
+{
+	struct vrf_map *vmap = netns_vrf_map((struct net *)table->extra1);
+	int value = 0;
+	struct ctl_table tmp = {
+		.procname	= table->procname,
+		.data		= &value,
+		.maxlen		= sizeof(int),
+		.mode		= table->mode,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	};
+	int ret;
+
+	if (!write)
+		value = vrf_strict_mode(vmap);
+
+	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+	if (ret || !write)
+		return ret;
+
+	return vrf_strict_mode_change(vmap, (bool)value);
+}
+
+/* Template of the vrf sysctl table: vrf_netns_init() duplicates it for each
+ * netns and stores the netns pointer in extra1 of the copy.
+ */
+static const struct ctl_table vrf_table[] = {
+ {
+ .procname = "strict_mode",
+ /* no backing variable: the handler works on a local copy */
+ .data = NULL,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = vrf_shared_table_handler,
+ /* set by the vrf_netns_init */
+ .extra1 = NULL,
+ },
+ /* empty entry closing the table */
+ { },
+};
+
/* Initialize per network namespace state */
static int __net_init vrf_netns_init(struct net *net)
{
- bool *add_fib_rules = net_generic(net, vrf_net_id);
+ struct netns_vrf *nn_vrf = net_generic(net, vrf_net_id);
+ struct ctl_table *table;
+ int res;
+
+ nn_vrf->add_fib_rules = true;
+ vrf_map_init(&nn_vrf->vmap);
+
+ table = kmemdup(vrf_table, sizeof(vrf_table), GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
+
+ /* init the extra1 parameter with the reference to current netns */
+ table[0].extra1 = net;
- *add_fib_rules = true;
+ nn_vrf->ctl_hdr = register_net_sysctl(net, "net/vrf", table);
+ if (!nn_vrf->ctl_hdr) {
+ res = -ENOMEM;
+ goto free_table;
+ }
return 0;
+
+free_table:
+ kfree(table);
+
+ return res;
+}
+
+/* Tear down the per network namespace state: unregister the vrf sysctl table
+ * of @net and free the table that was registered with it.
+ */
+static void __net_exit vrf_netns_exit(struct net *net)
+{
+	struct netns_vrf *nn_vrf = net_generic(net, vrf_net_id);
+	struct ctl_table_header *hdr = nn_vrf->ctl_hdr;
+	struct ctl_table *table = hdr->ctl_table_arg;
+
+	/* the table pointer is fetched from the header before the header
+	 * is handed to the unregister call.
+	 */
+	unregister_net_sysctl_table(hdr);
+	kfree(table);
+}
static struct pernet_operations vrf_net_ops __net_initdata = {
.init = vrf_netns_init,
+ .exit = vrf_netns_exit,
.id = &vrf_net_id,
- .size = sizeof(bool),
+ .size = sizeof(struct netns_vrf),
};
static int __init vrf_init_module(void)
@@ -1466,14 +1888,24 @@ static int __init vrf_init_module(void)
if (rc < 0)
goto error;
+ rc = l3mdev_table_lookup_register(L3MDEV_TYPE_VRF,
+ vrf_ifindex_lookup_by_table_id);
+ if (rc < 0)
+ goto unreg_pernet;
+
rc = rtnl_link_register(&vrf_link_ops);
- if (rc < 0) {
- unregister_pernet_subsys(&vrf_net_ops);
- goto error;
- }
+ if (rc < 0)
+ goto table_lookup_unreg;
return 0;
+table_lookup_unreg:
+ l3mdev_table_lookup_unregister(L3MDEV_TYPE_VRF,
+ vrf_ifindex_lookup_by_table_id);
+
+unreg_pernet:
+ unregister_pernet_subsys(&vrf_net_ops);
+
error:
unregister_netdevice_notifier(&vrf_notifier_block);
return rc;
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index e942372b077b..031c661aa14d 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -10,6 +10,16 @@
#include <net/dst.h>
#include <net/fib_rules.h>
+/* Kinds of l3 master device that may register a table lookup handler.
+ * L3MDEV_TYPE_UNSPEC is not a valid type; __L3MDEV_TYPE_MAX only marks the
+ * end of the list.
+ */
+enum l3mdev_type {
+	L3MDEV_TYPE_UNSPEC,
+	L3MDEV_TYPE_VRF,
+	__L3MDEV_TYPE_MAX
+};
+
+/* highest valid l3mdev type */
+#define L3MDEV_TYPE_MAX (__L3MDEV_TYPE_MAX - 1)
+
+/* Handler resolving the routing table @table_id of @net to an int result
+ * (the vrf driver returns a device ifindex or a negative errno).
+ */
+typedef int (*lookup_by_table_id_t)(struct net *net, u32 table_id);
+
+
/**
* struct l3mdev_ops - l3mdev operations
*
@@ -37,6 +47,15 @@ struct l3mdev_ops {
#ifdef CONFIG_NET_L3_MASTER_DEV
+/* install @fn as the table lookup handler of @l3type */
+int l3mdev_table_lookup_register(enum l3mdev_type l3type,
+ lookup_by_table_id_t fn);
+
+/* remove @fn as the table lookup handler of @l3type */
+void l3mdev_table_lookup_unregister(enum l3mdev_type l3type,
+ lookup_by_table_id_t fn);
+
+/* resolve @table_id in @net through the handler registered for @l3type */
+int l3mdev_ifindex_lookup_by_table_id(enum l3mdev_type l3type, struct net *net,
+ u32 table_id);
+
+
int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
struct fib_lookup_arg *arg);
@@ -281,6 +300,26 @@ struct sk_buff *l3mdev_ip6_out(struct sock *sk, struct sk_buff *skb)
}
+/* stub used when CONFIG_NET_L3_MASTER_DEV is off: registration unsupported */
static inline
+int l3mdev_table_lookup_register(enum l3mdev_type l3type,
+ lookup_by_table_id_t fn)
+{
+ return -EOPNOTSUPP;
+}
+
+/* stub used when CONFIG_NET_L3_MASTER_DEV is off: nothing to unregister */
+static inline
+void l3mdev_table_lookup_unregister(enum l3mdev_type l3type,
+ lookup_by_table_id_t fn)
+{
+}
+
+/* stub used when CONFIG_NET_L3_MASTER_DEV is off: no device is ever found */
+static inline
+int l3mdev_ifindex_lookup_by_table_id(enum l3mdev_type l3type, struct net *net,
+ u32 table_id)
+{
+ return -ENODEV;
+}
+
+static inline
int l3mdev_fib_rule_match(struct net *net, struct flowi *fl,
struct fib_lookup_arg *arg)
{
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index f35899d45a9a..e71ca5aec684 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -9,6 +9,99 @@
#include <net/fib_rules.h>
#include <net/l3mdev.h>
+/* held while the handler array below is read or modified */
+static DEFINE_SPINLOCK(l3mdev_lock);
+
+/* callbacks registered for one l3mdev type */
+struct l3mdev_handler {
+ /* table id -> ifindex lookup; NULL while nothing is registered */
+ lookup_by_table_id_t dev_lookup;
+};
+
+/* one slot per l3mdev type, indexed by enum l3mdev_type */
+static struct l3mdev_handler l3mdev_handlers[L3MDEV_TYPE_MAX + 1];
+
+/* Validate an l3mdev type: returns 0 for a type above L3MDEV_TYPE_UNSPEC and
+ * up to L3MDEV_TYPE_MAX, -EINVAL otherwise.
+ */
+static int l3mdev_check_type(enum l3mdev_type l3type)
+{
+	if (l3type > L3MDEV_TYPE_UNSPEC && l3type <= L3MDEV_TYPE_MAX)
+		return 0;
+
+	return -EINVAL;
+}
+
+/* Install @fn as the table lookup handler of @l3type.
+ *
+ * Returns 0 on success, -EINVAL for an invalid type and -EBUSY when a
+ * handler is already registered for that type.
+ */
+int l3mdev_table_lookup_register(enum l3mdev_type l3type,
+				 lookup_by_table_id_t fn)
+{
+	struct l3mdev_handler *slot;
+	int err;
+
+	err = l3mdev_check_type(l3type);
+	if (err)
+		return err;
+
+	slot = &l3mdev_handlers[l3type];
+
+	spin_lock(&l3mdev_lock);
+
+	/* a registered handler is never replaced */
+	if (!slot->dev_lookup)
+		slot->dev_lookup = fn;
+	else
+		err = -EBUSY;
+
+	spin_unlock(&l3mdev_lock);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(l3mdev_table_lookup_register);
+
+/* Remove the table lookup handler of @l3type, but only when the registered
+ * handler is @fn. An invalid type is silently ignored.
+ */
+void l3mdev_table_lookup_unregister(enum l3mdev_type l3type,
+				    lookup_by_table_id_t fn)
+{
+	struct l3mdev_handler *slot;
+
+	if (l3mdev_check_type(l3type) != 0)
+		return;
+
+	slot = &l3mdev_handlers[l3type];
+
+	spin_lock(&l3mdev_lock);
+	if (slot->dev_lookup == fn)
+		slot->dev_lookup = NULL;
+	spin_unlock(&l3mdev_lock);
+}
+EXPORT_SYMBOL_GPL(l3mdev_table_lookup_unregister);
+
+/* Resolve @table_id in @net through the handler registered for @l3type.
+ *
+ * Returns what the handler returns; -EINVAL when the type is invalid or no
+ * handler is registered. The handler is called with l3mdev_lock held.
+ */
+int l3mdev_ifindex_lookup_by_table_id(enum l3mdev_type l3type,
+				      struct net *net, u32 table_id)
+{
+	lookup_by_table_id_t fn;
+	int ret;
+
+	ret = l3mdev_check_type(l3type);
+	if (ret)
+		return ret;
+
+	spin_lock(&l3mdev_lock);
+
+	fn = l3mdev_handlers[l3type].dev_lookup;
+	ret = fn ? fn(net, table_id) : -EINVAL;
+
+	spin_unlock(&l3mdev_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(l3mdev_ifindex_lookup_by_table_id);
+
/**
* l3mdev_master_ifindex - get index of L3 master device
* @dev: targeted interface
diff --git a/tools/testing/selftests/net/vrf_strict_mode_test.sh b/tools/testing/selftests/net/vrf_strict_mode_test.sh
new file mode 100755
index 000000000000..5274f4a1fba1
--- /dev/null
+++ b/tools/testing/selftests/net/vrf_strict_mode_test.sh
@@ -0,0 +1,390 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is designed for testing the new VRF strict_mode functionality.
+
+ret=0
+
+# identifies the "init" network namespace which is often called root network
+# namespace.
+INIT_NETNS_NAME="init"
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+# Print the number of passed and failed checks, unless TESTS is "none".
+print_log_test_results()
+{
+	if [ "$TESTS" = "none" ]; then
+		return 0
+	fi
+
+	printf "\nTests passed: %3d\n" ${nsuccess}
+	printf "Tests failed: %3d\n" ${nfail}
+}
+
+# Print a banner with the title of a test section; the title is "$*".
+log_section()
+{
+	local bar="################################################################################"
+
+	echo
+	echo "${bar}"
+	echo "TEST SECTION: $*"
+	echo "${bar}"
+}
+
+ip_expand_args()
+{
+ local nsname=$1
+ local nsarg=""
+
+ if [ "${nsname}" != "${INIT_NETNS_NAME}" ]; then
+ nsarg="-netns ${nsname}"
+ fi
+
+ echo "${nsarg}"
+}
+
+# Print the number of VRF devices present in namespace $1.
+vrf_count()
+{
+	local ns=$1
+	local ipargs
+
+	ipargs="$(ip_expand_args ${ns})"
+	ip ${ipargs} -o link show type vrf | wc -l
+}
+
+# Print the number of VRF devices of namespace $1 whose detailed link output
+# contains "table $2".
+count_vrf_by_table_id()
+{
+	local ns=$1
+	local table=$2
+	local ipargs
+
+	ipargs="$(ip_expand_args ${ns})"
+	ip ${ipargs} -d -o link show type vrf | grep "table ${table}" | wc -l
+}
+
+# Create VRF device $2 bound to table $3 in namespace $1. The output of ip
+# is discarded; its exit status is the status of the function.
+add_vrf()
+{
+	local ns=$1
+	local name=$2
+	local table=$3
+	local ipargs
+
+	ipargs="$(ip_expand_args ${ns})"
+	ip ${ipargs} link add ${name} type vrf table ${table} &>/dev/null
+}
+
+add_vrf_and_check()
+{
+ local nsname=$1
+ local vrfname=$2
+ local vrftable=$3
+ local cnt
+ local rc
+
+ add_vrf ${nsname} ${vrfname} ${vrftable}; rc=$?
+
+ cnt=$(count_vrf_by_table_id ${nsname} ${vrftable})
+
+ log_test ${rc} 0 "${nsname}: add vrf ${vrfname}, ${cnt} vrfs for table ${vrftable}"
+}
+
+# Try to add VRF $2 on table $3 in namespace $1 and log a check that expects
+# the creation to fail with exit status 2.
+add_vrf_and_check_fail()
+{
+	local ns=$1
+	local name=$2
+	local table=$3
+	local status
+	local nvrfs
+
+	add_vrf ${ns} ${name} ${table}
+	status=$?
+
+	nvrfs=$(count_vrf_by_table_id ${ns} ${table})
+
+	log_test ${status} 2 "${ns}: CANNOT add vrf ${name}, ${nvrfs} vrfs for table ${table}"
+}
+
+# Delete VRF device $2 from namespace $1 and log a check that expects the
+# removal to succeed.
+del_vrf_and_check()
+{
+	local ns=$1
+	local name=$2
+	local ipargs
+	local status
+
+	ipargs="$(ip_expand_args ${ns})"
+
+	ip ${ipargs} link del ${name}
+	status=$?
+
+	log_test ${status} 0 "${ns}: remove vrf ${name}"
+}
+
+# Bring VRF device $3 up in namespace $1 and, if that works, assign address
+# $2 to it; log a check that expects both steps to succeed.
+config_vrf_and_check()
+{
+	local ns=$1
+	local address=$2
+	local name=$3
+	local ipargs
+	local status
+
+	ipargs="$(ip_expand_args ${ns})"
+
+	ip ${ipargs} link set dev ${name} up && \
+		ip ${ipargs} addr add ${address} dev ${name}
+	status=$?
+
+	log_test ${status} 0 "${ns}: vrf ${name} up, addr ${address}"
+}
+
+# Print the value of net.vrf.strict_mode in namespace $1.
+# On success prints 0 or 1 and returns 0; when the sysctl cannot be read or
+# holds anything else, prints 255 and returns 1.
+read_strict_mode()
+{
+	local nsname=$1
+	local value
+	local status=0
+	local runner=""
+
+	# a custom network namespace needs "ip netns exec"
+	[ "${nsname}" = "${INIT_NETNS_NAME}" ] || runner="ip netns exec ${nsname}"
+
+	if ! value="$(${runner} bash -c "cat /proc/sys/net/vrf/strict_mode" | \
+		grep -E "^[0-1]$")" &> /dev/null; then
+		# set errors
+		value=255
+		status=1
+	fi
+
+	echo ${value}
+	return ${status}
+}
+
+# Read strict_mode in namespace $1 and log a check that expects it to be $2.
+read_strict_mode_compare_and_check()
+{
+	local ns=$1
+	local wanted=$2
+	local current
+
+	current="$(read_strict_mode ${ns})"
+	log_test ${current} ${wanted} "${ns}: check strict_mode=${current}"
+}
+
+# Write value $2 to net.vrf.strict_mode in namespace $1. All output is
+# discarded; the exit status of the write is the status of the function.
+set_strict_mode()
+{
+	local nsname=$1
+	local val=$2
+	local runner=""
+
+	# a custom network namespace needs "ip netns exec"
+	[ "${nsname}" = "${INIT_NETNS_NAME}" ] || runner="ip netns exec ${nsname}"
+
+	${runner} bash -c "echo ${val} >/proc/sys/net/vrf/strict_mode" &>/dev/null
+}
+
+# Write 1 to strict_mode in namespace $1; returns the status of the write.
+enable_strict_mode()
+{
+ local nsname=$1
+
+ set_strict_mode ${nsname} 1
+}
+
+# Write 0 to strict_mode in namespace $1; returns the status of the write.
+disable_strict_mode()
+{
+ local nsname=$1
+
+ set_strict_mode ${nsname} 0
+}
+
+# Disable strict_mode in namespace $1 and log a check that expects success.
+disable_strict_mode_and_check()
+{
+	local ns=$1
+	local status
+
+	disable_strict_mode ${ns}
+	status=$?
+
+	log_test ${status} 0 "${ns}: disable strict_mode (=0)"
+}
+
+# Enable strict_mode in namespace $1 and log a check that expects success.
+enable_strict_mode_and_check()
+{
+	local ns=$1
+	local status
+
+	enable_strict_mode ${ns}
+	status=$?
+
+	log_test ${status} 0 "${ns}: enable strict_mode (=1)"
+}
+
+# Try to enable strict_mode in namespace $1 and log a check that expects the
+# write to fail with exit status 1.
+enable_strict_mode_and_check_fail()
+{
+	local ns=$1
+	local status
+
+	enable_strict_mode ${ns}
+	status=$?
+
+	log_test ${status} 1 "${ns}: CANNOT enable strict_mode"
+}
+
+# Log a check that expects strict_mode to read 0 in namespace $1; the number
+# of VRFs found in the namespace is reported in the message.
+strict_mode_check_default()
+{
+	local ns=$1
+	local nvrfs
+	local mode
+
+	nvrfs=$(vrf_count ${ns})
+	mode=$(read_strict_mode ${ns})
+
+	log_test ${mode} 0 "${ns}: strict_mode=0 by default, ${nvrfs} vrfs"
+}
+
+# Load the vrf module and create the "testns" namespace with loopback up.
+setup()
+{
+ modprobe vrf
+
+ ip netns add testns
+ ip netns exec testns ip link set lo up
+}
+
+# Undo what the tests leave behind: the testns namespace, the VRF devices
+# created in the init namespace, and strict_mode in the init namespace.
+cleanup()
+{
+ ip netns del testns 2>/dev/null
+
+ ip link del vrf100 2>/dev/null
+ ip link del vrf101 2>/dev/null
+ ip link del vrf102 2>/dev/null
+
+ # put strict_mode back to 0 in the init namespace
+ echo 0 >/proc/sys/net/vrf/strict_mode 2>/dev/null
+}
+
+# Scenario run in the init namespace; the steps depend on one another and
+# must stay in this order.
+vrf_strict_mode_tests_init()
+{
+ vrf_strict_mode_check_support init
+
+ strict_mode_check_default init
+
+ # one vrf on table 100: strict mode can be enabled
+ add_vrf_and_check init vrf100 100
+ config_vrf_and_check init 172.16.100.1/24 vrf100
+
+ enable_strict_mode_and_check init
+
+ # strict mode on: a second vrf on table 100 must be refused
+ add_vrf_and_check_fail init vrf101 100
+
+ disable_strict_mode_and_check init
+
+ # strict mode off: table 100 can be shared
+ add_vrf_and_check init vrf101 100
+ config_vrf_and_check init 172.16.101.1/24 vrf101
+
+ # table 100 is shared: enabling strict mode must fail
+ enable_strict_mode_and_check_fail init
+
+ del_vrf_and_check init vrf101
+
+ # table 100 is no longer shared: strict mode can be enabled again
+ enable_strict_mode_and_check init
+
+ add_vrf_and_check init vrf102 102
+ config_vrf_and_check init 172.16.102.1/24 vrf102
+
+ # the strict_mode is enabled in the init
+}
+
+# Scenario run in the testns namespace; the steps depend on one another and
+# must stay in this order.
+vrf_strict_mode_tests_testns()
+{
+ vrf_strict_mode_check_support testns
+
+ strict_mode_check_default testns
+
+ # no vrf yet: strict mode can be enabled right away
+ enable_strict_mode_and_check testns
+
+ add_vrf_and_check testns vrf100 100
+ config_vrf_and_check testns 10.0.100.1/24 vrf100
+
+ # strict mode on: further vrfs on table 100 must be refused
+ add_vrf_and_check_fail testns vrf101 100
+
+ add_vrf_and_check_fail testns vrf102 100
+
+ # a vrf on an unused table is still accepted
+ add_vrf_and_check testns vrf200 200
+
+ disable_strict_mode_and_check testns
+
+ # strict mode off: table 100 can be shared
+ add_vrf_and_check testns vrf101 100
+
+ add_vrf_and_check testns vrf102 100
+
+ #the strict_mode is disabled in the testns
+}
+
+# Scenario mixing both namespaces; it starts from the state left by the two
+# previous scenarios (strict_mode 1 in init, 0 in testns) and checks that the
+# setting of one namespace does not follow the other.
+vrf_strict_mode_tests_mix()
+{
+ read_strict_mode_compare_and_check init 1
+
+ read_strict_mode_compare_and_check testns 0
+
+ # stop sharing table 100 in testns so that strict mode can be enabled
+ del_vrf_and_check testns vrf101
+
+ del_vrf_and_check testns vrf102
+
+ disable_strict_mode_and_check init
+
+ enable_strict_mode_and_check testns
+
+ # writing the value already in place must succeed as well
+ enable_strict_mode_and_check init
+ enable_strict_mode_and_check init
+
+ disable_strict_mode_and_check testns
+ disable_strict_mode_and_check testns
+
+ read_strict_mode_compare_and_check init 1
+
+ read_strict_mode_compare_and_check testns 0
+}
+
+# Run the three scenarios in order: init namespace, testns namespace, then
+# the mixed one.
+vrf_strict_mode_tests()
+{
+ log_section "VRF strict_mode test on init network namespace"
+ vrf_strict_mode_tests_init
+
+ log_section "VRF strict_mode test on testns network namespace"
+ vrf_strict_mode_tests_testns
+
+ log_section "VRF strict_mode test mixing init and testns network namespaces"
+ vrf_strict_mode_tests_mix
+}
+
+# Check that the strict_mode sysctl is available in namespace $1.
+# When lsmod lists no vrf module, modinfo must at least know it, otherwise
+# its status is returned at once. The readability of the sysctl is logged as
+# a check and returned as status.
+vrf_strict_mode_check_support()
+{
+	local nsname=$1
+	local loaded
+	local rc
+
+	loaded="$(lsmod | grep '^vrf' | awk '{print $1}')"
+	[ -n "${loaded}" ] || modinfo vrf || return $?
+
+	# we do not care about the value of the strict_mode; we only check if
+	# the strict_mode parameter is available or not.
+	read_strict_mode ${nsname} &>/dev/null
+	rc=$?
+
+	log_test ${rc} 0 "${nsname}: net.vrf.strict_mode is available"
+
+	return ${rc}
+}
+
+# Kselftest framework requirement - SKIP code is 4. Exiting with 0 would make
+# the harness report a skipped run as a pass.
+ksft_skip=4
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+# start from a clean state, whatever a previous run left behind
+cleanup &> /dev/null
+
+setup
+vrf_strict_mode_tests
+cleanup
+
+print_log_test_results
+
+# 0 when every check passed, 1 otherwise (set by log_test)
+exit $ret