bridge: switchdev: Add forward mark support for stacked devices
switchdev_port_fwd_mark_set() is used to set the 'offload_fwd_mark' of
port netdevs so that packets being flooded by the device won't be
flooded twice.
It works by assigning a unique identifier (the ifindex of the first
bridge port) to bridge ports sharing the same parent ID. This prevents
packets from being flooded twice by the same switch, but will flood
packets through bridge ports belonging to a different switch.
This method is problematic when stacked devices are taken into account,
such as VLANs. In such cases, a physical port netdev can have upper
devices being members in two different bridges, thus requiring two
different 'offload_fwd_mark's to be configured on the port netdev, which
is impossible.
The main problem is that packet and netdev marking is performed at the
physical netdev level, whereas flooding occurs between bridge ports,
which are not necessarily port netdevs.
Instead, packet and netdev marking should really be done in the bridge
driver with the switch driver only telling it which packets it already
forwarded. The bridge driver will mark such packets using the mark
assigned to the ingress bridge port and will prevent the packet from
being forwarded through any bridge port sharing the same mark (i.e.
having the same parent ID).
Remove the current switchdev 'offload_fwd_mark' implementation and
instead implement the proposed method. In addition, make rocker - the
sole user of the mark - use the proposed method.
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index a1cda5d..0aefc01 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -20,4 +20,6 @@
bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o
+bridge-$(CONFIG_NET_SWITCHDEV) += br_switchdev.o
+
obj-$(CONFIG_NETFILTER) += netfilter/
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 63a83d8..32a02de 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -29,7 +29,8 @@
vg = nbp_vlan_group_rcu(p);
return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) &&
- br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING;
+ br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING &&
+ nbp_switchdev_allowed_egress(p, skb);
}
int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index f2fede0..1da3221 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -545,6 +545,10 @@
if (err)
goto err5;
+ err = nbp_switchdev_mark_set(p);
+ if (err)
+ goto err6;
+
dev_disable_lro(dev);
list_add_rcu(&p->list, &br->port_list);
@@ -566,7 +570,7 @@
err = nbp_vlan_init(p);
if (err) {
netdev_err(dev, "failed to initialize vlan filtering on this port\n");
- goto err6;
+ goto err7;
}
spin_lock_bh(&br->lock);
@@ -589,12 +593,12 @@
return 0;
-err6:
+err7:
list_del_rcu(&p->list);
br_fdb_delete_by_port(br, p, 0, 1);
nbp_update_port_count(br);
+err6:
netdev_upper_dev_unlink(dev, br->dev);
-
err5:
dev->priv_flags &= ~IFF_BRIDGE_PORT;
netdev_rx_handler_unregister(dev);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 8e48620..3132cfc 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -145,6 +145,8 @@
if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid))
goto out;
+ nbp_switchdev_frame_mark(p, skb);
+
/* insert into forwarding database after filtering to avoid spoofing */
br = p->br;
if (p->flags & BR_LEARNING)
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index aac2a6e..2379b2b 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -251,6 +251,9 @@
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
struct net_bridge_vlan_group __rcu *vlgrp;
#endif
+#ifdef CONFIG_NET_SWITCHDEV
+ int offload_fwd_mark;
+#endif
};
#define br_auto_port(p) ((p)->flags & BR_AUTO_MASK)
@@ -359,6 +362,11 @@
struct timer_list gc_timer;
struct kobject *ifobj;
u32 auto_cnt;
+
+#ifdef CONFIG_NET_SWITCHDEV
+ int offload_fwd_mark;
+#endif
+
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
struct net_bridge_vlan_group __rcu *vlgrp;
u8 vlan_enabled;
@@ -381,6 +389,10 @@
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
bool vlan_filtered;
#endif
+
+#ifdef CONFIG_NET_SWITCHDEV
+ int offload_fwd_mark;
+#endif
};
#define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb)
@@ -1034,4 +1046,29 @@
static inline void br_sysfs_delbr(struct net_device *dev) { return; }
#endif /* CONFIG_SYSFS */
+/* br_switchdev.c */
+#ifdef CONFIG_NET_SWITCHDEV
+int nbp_switchdev_mark_set(struct net_bridge_port *p);
+void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
+ struct sk_buff *skb);
+bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
+ const struct sk_buff *skb);
+#else
+static inline int nbp_switchdev_mark_set(struct net_bridge_port *p)
+{
+ return 0;
+}
+
+static inline void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
+ struct sk_buff *skb)
+{
+}
+
+static inline bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
+ const struct sk_buff *skb)
+{
+ return true;
+}
+#endif /* CONFIG_NET_SWITCHDEV */
+
#endif
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
new file mode 100644
index 0000000..f4097b9
--- /dev/null
+++ b/net/bridge/br_switchdev.c
@@ -0,0 +1,57 @@
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+#include <net/switchdev.h>
+
+#include "br_private.h"
+
+static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev)
+{
+ struct net_bridge_port *p;
+
+ /* dev is yet to be added to the port list. */
+ list_for_each_entry(p, &br->port_list, list) {
+ if (switchdev_port_same_parent_id(dev, p->dev))
+ return p->offload_fwd_mark;
+ }
+
+ return ++br->offload_fwd_mark;
+}
+
+int nbp_switchdev_mark_set(struct net_bridge_port *p)
+{
+ struct switchdev_attr attr = {
+ .orig_dev = p->dev,
+ .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
+ };
+ int err;
+
+ ASSERT_RTNL();
+
+ err = switchdev_port_attr_get(p->dev, &attr);
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ return 0;
+ return err;
+ }
+
+ p->offload_fwd_mark = br_switchdev_mark_get(p->br, p->dev);
+
+ return 0;
+}
+
+void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
+ struct sk_buff *skb)
+{
+ if (skb->offload_fwd_mark && !WARN_ON_ONCE(!p->offload_fwd_mark))
+ BR_INPUT_SKB_CB(skb)->offload_fwd_mark = p->offload_fwd_mark;
+}
+
+bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
+ const struct sk_buff *skb)
+{
+ return !skb->offload_fwd_mark ||
+ BR_INPUT_SKB_CB(skb)->offload_fwd_mark != p->offload_fwd_mark;
+}
diff --git a/net/core/dev.c b/net/core/dev.c
index 7feae74..1d5c6dd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3355,16 +3355,6 @@
else
skb_dst_force(skb);
-#ifdef CONFIG_NET_SWITCHDEV
- /* Don't forward if offload device already forwarded */
- if (skb->offload_fwd_mark &&
- skb->offload_fwd_mark == dev->offload_fwd_mark) {
- consume_skb(skb);
- rc = NET_XMIT_SUCCESS;
- goto out;
- }
-#endif
-
txq = netdev_pick_tx(dev, skb, accel_priv);
q = rcu_dereference_bh(txq->qdisc);
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 2c683f2..1031a03 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -1305,88 +1305,3 @@
return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid);
}
EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id);
-
-static u32 switchdev_port_fwd_mark_get(struct net_device *dev,
- struct net_device *group_dev)
-{
- struct net_device *lower_dev;
- struct list_head *iter;
-
- netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
- if (lower_dev == dev)
- continue;
- if (switchdev_port_same_parent_id(dev, lower_dev))
- return lower_dev->offload_fwd_mark;
- return switchdev_port_fwd_mark_get(dev, lower_dev);
- }
-
- return dev->ifindex;
-}
-
-static void switchdev_port_fwd_mark_reset(struct net_device *group_dev,
- u32 old_mark, u32 *reset_mark)
-{
- struct net_device *lower_dev;
- struct list_head *iter;
-
- netdev_for_each_lower_dev(group_dev, lower_dev, iter) {
- if (lower_dev->offload_fwd_mark == old_mark) {
- if (!*reset_mark)
- *reset_mark = lower_dev->ifindex;
- lower_dev->offload_fwd_mark = *reset_mark;
- }
- switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark);
- }
-}
-
-/**
- * switchdev_port_fwd_mark_set - Set port offload forwarding mark
- *
- * @dev: port device
- * @group_dev: containing device
- * @joining: true if dev is joining group; false if leaving group
- *
- * An ungrouped port's offload mark is just its ifindex. A grouped
- * port's (member of a bridge, for example) offload mark is the ifindex
- * of one of the ports in the group with the same parent (switch) ID.
- * Ports on the same device in the same group will have the same mark.
- *
- * Example:
- *
- * br0 ifindex=9
- * sw1p1 ifindex=2 mark=2
- * sw1p2 ifindex=3 mark=2
- * sw2p1 ifindex=4 mark=5
- * sw2p2 ifindex=5 mark=5
- *
- * If sw2p2 leaves the bridge, we'll have:
- *
- * br0 ifindex=9
- * sw1p1 ifindex=2 mark=2
- * sw1p2 ifindex=3 mark=2
- * sw2p1 ifindex=4 mark=4
- * sw2p2 ifindex=5 mark=5
- */
-void switchdev_port_fwd_mark_set(struct net_device *dev,
- struct net_device *group_dev,
- bool joining)
-{
- u32 mark = dev->ifindex;
- u32 reset_mark = 0;
-
- if (group_dev) {
- ASSERT_RTNL();
- if (joining)
- mark = switchdev_port_fwd_mark_get(dev, group_dev);
- else if (dev->offload_fwd_mark == mark)
- /* Ohoh, this port was the mark reference port,
- * but it's leaving the group, so reset the
- * mark for the remaining ports in the group.
- */
- switchdev_port_fwd_mark_reset(group_dev, mark,
- &reset_mark);
- }
-
- dev->offload_fwd_mark = mark;
-}
-EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set);