mlxsw: spectrum_router: Onload conflicting tunnels
author: Petr Machata <petrm@mellanox.com>
Fri, 3 Nov 2017 09:03:40 +0000 (10:03 +0100)
committer: David S. Miller <davem@davemloft.net>
Sat, 4 Nov 2017 00:15:18 +0000 (09:15 +0900)
The approach for offloading IP tunnels implemented currently by mlxsw
doesn't allow two tunnels that have the same local IP address in the
same (underlay) VRF. Previously, offloads were introduced on demand as
encap routes were formed. When such a route was created that would cause
offload of a conflicting tunnel, mlxsw_sp_ipip_entry_create() would
detect it and return -EEXIST, which would propagate up and cause FIB
abort.

Now, however, IPIP entries are created as soon as an offloadable netdevice
is created, and the failure prevents creation of such a device.
Furthermore, if the driver is installed at the point where such
conflicting tunnels exist, the failure actually prevents successful
modprobe.

Furthermore, follow-up patches implement handling of NETDEV_CHANGE due
to a local address change. However, NETDEV_CHANGE can't be vetoed. The
failure merely means that the offloads weren't updated, but the change
in Linux configuration is not rolled back. It is thus desirable to have
a robust way of handling these conflicts, which can later be reused for
handling NETDEV_CHANGE as well.

To fix this, when a conflicting tunnel is created, instead of failing,
simply pull the old tunnel to slow path and reject offloading the
new one.

Introduce two functions: mlxsw_sp_ipip_entry_demote_tunnel() and
mlxsw_sp_ipip_demote_tunnel_by_saddr() to handle this. Make them both
public, because they will be useful later on in this patchset.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h

index 897a3841e52f931e0d79fafede5a40bb15af449f..832bfa12551262fd482ceb1d480f4d810bb331d0 100644 (file)
@@ -1159,24 +1159,7 @@ mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
                           enum mlxsw_sp_ipip_type ipipt,
                           struct net_device *ol_dev)
 {
-       u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
-       struct mlxsw_sp_router *router = mlxsw_sp->router;
        struct mlxsw_sp_ipip_entry *ipip_entry;
-       enum mlxsw_sp_l3proto ul_proto;
-       union mlxsw_sp_l3addr saddr;
-
-       /* The configuration where several tunnels have the same local address
-        * in the same underlay table needs special treatment in the HW. That is
-        * currently not implemented in the driver.
-        */
-       ul_proto = router->ipip_ops_arr[ipipt]->ul_proto;
-       saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
-       list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
-                           ipip_list_node) {
-               if (mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
-                                                     ul_tb_id, ipip_entry))
-                       return ERR_PTR(-EEXIST);
-       }
 
        ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
        if (IS_ERR(ipip_entry))
@@ -1292,14 +1275,24 @@ static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
                                                struct net_device *ol_dev)
 {
        struct mlxsw_sp_ipip_entry *ipip_entry;
+       enum mlxsw_sp_l3proto ul_proto;
        enum mlxsw_sp_ipip_type ipipt;
+       union mlxsw_sp_l3addr saddr;
+       u32 ul_tb_id;
 
        mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
        if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
-               ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
-                                                       ol_dev);
-               if (IS_ERR(ipip_entry))
-                       return PTR_ERR(ipip_entry);
+               ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
+               ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
+               saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
+               if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
+                                                         saddr, ul_tb_id,
+                                                         NULL)) {
+                       ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
+                                                               ol_dev);
+                       if (IS_ERR(ipip_entry))
+                               return PTR_ERR(ipip_entry);
+               }
        }
 
        return 0;
@@ -1441,6 +1434,44 @@ static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
                                                   true, false, false, extack);
 }
 
+void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
+                                      struct mlxsw_sp_ipip_entry *ipip_entry)
+{
+       struct net_device *ol_dev = ipip_entry->ol_dev;
+
+       if (ol_dev->flags & IFF_UP)
+               mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
+       mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
+}
+
+/* The configuration where several tunnels have the same local address in the
+ * same underlay table needs special treatment in the HW. That is currently not
+ * implemented in the driver. This function finds and demotes the first tunnel
+ * with a given source address, except the one passed in the argument
+ * `except'. Returns true if a tunnel was demoted, false otherwise.
+ */
+bool
+mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
+                                    enum mlxsw_sp_l3proto ul_proto,
+                                    union mlxsw_sp_l3addr saddr,
+                                    u32 ul_tb_id,
+                                    const struct mlxsw_sp_ipip_entry *except)
+{
+       struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
+
+       list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
+                                ipip_list_node) {
+               if (ipip_entry != except &&
+                   mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
+                                                     ul_tb_id, ipip_entry)) {
+                       mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
+                       return true;
+               }
+       }
+
+       return false;
+}
+
 int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
                                     struct net_device *ol_dev,
                                     unsigned long event,
index 4b8a12a4f49396c3276622dd5d5f95f21974e62b..5dd650bdcff646e5a01de5782bcfc2cc9c5e4dd7 100644 (file)
@@ -110,6 +110,14 @@ int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
                                        bool keep_encap,
                                        bool update_nexthops,
                                        struct netlink_ext_ack *extack);
+void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
+                                      struct mlxsw_sp_ipip_entry *ipip_entry);
+bool
+mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
+                                    enum mlxsw_sp_l3proto ul_proto,
+                                    union mlxsw_sp_l3addr saddr,
+                                    u32 ul_tb_id,
+                                    const struct mlxsw_sp_ipip_entry *except);
 struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
                                               struct mlxsw_sp_nexthop *nh);
 bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh);