Skip to content

Commit 0a6e777

Browse files
pjvuuren authored and davem330 committed
net/sched: allow flower to match tunnel options
Allow matching on options in Geneve tunnel headers. This makes use of existing tunnel metadata support.

The options can be described in the form CLASS:TYPE:DATA/CLASS_MASK:TYPE_MASK:DATA_MASK, where CLASS is represented as a 16bit hexadecimal value, TYPE as an 8bit hexadecimal value and DATA as a variable length hexadecimal value.

e.g.
 # ip link add name geneve0 type geneve dstport 0 external
 # tc qdisc add dev geneve0 ingress
 # tc filter add dev geneve0 protocol ip parent ffff: \
     flower \
       enc_src_ip 10.0.99.192 \
       enc_dst_ip 10.0.99.193 \
       enc_key_id 11 \
       geneve_opts 0102:80:1122334421314151/ffff:ff:ffffffffffffffff \
       ip_proto udp \
       action mirred egress redirect dev eth1

This patch adds support for matching Geneve options in the order supplied by the user. This leads to an efficient implementation in the software datapath (and in our opinion hardware datapaths that offload this feature). It is also compatible with Geneve options matching provided by the Open vSwitch kernel datapath which is relevant here as the Flower classifier may be used as a mechanism to program flows into hardware as a form of Open vSwitch datapath offload (sometimes referred to as OVS-TC).

The netlink Kernel/Userspace API may be extended, for example by adding a flag, if other matching options are desired, for example matching given options in any order. This would require an implementation in the TC software datapath, and it would need to be done in a way that drivers that facilitate offload of the Flower classifier can reject or accept such flows based on hardware datapath capabilities.

This approach was discussed and agreed on at Netconf 2017 in Seoul.

Signed-off-by: Simon Horman <[email protected]>
Signed-off-by: Pieter Jansen van Vuuren <[email protected]>
Acked-by: Jakub Kicinski <[email protected]>
Signed-off-by: David S. Miller <[email protected]>
1 parent 92e2c40 commit 0a6e777

File tree

2 files changed

+269
-1
lines changed

2 files changed

+269
-1
lines changed

include/uapi/linux/pkt_cls.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,11 +480,37 @@ enum {
480480
TCA_FLOWER_KEY_ENC_IP_TTL, /* u8 */
481481
TCA_FLOWER_KEY_ENC_IP_TTL_MASK, /* u8 */
482482

483+
TCA_FLOWER_KEY_ENC_OPTS,
484+
TCA_FLOWER_KEY_ENC_OPTS_MASK,
485+
483486
__TCA_FLOWER_MAX,
484487
};
485488

486489
#define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1)
487490

491+
/* Attribute types carried inside TCA_FLOWER_KEY_ENC_OPTS and
 * TCA_FLOWER_KEY_ENC_OPTS_MASK.
 */
enum {
	TCA_FLOWER_KEY_ENC_OPTS_UNSPEC = 0,
	/* Nested TCA_FLOWER_KEY_ENC_OPT_GENEVE_ attributes */
	TCA_FLOWER_KEY_ENC_OPTS_GENEVE,
	__TCA_FLOWER_KEY_ENC_OPTS_MAX,
};

/* Highest valid TCA_FLOWER_KEY_ENC_OPTS_* attribute type. */
#define TCA_FLOWER_KEY_ENC_OPTS_MAX (__TCA_FLOWER_KEY_ENC_OPTS_MAX - 1)
501+
502+
/* Per-option attributes nested inside TCA_FLOWER_KEY_ENC_OPTS_GENEVE. */
enum {
	TCA_FLOWER_KEY_ENC_OPT_GENEVE_UNSPEC = 0,
	TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS,	/* u16 */
	TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE,	/* u8 */
	TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA,	/* 4 to 128 bytes */

	__TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX,
};

/* Highest valid TCA_FLOWER_KEY_ENC_OPT_GENEVE_* attribute type. */
#define TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX (__TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX - 1)
513+
488514
enum {
489515
TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0),
490516
TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1),

net/sched/cls_flower.c

Lines changed: 243 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <net/pkt_cls.h>
2525
#include <net/ip.h>
2626
#include <net/flow_dissector.h>
27+
#include <net/geneve.h>
2728

2829
#include <net/dst.h>
2930
#include <net/dst_metadata.h>
@@ -53,6 +54,7 @@ struct fl_flow_key {
5354
struct flow_dissector_key_tcp tcp;
5455
struct flow_dissector_key_ip ip;
5556
struct flow_dissector_key_ip enc_ip;
57+
struct flow_dissector_key_enc_opts enc_opts;
5658
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
5759

5860
struct fl_flow_mask_range {
@@ -482,6 +484,21 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
482484
[TCA_FLOWER_KEY_ENC_IP_TOS_MASK] = { .type = NLA_U8 },
483485
[TCA_FLOWER_KEY_ENC_IP_TTL] = { .type = NLA_U8 },
484486
[TCA_FLOWER_KEY_ENC_IP_TTL_MASK] = { .type = NLA_U8 },
487+
[TCA_FLOWER_KEY_ENC_OPTS] = { .type = NLA_NESTED },
488+
[TCA_FLOWER_KEY_ENC_OPTS_MASK] = { .type = NLA_NESTED },
489+
};
490+
491+
static const struct nla_policy
492+
enc_opts_policy[TCA_FLOWER_KEY_ENC_OPTS_MAX + 1] = {
493+
[TCA_FLOWER_KEY_ENC_OPTS_GENEVE] = { .type = NLA_NESTED },
494+
};
495+
496+
static const struct nla_policy
497+
geneve_opt_policy[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1] = {
498+
[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS] = { .type = NLA_U16 },
499+
[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE] = { .type = NLA_U8 },
500+
[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA] = { .type = NLA_BINARY,
501+
.len = 128 },
485502
};
486503

487504
static void fl_set_key_val(struct nlattr **tb,
@@ -603,6 +620,145 @@ static void fl_set_key_ip(struct nlattr **tb, bool encap,
603620
fl_set_key_val(tb, &key->ttl, ttl_key, &mask->ttl, ttl_mask, sizeof(key->ttl));
604621
}
605622

623+
static int fl_set_geneve_opt(const struct nlattr *nla, struct fl_flow_key *key,
624+
int depth, int option_len,
625+
struct netlink_ext_ack *extack)
626+
{
627+
struct nlattr *tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1];
628+
struct nlattr *class = NULL, *type = NULL, *data = NULL;
629+
struct geneve_opt *opt;
630+
int err, data_len = 0;
631+
632+
if (option_len > sizeof(struct geneve_opt))
633+
data_len = option_len - sizeof(struct geneve_opt);
634+
635+
opt = (struct geneve_opt *)&key->enc_opts.data[key->enc_opts.len];
636+
memset(opt, 0xff, option_len);
637+
opt->length = data_len / 4;
638+
opt->r1 = 0;
639+
opt->r2 = 0;
640+
opt->r3 = 0;
641+
642+
/* If no mask has been prodived we assume an exact match. */
643+
if (!depth)
644+
return sizeof(struct geneve_opt) + data_len;
645+
646+
if (nla_type(nla) != TCA_FLOWER_KEY_ENC_OPTS_GENEVE) {
647+
NL_SET_ERR_MSG(extack, "Non-geneve option type for mask");
648+
return -EINVAL;
649+
}
650+
651+
err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX,
652+
nla, geneve_opt_policy, extack);
653+
if (err < 0)
654+
return err;
655+
656+
/* We are not allowed to omit any of CLASS, TYPE or DATA
657+
* fields from the key.
658+
*/
659+
if (!option_len &&
660+
(!tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS] ||
661+
!tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE] ||
662+
!tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA])) {
663+
NL_SET_ERR_MSG(extack, "Missing tunnel key geneve option class, type or data");
664+
return -EINVAL;
665+
}
666+
667+
/* Omitting any of CLASS, TYPE or DATA fields is allowed
668+
* for the mask.
669+
*/
670+
if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA]) {
671+
int new_len = key->enc_opts.len;
672+
673+
data = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA];
674+
data_len = nla_len(data);
675+
if (data_len < 4) {
676+
NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is less than 4 bytes long");
677+
return -ERANGE;
678+
}
679+
if (data_len % 4) {
680+
NL_SET_ERR_MSG(extack, "Tunnel key geneve option data is not a multiple of 4 bytes long");
681+
return -ERANGE;
682+
}
683+
684+
new_len += sizeof(struct geneve_opt) + data_len;
685+
BUILD_BUG_ON(FLOW_DIS_TUN_OPTS_MAX != IP_TUNNEL_OPTS_MAX);
686+
if (new_len > FLOW_DIS_TUN_OPTS_MAX) {
687+
NL_SET_ERR_MSG(extack, "Tunnel options exceeds max size");
688+
return -ERANGE;
689+
}
690+
opt->length = data_len / 4;
691+
memcpy(opt->opt_data, nla_data(data), data_len);
692+
}
693+
694+
if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS]) {
695+
class = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS];
696+
opt->opt_class = nla_get_be16(class);
697+
}
698+
699+
if (tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE]) {
700+
type = tb[TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE];
701+
opt->type = nla_get_u8(type);
702+
}
703+
704+
return sizeof(struct geneve_opt) + data_len;
705+
}
706+
707+
static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
708+
struct fl_flow_key *mask,
709+
struct netlink_ext_ack *extack)
710+
{
711+
const struct nlattr *nla_enc_key, *nla_opt_key, *nla_opt_msk = NULL;
712+
int option_len, key_depth, msk_depth = 0;
713+
714+
nla_enc_key = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS]);
715+
716+
if (tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]) {
717+
nla_opt_msk = nla_data(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]);
718+
msk_depth = nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS_MASK]);
719+
}
720+
721+
nla_for_each_attr(nla_opt_key, nla_enc_key,
722+
nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS]), key_depth) {
723+
switch (nla_type(nla_opt_key)) {
724+
case TCA_FLOWER_KEY_ENC_OPTS_GENEVE:
725+
option_len = 0;
726+
key->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT;
727+
option_len = fl_set_geneve_opt(nla_opt_key, key,
728+
key_depth, option_len,
729+
extack);
730+
if (option_len < 0)
731+
return option_len;
732+
733+
key->enc_opts.len += option_len;
734+
/* At the same time we need to parse through the mask
735+
* in order to verify exact and mask attribute lengths.
736+
*/
737+
mask->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT;
738+
option_len = fl_set_geneve_opt(nla_opt_msk, mask,
739+
msk_depth, option_len,
740+
extack);
741+
if (option_len < 0)
742+
return option_len;
743+
744+
mask->enc_opts.len += option_len;
745+
if (key->enc_opts.len != mask->enc_opts.len) {
746+
NL_SET_ERR_MSG(extack, "Key and mask miss aligned");
747+
return -EINVAL;
748+
}
749+
750+
if (msk_depth)
751+
nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
752+
break;
753+
default:
754+
NL_SET_ERR_MSG(extack, "Unknown tunnel option type");
755+
return -EINVAL;
756+
}
757+
}
758+
759+
return 0;
760+
}
761+
606762
static int fl_set_key(struct net *net, struct nlattr **tb,
607763
struct fl_flow_key *key, struct fl_flow_key *mask,
608764
struct netlink_ext_ack *extack)
@@ -799,6 +955,12 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
799955

800956
fl_set_key_ip(tb, true, &key->enc_ip, &mask->enc_ip);
801957

958+
if (tb[TCA_FLOWER_KEY_ENC_OPTS]) {
959+
ret = fl_set_enc_opt(tb, key, mask, extack);
960+
if (ret)
961+
return ret;
962+
}
963+
802964
if (tb[TCA_FLOWER_KEY_FLAGS])
803965
ret = fl_set_key_flags(tb, &key->control.flags, &mask->control.flags);
804966

@@ -894,6 +1056,8 @@ static void fl_init_dissector(struct flow_dissector *dissector,
8941056
FLOW_DISSECTOR_KEY_ENC_PORTS, enc_tp);
8951057
FL_KEY_SET_IF_MASKED(mask, keys, cnt,
8961058
FLOW_DISSECTOR_KEY_ENC_IP, enc_ip);
1059+
FL_KEY_SET_IF_MASKED(mask, keys, cnt,
1060+
FLOW_DISSECTOR_KEY_ENC_OPTS, enc_opts);
8971061

8981062
skb_flow_dissector_init(dissector, keys, cnt);
8991063
}
@@ -1414,6 +1578,83 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask)
14141578
return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask);
14151579
}
14161580

1581+
static int fl_dump_key_geneve_opt(struct sk_buff *skb,
1582+
struct flow_dissector_key_enc_opts *enc_opts)
1583+
{
1584+
struct geneve_opt *opt;
1585+
struct nlattr *nest;
1586+
int opt_off = 0;
1587+
1588+
nest = nla_nest_start(skb, TCA_FLOWER_KEY_ENC_OPTS_GENEVE);
1589+
if (!nest)
1590+
goto nla_put_failure;
1591+
1592+
while (enc_opts->len > opt_off) {
1593+
opt = (struct geneve_opt *)&enc_opts->data[opt_off];
1594+
1595+
if (nla_put_be16(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_CLASS,
1596+
opt->opt_class))
1597+
goto nla_put_failure;
1598+
if (nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_TYPE,
1599+
opt->type))
1600+
goto nla_put_failure;
1601+
if (nla_put(skb, TCA_FLOWER_KEY_ENC_OPT_GENEVE_DATA,
1602+
opt->length * 4, opt->opt_data))
1603+
goto nla_put_failure;
1604+
1605+
opt_off += sizeof(struct geneve_opt) + opt->length * 4;
1606+
}
1607+
nla_nest_end(skb, nest);
1608+
return 0;
1609+
1610+
nla_put_failure:
1611+
nla_nest_cancel(skb, nest);
1612+
return -EMSGSIZE;
1613+
}
1614+
1615+
static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type,
1616+
struct flow_dissector_key_enc_opts *enc_opts)
1617+
{
1618+
struct nlattr *nest;
1619+
int err;
1620+
1621+
if (!enc_opts->len)
1622+
return 0;
1623+
1624+
nest = nla_nest_start(skb, enc_opt_type);
1625+
if (!nest)
1626+
goto nla_put_failure;
1627+
1628+
switch (enc_opts->dst_opt_type) {
1629+
case TUNNEL_GENEVE_OPT:
1630+
err = fl_dump_key_geneve_opt(skb, enc_opts);
1631+
if (err)
1632+
goto nla_put_failure;
1633+
break;
1634+
default:
1635+
goto nla_put_failure;
1636+
}
1637+
nla_nest_end(skb, nest);
1638+
return 0;
1639+
1640+
nla_put_failure:
1641+
nla_nest_cancel(skb, nest);
1642+
return -EMSGSIZE;
1643+
}
1644+
1645+
static int fl_dump_key_enc_opt(struct sk_buff *skb,
1646+
struct flow_dissector_key_enc_opts *key_opts,
1647+
struct flow_dissector_key_enc_opts *msk_opts)
1648+
{
1649+
int err;
1650+
1651+
err = fl_dump_key_options(skb, TCA_FLOWER_KEY_ENC_OPTS, key_opts);
1652+
if (err)
1653+
return err;
1654+
1655+
return fl_dump_key_options(skb, TCA_FLOWER_KEY_ENC_OPTS_MASK, msk_opts);
1656+
}
1657+
14171658
static int fl_dump_key(struct sk_buff *skb, struct net *net,
14181659
struct fl_flow_key *key, struct fl_flow_key *mask)
14191660
{
@@ -1594,7 +1835,8 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net,
15941835
&mask->enc_tp.dst,
15951836
TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,
15961837
sizeof(key->enc_tp.dst)) ||
1597-
fl_dump_key_ip(skb, true, &key->enc_ip, &mask->enc_ip))
1838+
fl_dump_key_ip(skb, true, &key->enc_ip, &mask->enc_ip) ||
1839+
fl_dump_key_enc_opt(skb, &key->enc_opts, &mask->enc_opts))
15981840
goto nla_put_failure;
15991841

16001842
if (fl_dump_key_flags(skb, key->control.flags, mask->control.flags))

0 commit comments

Comments
 (0)