From b57c1da5c39a9520a13d671c56317c3409debb92 Mon Sep 17 00:00:00 2001 From: Paolo Valerio Date: Mon, 30 Sep 2024 22:50:33 +0200 Subject: [PATCH] conntrack: Use a per zone default limit. Before this change the default limit, instead of being considered per-zone, was considered as a global value that every new entry was checked against during the creation. This was not the intended behavior as the default limit should be inherited by each zone instead of being an aggregate number. This change corrects that by removing the default limit from the cmap and making it global (atomic). Now, whenever a new connection needs to be committed, if default_zone_limit is set and the entry for the zone doesn't exist, a new entry for that zone is lazily created, marked as default. All subsequent packets for that zone will undergo the regular lookup process. To distinguish between default and user-defined entries, the storage for the limit member of struct conntrack_zone_limit has been changed from a 32-bit unsigned integer to a 64-bit signed integer. The negative value ZONE_LIMIT_CONN_DEFAULT now indicates a default entry. Operations such as creation/deletion are modified accordingly taking into account this new behavior. Worth noting that OVS_REQUIRES(ct->ct_lock) is not a strict requirement for zone_limit_lookup_or_default(), however since the function operates under the lock and it can create an entry in the slow path, the lock requirement is enforced in order to make thread safety checks work. The function can still be moved outside the creation lock or any lock, keeping the fastpath lockless (turning zone_limit_lookup_protected() to its unprotected version) and locking only in the slow path (replacing zone_limit_create__() with zone_limit_create__(). The patch also extends `conntrack - limit by zone` test in order to check the behavior, and while at it, update test's packet-out to use compose-packet function. Fixes: a7f33fdbfb67 ("conntrack: Support zone limits.") Reported-at: https://issues.redhat.com/browse/FDP-122 Reported-by: Ilya Maximets Signed-off-by: Paolo Valerio Signed-off-by: Aaron Conole --- NEWS | 5 + lib/conntrack-private.h | 7 +- lib/conntrack.c | 233 +++++++++++++++++++++++++++++++--------- tests/system-traffic.at | 64 +++++++---- 4 files changed, 236 insertions(+), 73 deletions(-) diff --git a/NEWS b/NEWS index 7a9626bf4ee..48384ab1d1c 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,10 @@ Post-v3.4.0 -------------------- + - Userspace datapath: + * The default zone limit, if set, is now inherited by any zone + that does not have a specific value defined, rather than being + treated as a global value, aligning the behavior with that of + the kernel datapath. v3.4.0 - 15 Aug 2024 diff --git a/lib/conntrack-private.h b/lib/conntrack-private.h index 2770470d12c..46b212754be 100644 --- a/lib/conntrack-private.h +++ b/lib/conntrack-private.h @@ -198,10 +198,11 @@ enum ct_ephemeral_range { #define FOR_EACH_PORT_IN_RANGE(curr, min, max) \ FOR_EACH_PORT_IN_RANGE__(curr, min, max, OVS_JOIN(idx, __COUNTER__)) +#define ZONE_LIMIT_CONN_DEFAULT -1 struct conntrack_zone_limit { int32_t zone; - atomic_uint32_t limit; + atomic_int64_t limit; atomic_count count; uint32_t zone_limit_seq; /* Used to disambiguate zone limit counts. */ }; @@ -212,6 +213,9 @@ struct conntrack { struct rculist exp_lists[N_EXP_LISTS] OVS_GUARDED; struct cmap zone_limits OVS_GUARDED; struct cmap timeout_policies OVS_GUARDED; + uint32_t zone_limit_seq OVS_GUARDED; /* Used to disambiguate zone limit + * counts. */ + atomic_uint32_t default_zone_limit; uint32_t hash_basis; /* Salt for hashing a connection key. */ pthread_t clean_thread; /* Periodically cleans up connection tracker. */ @@ -234,7 +238,6 @@ struct conntrack { * control context. */ struct ipf *ipf; /* Fragmentation handling context. */ - uint32_t zone_limit_seq; /* Used to disambiguate zone limit counts. */ atomic_bool tcp_seq_chk; /* Check TCP sequence numbers. */ atomic_uint32_t sweep_ms; /* Next sweep interval. */ }; diff --git a/lib/conntrack.c b/lib/conntrack.c index 3d19d37dfa7..0061a563647 100644 --- a/lib/conntrack.c +++ b/lib/conntrack.c @@ -270,6 +270,7 @@ conntrack_init(void) atomic_init(&ct->n_conn_limit, DEFAULT_N_CONN_LIMIT); atomic_init(&ct->tcp_seq_chk, true); atomic_init(&ct->sweep_ms, 20000); + atomic_init(&ct->default_zone_limit, 0); latch_init(&ct->clean_thread_exit); ct->clean_thread = ovs_thread_create("ct_clean", clean_thread_main, ct); ct->ipf = ipf_init(); @@ -296,6 +297,28 @@ zone_key_hash(int32_t zone, uint32_t basis) return hash; } +static int64_t +zone_limit_get_limit__(struct conntrack_zone_limit *czl) +{ + int64_t limit; + atomic_read_relaxed(&czl->limit, &limit); + + return limit; +} + +static int64_t +zone_limit_get_limit(struct conntrack *ct, struct conntrack_zone_limit *czl) +{ + int64_t limit = zone_limit_get_limit__(czl); + + if (limit == ZONE_LIMIT_CONN_DEFAULT) { + atomic_read_relaxed(&ct->default_zone_limit, &limit); + limit = limit ? : -1; + } + + return limit; +} + static struct zone_limit * zone_limit_lookup_protected(struct conntrack *ct, int32_t zone) OVS_REQUIRES(ct->ct_lock) @@ -323,11 +346,56 @@ zone_limit_lookup(struct conntrack *ct, int32_t zone) return NULL; } +static struct zone_limit * +zone_limit_create__(struct conntrack *ct, int32_t zone, int64_t limit) + OVS_REQUIRES(ct->ct_lock) +{ + struct zone_limit *zl = NULL; + + if (zone > DEFAULT_ZONE && zone <= MAX_ZONE) { + zl = xmalloc(sizeof *zl); + atomic_init(&zl->czl.limit, limit); + atomic_count_init(&zl->czl.count, 0); + zl->czl.zone = zone; + zl->czl.zone_limit_seq = ct->zone_limit_seq++; + uint32_t hash = zone_key_hash(zone, ct->hash_basis); + cmap_insert(&ct->zone_limits, &zl->node, hash); + } + + return zl; +} + +static struct zone_limit * +zone_limit_create(struct conntrack *ct, int32_t zone, int64_t limit) + OVS_REQUIRES(ct->ct_lock) +{ + struct zone_limit *zl = zone_limit_lookup_protected(ct, zone); + + if (zl) { + return zl; + } + + return zone_limit_create__(ct, zone, limit); +} + +/* Lazily creates a new entry in the zone_limits cmap if default limit + * is set and there's no entry for the zone. */ static struct zone_limit * zone_limit_lookup_or_default(struct conntrack *ct, int32_t zone) + OVS_REQUIRES(ct->ct_lock) { - struct zone_limit *zl = zone_limit_lookup(ct, zone); - return zl ? zl : zone_limit_lookup(ct, DEFAULT_ZONE); + struct zone_limit *zl = zone_limit_lookup_protected(ct, zone); + + if (!zl) { + uint32_t limit; + atomic_read_relaxed(&ct->default_zone_limit, &limit); + + if (limit) { + zl = zone_limit_create__(ct, zone, ZONE_LIMIT_CONN_DEFAULT); + } + } + + return zl; } struct conntrack_zone_info @@ -338,89 +406,147 @@ zone_limit_get(struct conntrack *ct, int32_t zone) .limit = 0, .count = 0, }; - struct zone_limit *zl = zone_limit_lookup_or_default(ct, zone); + struct zone_limit *zl = zone_limit_lookup(ct, zone); if (zl) { - czl.zone = zl->czl.zone; - atomic_read_relaxed(&zl->czl.limit, &czl.limit); + int64_t czl_limit = zone_limit_get_limit__(&zl->czl); + if (czl_limit > ZONE_LIMIT_CONN_DEFAULT) { + czl.zone = zl->czl.zone; + czl.limit = czl_limit; + } else { + atomic_read_relaxed(&ct->default_zone_limit, &czl.limit); + } + czl.count = atomic_count_get(&zl->czl.count); + } else { + atomic_read_relaxed(&ct->default_zone_limit, &czl.limit); } + return czl; } -static int -zone_limit_create(struct conntrack *ct, int32_t zone, uint32_t limit) +static void +zone_limit_clean__(struct conntrack *ct, struct zone_limit *zl) OVS_REQUIRES(ct->ct_lock) { - struct zone_limit *zl = zone_limit_lookup_protected(ct, zone); + uint32_t hash = zone_key_hash(zl->czl.zone, ct->hash_basis); + cmap_remove(&ct->zone_limits, &zl->node, hash); + ovsrcu_postpone(free, zl); +} - if (zl) { - return 0; - } +static void +zone_limit_clean(struct conntrack *ct, struct zone_limit *zl) + OVS_REQUIRES(ct->ct_lock) +{ + uint32_t limit; - if (zone >= DEFAULT_ZONE && zone <= MAX_ZONE) { - zl = xmalloc(sizeof *zl); - atomic_init(&zl->czl.limit, limit); - atomic_count_init(&zl->czl.count, 0); - zl->czl.zone = zone; - zl->czl.zone_limit_seq = ct->zone_limit_seq++; - uint32_t hash = zone_key_hash(zone, ct->hash_basis); - cmap_insert(&ct->zone_limits, &zl->node, hash); - return 0; + atomic_read_relaxed(&ct->default_zone_limit, &limit); + /* Do not remove the entry if the default limit is enabled, but + * simply move the limit to default. */ + if (limit) { + atomic_store_relaxed(&zl->czl.limit, ZONE_LIMIT_CONN_DEFAULT); } else { - return EINVAL; + zone_limit_clean__(ct, zl); } } -int -zone_limit_update(struct conntrack *ct, int32_t zone, uint32_t limit) +static void +zone_limit_clean_default(struct conntrack *ct) + OVS_REQUIRES(ct->ct_lock) { - int err = 0; - struct zone_limit *zl = zone_limit_lookup(ct, zone); - if (zl) { - atomic_store_relaxed(&zl->czl.limit, limit); - VLOG_INFO("Changed zone limit of %u for zone %d", limit, zone); - } else { - ovs_mutex_lock(&ct->ct_lock); - err = zone_limit_create(ct, zone, limit); - ovs_mutex_unlock(&ct->ct_lock); - if (!err) { - VLOG_INFO("Created zone limit of %u for zone %d", limit, zone); - } else { - VLOG_WARN("Request to create zone limit for invalid zone %d", - zone); + struct zone_limit *zl; + int64_t czl_limit; + + atomic_store_relaxed(&ct->default_zone_limit, 0); + + CMAP_FOR_EACH (zl, node, &ct->zone_limits) { + atomic_read_relaxed(&zl->czl.limit, &czl_limit); + if (zone_limit_get_limit__(&zl->czl) == ZONE_LIMIT_CONN_DEFAULT) { + zone_limit_clean__(ct, zl); } } - return err; } -static void -zone_limit_clean(struct conntrack *ct, struct zone_limit *zl) +static bool +zone_limit_delete__(struct conntrack *ct, int32_t zone) OVS_REQUIRES(ct->ct_lock) { - uint32_t hash = zone_key_hash(zl->czl.zone, ct->hash_basis); - cmap_remove(&ct->zone_limits, &zl->node, hash); - ovsrcu_postpone(free, zl); + struct zone_limit *zl = NULL; + + if (zone == DEFAULT_ZONE) { + zone_limit_clean_default(ct); + } else { + zl = zone_limit_lookup_protected(ct, zone); + if (zl) { + zone_limit_clean(ct, zl); + } + } + + return zl != NULL; } int zone_limit_delete(struct conntrack *ct, int32_t zone) { + bool deleted; + ovs_mutex_lock(&ct->ct_lock); - struct zone_limit *zl = zone_limit_lookup_protected(ct, zone); - if (zl) { - zone_limit_clean(ct, zl); - } + deleted = zone_limit_delete__(ct, zone); + ovs_mutex_unlock(&ct->ct_lock); if (zone != DEFAULT_ZONE) { - VLOG_INFO(zl ? "Deleted zone limit for zone %d" - : "Attempted delete of non-existent zone limit: zone %d", + VLOG_INFO(deleted + ? "Deleted zone limit for zone %d" + : "Attempted delete of non-existent zone limit: zone %d", zone); } - ovs_mutex_unlock(&ct->ct_lock); return 0; } +static void +zone_limit_update_default(struct conntrack *ct, int32_t zone, uint32_t limit) +{ + /* limit zero means delete default. */ + if (limit == 0) { + ovs_mutex_lock(&ct->ct_lock); + zone_limit_delete__(ct, zone); + ovs_mutex_unlock(&ct->ct_lock); + } else { + atomic_store_relaxed(&ct->default_zone_limit, limit); + } +} + +int +zone_limit_update(struct conntrack *ct, int32_t zone, uint32_t limit) +{ + struct zone_limit *zl; + int err = 0; + + if (zone == DEFAULT_ZONE) { + zone_limit_update_default(ct, zone, limit); + VLOG_INFO("Set default zone limit to %u", limit); + return err; + } + + zl = zone_limit_lookup(ct, zone); + if (zl) { + atomic_store_relaxed(&zl->czl.limit, limit); + VLOG_INFO("Changed zone limit of %u for zone %d", limit, zone); + } else { + ovs_mutex_lock(&ct->ct_lock); + err = zone_limit_create(ct, zone, limit) == NULL; + ovs_mutex_unlock(&ct->ct_lock); + if (!err) { + VLOG_INFO("Created zone limit of %u for zone %d", limit, zone); + } else { + VLOG_WARN("Request to create zone limit for invalid zone %d", + zone); + } + } + + return err; +} + static void conn_clean__(struct conntrack *ct, struct conn *conn) OVS_REQUIRES(ct->ct_lock) @@ -917,13 +1043,14 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt, } if (commit) { - uint32_t czl_limit; + int64_t czl_limit; struct conn_key_node *fwd_key_node, *rev_key_node; struct zone_limit *zl = zone_limit_lookup_or_default(ct, ctx->key.zone); if (zl) { - atomic_read_relaxed(&zl->czl.limit, &czl_limit); - if (atomic_count_get(&zl->czl.count) >= czl_limit) { + czl_limit = zone_limit_get_limit(ct, &zl->czl); + if (czl_limit >= 0 && + atomic_count_get(&zl->czl.count) >= czl_limit) { COVERAGE_INC(conntrack_zone_full); return nc; } diff --git a/tests/system-traffic.at b/tests/system-traffic.at index eae37e3a5dd..98754f70739 100644 --- a/tests/system-traffic.at +++ b/tests/system-traffic.at @@ -5692,6 +5692,41 @@ priority=100,in_port=2,udp,action=ct(zone=3,commit),1 AT_CHECK([ovs-ofctl --bundle add-flows br0 flows.txt]) +m4_define([UDP_PKT], [m4_join([,], + [eth_src=50:54:00:00:00:0$1,eth_dst=50:54:00:00:00:0$2,dl_type=0x0800], + [nw_src=10.1.1.$1,nw_dst=10.1.1.$2], + [nw_proto=17,nw_ttl=64,nw_frag=no], + [udp_src=1,udp_dst=$3])]) + +AT_CHECK([ovs-appctl dpctl/ct-set-limits default=3]) +AT_CHECK([ovs-appctl dpctl/ct-get-limits], [],[dnl +default limit=3 +]) + +dnl Send 5 packets from each port in order to hit the default zone limit. +for i in $(seq 2 6); do + pkt=$(ovs-ofctl compose-packet --bare "UDP_PKT([1], [2], [$i])") + AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=${pkt} actions=resubmit(,0)"]) +done + +AT_CHECK([ovs-appctl dpctl/ct-get-limits zone=1], [],[dnl +default limit=3 +zone=1,limit=3,count=3 +]) + +for i in $(seq 2 6); do + pkt=$(ovs-ofctl compose-packet --bare "UDP_PKT([3], [4], [$i])") + AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=${pkt} actions=resubmit(,0)"]) +done + +AT_CHECK([ovs-appctl dpctl/ct-get-limits zone=1,3], [],[dnl +default limit=3 +zone=1,limit=3,count=3 +zone=3,limit=3,count=3 +]) + +AT_CHECK([ovs-appctl dpctl/flush-conntrack]) + AT_CHECK([ovs-appctl dpctl/ct-set-limits default=10 zone=1,limit=5 zone=2,limit=3 zone=3,limit=3 zone=4,limit=15]) AT_CHECK([ovs-appctl dpctl/ct-del-limits zone=2,4,5]) AT_CHECK([ovs-appctl dpctl/ct-get-limits zone=1,2,3,4], [],[dnl @@ -5703,15 +5738,10 @@ zone=4,limit=10,count=0 ]) dnl Test UDP from port 1 -AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000200080000 actions=resubmit(,0)"]) -AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000300080000 actions=resubmit(,0)"]) -AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000400080000 actions=resubmit(,0)"]) -AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000500080000 actions=resubmit(,0)"]) -AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000600080000 actions=resubmit(,0)"]) -AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000700080000 actions=resubmit(,0)"]) -AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000800080000 actions=resubmit(,0)"]) -AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000900080000 actions=resubmit(,0)"]) -AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=50540000000a50540000000908004500001c000000000011a4cd0a0101010a0101020001000a00080000 actions=resubmit(,0)"]) +for i in $(seq 2 10); do + pkt=$(ovs-ofctl compose-packet --bare "UDP_PKT([1], [2], [$i])") + AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=1 packet=${pkt} actions=resubmit(,0)"]) +done AT_CHECK([ovs-appctl dpctl/ct-get-limits zone=1,2,3,4,5], [0], [dnl default limit=10 @@ -5738,11 +5768,10 @@ udp,orig=(src=10.1.1.1,dst=10.1.1.2,sport=1,dport=6),reply=(src=10.1.1.2,dst=10. ]) dnl Test UDP from port 2 -AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=50540000000a50540000000908004500001c000000000011a4c90a0101030a0101040001000200080000 actions=resubmit(,0)"]) -AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=50540000000a50540000000908004500001c000000000011a4c90a0101030a0101040001000300080000 actions=resubmit(,0)"]) -AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=50540000000a50540000000908004500001c000000000011a4c90a0101030a0101040001000400080000 actions=resubmit(,0)"]) -AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=50540000000a50540000000908004500001c000000000011a4c90a0101030a0101040001000500080000 actions=resubmit(,0)"]) -AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=50540000000a50540000000908004500001c000000000011a4c90a0101030a0101040001000600080000 actions=resubmit(,0)"]) +for i in $(seq 2 6); do + pkt=$(ovs-ofctl compose-packet --bare "UDP_PKT([3], [4], [$i])") + AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=${pkt} actions=resubmit(,0)"]) +done AT_CHECK([ovs-appctl dpctl/ct-get-limits zone=1,3], [0], [dnl default limit=10 @@ -5803,10 +5832,9 @@ default limit=10 zone=1,limit=3,count=0 zone=3,limit=3,count=0]) -for i in 2 3 4 5 6; do - packet="50540000000a50540000000908004500001c000000000011a4c90a0101030a0101040001000${i}00080000" - AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 \ - "in_port=2 packet=${packet} actions=resubmit(,0)"]) +for i in $(seq 2 6); do + pkt=$(ovs-ofctl compose-packet --bare "UDP_PKT([3], [4], [$i])") + AT_CHECK([ovs-ofctl -O OpenFlow13 packet-out br0 "in_port=2 packet=${pkt} actions=resubmit(,0)"]) done AT_CHECK([ovs-appctl dpctl/dump-conntrack | grep "orig=.src=10\.1\.1\.3," | sort ], [0], [dnl