dscpPolicy: more bpf verifier optimizations
This change avoids the use of multiple conditionals/ifs
by merging them all into one via bitwise arithmetic.
Bug: 361492282
Test: TreeHugger, atest CtsNetTestCases:android.net.cts.DscpPolicyTest
Signed-off-by: Maciej Żenczykowski <maze@google.com>
Change-Id: I05c2f9cbddf14e0388cb8ac99c63ce0c76e46960
diff --git a/bpf/progs/bpf_net_helpers.h b/bpf/progs/bpf_net_helpers.h
index a86c3e6..a5664ba 100644
--- a/bpf/progs/bpf_net_helpers.h
+++ b/bpf/progs/bpf_net_helpers.h
@@ -139,6 +139,24 @@
if (skb->data_end - skb->data < len) bpf_skb_pull_data(skb, len);
}
+// anti-compiler-optimizer no-op: explicitly force full calculation of 'v'
+//
+// The use for this is to force full calculation of a complex arithmetic (likely binary
+// bitops) value, and then check the result only once (thus likely reducing the number
+// of required conditional jump instructions that badly affect bpf verifier runtime)
+//
+// The compiler cannot look into the assembly statement, so it doesn't know it does nothing.
+// Since the statement takes 'v' as both input and output in a register (+r),
+// the compiler must fully calculate the precise value of 'v' before this,
+// and must use the (possibly modified) value of 'v' afterwards (thus cannot
+// do funky optimizations to use partial results from before the asm).
+//
+// As this is not flagged 'volatile' this may still be moved out of a loop,
+// or even entirely optimized out if 'v' is never used afterwards.
+//
+// See: https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html
+#define COMPILER_FORCE_CALCULATION(v) asm ("" : "+r" (v))
+
struct egress_bool { bool egress; };
#define INGRESS ((struct egress_bool){ .egress = false })
#define EGRESS ((struct egress_bool){ .egress = true })
diff --git a/bpf/progs/dscpPolicy.c b/bpf/progs/dscpPolicy.c
index 4084933..7809e3a 100644
--- a/bpf/progs/dscpPolicy.c
+++ b/bpf/progs/dscpPolicy.c
@@ -115,13 +115,28 @@
if (!existing_rule) return; // impossible
- if (v6_equal(src_ip, existing_rule->src_ip) &&
- v6_equal(dst_ip, existing_rule->dst_ip) &&
- skb->ifindex == existing_rule->ifindex &&
- sport == existing_rule->src_port &&
- dport == existing_rule->dst_port &&
- protocol == existing_rule->proto) {
- if (existing_rule->dscp_val < 0) return;
+ uint64_t nomatch = 0;
+ nomatch |= v6_not_equal(src_ip, existing_rule->src_ip);
+ nomatch |= v6_not_equal(dst_ip, existing_rule->dst_ip);
+ nomatch |= (skb->ifindex ^ existing_rule->ifindex);
+ nomatch |= (sport ^ existing_rule->src_port);
+ nomatch |= (dport ^ existing_rule->dst_port);
+ nomatch |= (protocol ^ existing_rule->proto);
+ COMPILER_FORCE_CALCULATION(nomatch);
+
+ /*
+ * After the above funky bitwise arithmetic we have 'nomatch == 0' iff
+ * src_ip == existing_rule->src_ip &&
+ * dst_ip == existing_rule->dst_ip &&
+ * skb->ifindex == existing_rule->ifindex &&
+ * sport == existing_rule->src_port &&
+ * dport == existing_rule->dst_port &&
+ * protocol == existing_rule->proto
+ */
+
+ if (!nomatch) {
+ if (existing_rule->dscp_val < 0) return; // cached no-op
+
if (ipv4) {
uint8_t newTos = UPDATE_TOS(existing_rule->dscp_val, tos);
bpf_l3_csum_replace(skb, l2_header_size + IP4_OFFSET(check), htons(tos), htons(newTos),
@@ -133,7 +148,7 @@
bpf_skb_store_bytes(skb, l2_header_size, &new_first_be32, sizeof(__be32),
BPF_F_RECOMPUTE_CSUM);
}
- return;
+ return; // cached DSCP mutation
}
// Linear scan ipv4_dscp_policies_map since no stored params match skb.
diff --git a/bpf/progs/dscpPolicy.h b/bpf/progs/dscpPolicy.h
index 6a6b711..413fb0f 100644
--- a/bpf/progs/dscpPolicy.h
+++ b/bpf/progs/dscpPolicy.h
@@ -28,9 +28,6 @@
#define v6_not_equal(a, b) ((v6_hi_be64(a) ^ v6_hi_be64(b)) \
| (v6_lo_be64(a) ^ v6_lo_be64(b)))
-// Returns 'a == b' as boolean
-#define v6_equal(a, b) (!v6_not_equal((a), (b)))
-
typedef struct {
struct in6_addr src_ip;
struct in6_addr dst_ip;