Add packet tracing bpf implementation

This adds the core BPF implementation of Android network packet tracing.
The new code inspects the skb to extract per-packet metadata: timestamp,
interface, length, uid/tag, ports, protocol, direction, and TCP flags
(see the sketch below). Both the trace programs and the ring buffer are
restricted to 5.8+ kernels and to userdebug or eng builds.
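
For reference, this is roughly the record each trace event carries. The
field names are taken from do_packet_tracing() below; the real definition
lives in a shared header, so the exact ordering and types here are an
assumption:

    // Sketch only -- field names match do_packet_tracing() below, but
    // the layout of the real shared PacketTrace struct may differ.
    typedef struct {
        uint64_t timestampNs;  // bpf_ktime_get_boot_ns() at trace time
        uint32_t ifindex;      // skb->ifindex
        uint32_t length;       // skb->len
        uint32_t uid;          // socket owner uid
        uint32_t tag;          // traffic stats tag
        uint16_t sport;        // L4 source port, network byte order
        uint16_t dport;        // L4 destination port, network byte order
        bool egress;           // direction of the packet
        uint8_t ipProto;       // e.g. IPPROTO_TCP, IPPROTO_UDP
        uint8_t tcpFlags;      // TCP flag byte; 0 for non-TCP
        uint8_t ipVersion;     // 4 or 6; 0 if neither
    } PacketTrace;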

With the packet_trace_enabled_map defaulting to zero (false), userdebug
and eng builds won't perform any tracing today. The only effects are a
32 KiB memory increase for the ringbuf and the per-packet check of the
config array.
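
For illustration, a privileged tool could later flip that flag and drain
the ringbuf roughly as below. This is a hedged sketch using plain libbpf;
the pin paths follow the usual bpfloader map_<prog>_<map> convention but
are assumptions, and Android's real reader would use its own ringbuf
wrapper rather than libbpf's:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>
    #include <bpf/bpf.h>
    #include <bpf/libbpf.h>

    // Assumed pin paths -- derived from the usual bpfloader naming
    // convention, not taken from this change.
    #define ENABLED_MAP "/sys/fs/bpf/net_shared/map_netd_packet_trace_enabled_map"
    #define RINGBUF_MAP "/sys/fs/bpf/net_shared/map_netd_packet_trace_ringbuf"

    static int on_packet(void* ctx, void* data, size_t len) {
        (void)ctx; (void)len;
        const PacketTrace* pkt = data;  // see the struct sketch above
        // ... print or aggregate pkt fields here ...
        return 0;
    }

    int trace_packets(void) {
        uint32_t key = 0;
        bool on = true;
        int cfg_fd = bpf_obj_get(ENABLED_MAP);
        if (cfg_fd < 0) return -1;
        if (bpf_map_update_elem(cfg_fd, &key, &on, BPF_ANY)) return -1;

        int rb_fd = bpf_obj_get(RINGBUF_MAP);
        struct ring_buffer* rb = ring_buffer__new(rb_fd, on_packet, NULL, NULL);
        if (!rb) return -1;
        while (ring_buffer__poll(rb, /*timeout_ms=*/100) >= 0) { /* drain */ }
        ring_buffer__free(rb);
        return 0;
    }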

Bug: 246985031
Test: build & flash both userdebug and user
Change-Id: I144da2971c0738b565ad58abc17e456209f13bde
diff --git a/bpf_progs/netd.c b/bpf_progs/netd.c
index 8ca6ec6..84da79d 100644
--- a/bpf_progs/netd.c
+++ b/bpf_progs/netd.c
@@ -46,6 +46,9 @@
 static const bool INGRESS = false;
 static const bool EGRESS = true;
 
+// Used for 'bool enable_tracing'
+static const bool TRACE_ON = true;
+static const bool TRACE_OFF = false;
 
 // offsetof(struct iphdr, ihl) -- but that's a bitfield
 #define IPPROTO_IHL_OFF 0
@@ -97,6 +100,19 @@
 /* never actually used from ebpf */
 DEFINE_BPF_MAP_NO_NETD(iface_index_name_map, HASH, uint32_t, IfaceValue, IFACE_INDEX_NAME_MAP_SIZE)
 
+// A single-element configuration array; packet tracing is enabled when 'true'.
+DEFINE_BPF_MAP_EXT(packet_trace_enabled_map, ARRAY, uint32_t, bool, 1,
+                   AID_ROOT, AID_SYSTEM, 0060, "fs_bpf_net_shared", "", false,
+                   BPFLOADER_IGNORED_ON_VERSION, BPFLOADER_MAX_VER, /*ignore_on_eng*/false,
+                   /*ignore_on_user*/true, /*ignore_on_userdebug*/false)
+
+// A ring buffer on which packet information is pushed. This map is only loaded
+// on eng and userdebug devices; user builds skip it to save memory.
+DEFINE_BPF_RINGBUF_EXT(packet_trace_ringbuf, PacketTrace, PACKET_TRACE_BUF_SIZE,
+                       AID_ROOT, AID_SYSTEM, 0060, "fs_bpf_net_shared", "", false,
+                       BPFLOADER_IGNORED_ON_VERSION, BPFLOADER_MAX_VER, /*ignore_on_eng*/false,
+                       /*ignore_on_user*/true, /*ignore_on_userdebug*/false);
+
 // iptables xt_bpf programs need to be usable by both netd and netutils_wrappers
 // selinux contexts, because even non-xt_bpf iptables mutations are implemented as
 // a full table dump, followed by an update in userspace, and then a reload into the kernel,
@@ -224,6 +240,66 @@
         : bpf_skb_load_bytes(skb, L3_off, to, len);
 }
 
+static __always_inline inline void do_packet_tracing(
+        const struct __sk_buff* const skb, const bool egress, const uint32_t uid,
+        const uint32_t tag, const bool enable_tracing, const unsigned kver) {
+    if (!enable_tracing) return;
+    if (kver < KVER(5, 8, 0)) return;  // BPF ring buffers require a 5.8+ kernel.
+
+    uint32_t mapKey = 0;
+    bool* traceConfig = bpf_packet_trace_enabled_map_lookup_elem(&mapKey);
+    if (traceConfig == NULL) return;
+    if (*traceConfig == false) return;
+
+    PacketTrace* pkt = bpf_packet_trace_ringbuf_reserve();
+    if (pkt == NULL) return;
+
+    // Errors from bpf_skb_load_bytes_net are ignored to favor returning something
+    // over returning nothing. In the event of an error, the kernel will fill in
+    // zero for the destination memory. Do not change the default '= 0' below.
+
+    uint8_t proto = 0;
+    uint8_t L4_off = 0;
+    uint8_t ipVersion = 0;
+    if (skb->protocol == htons(ETH_P_IP)) {
+        (void)bpf_skb_load_bytes_net(skb, IP4_OFFSET(protocol), &proto, sizeof(proto), kver);
+        (void)bpf_skb_load_bytes_net(skb, IPPROTO_IHL_OFF, &L4_off, sizeof(L4_off), kver);
+        L4_off = (L4_off & 0x0F) * 4;  // IHL is the low nibble, in 32-bit words.
+        ipVersion = 4;
+    } else if (skb->protocol == htons(ETH_P_IPV6)) {
+        (void)bpf_skb_load_bytes_net(skb, IP6_OFFSET(nexthdr), &proto, sizeof(proto), kver);
+        L4_off = sizeof(struct ipv6hdr);
+        ipVersion = 6;
+    }
+
+    uint8_t flags = 0;  // the CWR..FIN byte of the TCP header.
+    __be16 sport = 0, dport = 0;
+    if (proto == IPPROTO_TCP && L4_off >= 20) {  // 20 = min IPv4 header length
+        (void)bpf_skb_load_bytes_net(skb, L4_off + TCP_FLAG32_OFF + 1, &flags, sizeof(flags), kver);
+        (void)bpf_skb_load_bytes_net(skb, L4_off + TCP_OFFSET(source), &sport, sizeof(sport), kver);
+        (void)bpf_skb_load_bytes_net(skb, L4_off + TCP_OFFSET(dest), &dport, sizeof(dport), kver);
+    } else if (proto == IPPROTO_UDP && L4_off >= 20) {
+        (void)bpf_skb_load_bytes_net(skb, L4_off + UDP_OFFSET(source), &sport, sizeof(sport), kver);
+        (void)bpf_skb_load_bytes_net(skb, L4_off + UDP_OFFSET(dest), &dport, sizeof(dport), kver);
+    }
+
+    pkt->timestampNs = bpf_ktime_get_boot_ns();
+    pkt->ifindex = skb->ifindex;
+    pkt->length = skb->len;
+
+    pkt->uid = uid;
+    pkt->tag = tag;
+    pkt->sport = sport;
+    pkt->dport = dport;
+
+    pkt->egress = egress;
+    pkt->ipProto = proto;
+    pkt->tcpFlags = flags;
+    pkt->ipVersion = ipVersion;
+
+    bpf_packet_trace_ringbuf_submit(pkt);
+}
+
 static __always_inline inline bool skip_owner_match(struct __sk_buff* skb, const unsigned kver) {
     uint32_t flag = 0;
     if (skb->protocol == htons(ETH_P_IP)) {
@@ -317,6 +393,7 @@
 }
 
 static __always_inline inline int bpf_traffic_account(struct __sk_buff* skb, bool egress,
+                                                      const bool enable_tracing,
                                                       const unsigned kver) {
     uint32_t sock_uid = bpf_get_socket_uid(skb);
     uint64_t cookie = bpf_get_socket_cookie(skb);
@@ -376,34 +453,51 @@
         key.tag = 0;
     }
 
+    do_packet_tracing(skb, egress, uid, tag, enable_tracing, kver);
     update_stats_with_config(skb, egress, &key, *selectedMap);
     update_app_uid_stats_map(skb, egress, &uid);
     asm("%0 &= 1" : "+r"(match));
     return match;
 }
 
+DEFINE_BPF_PROG_EXT("cgroupskb/ingress/stats$trace", AID_ROOT, AID_SYSTEM,
+                    bpf_cgroup_ingress_trace, KVER(5, 8, 0), KVER_INF,
+                    BPFLOADER_IGNORED_ON_VERSION, BPFLOADER_MAX_VER, false,
+                    "fs_bpf_netd_readonly", "", false, true, false)
+(struct __sk_buff* skb) {
+    return bpf_traffic_account(skb, INGRESS, TRACE_ON, KVER(5, 8, 0));
+}
+
 DEFINE_NETD_BPF_PROG_KVER_RANGE("cgroupskb/ingress/stats$4_19", AID_ROOT, AID_SYSTEM,
                                 bpf_cgroup_ingress_4_19, KVER(4, 19, 0), KVER_INF)
 (struct __sk_buff* skb) {
-    return bpf_traffic_account(skb, INGRESS, KVER(4, 19, 0));
+    return bpf_traffic_account(skb, INGRESS, TRACE_OFF, KVER(4, 19, 0));
 }
 
 DEFINE_NETD_BPF_PROG_KVER_RANGE("cgroupskb/ingress/stats$4_14", AID_ROOT, AID_SYSTEM,
                                 bpf_cgroup_ingress_4_14, KVER_NONE, KVER(4, 19, 0))
 (struct __sk_buff* skb) {
-    return bpf_traffic_account(skb, INGRESS, KVER_NONE);
+    return bpf_traffic_account(skb, INGRESS, TRACE_OFF, KVER_NONE);
+}
+
+DEFINE_BPF_PROG_EXT("cgroupskb/egress/stats$trace", AID_ROOT, AID_SYSTEM,
+                    bpf_cgroup_egress_trace, KVER(5, 8, 0), KVER_INF,
+                    BPFLOADER_IGNORED_ON_VERSION, BPFLOADER_MAX_VER, false,
+                    "fs_bpf_netd_readonly", "", false, true, false)
+(struct __sk_buff* skb) {
+    return bpf_traffic_account(skb, EGRESS, TRACE_ON, KVER(5, 8, 0));
 }
 
 DEFINE_NETD_BPF_PROG_KVER_RANGE("cgroupskb/egress/stats$4_19", AID_ROOT, AID_SYSTEM,
                                 bpf_cgroup_egress_4_19, KVER(4, 19, 0), KVER_INF)
 (struct __sk_buff* skb) {
-    return bpf_traffic_account(skb, EGRESS, KVER(4, 19, 0));
+    return bpf_traffic_account(skb, EGRESS, TRACE_OFF, KVER(4, 19, 0));
 }
 
 DEFINE_NETD_BPF_PROG_KVER_RANGE("cgroupskb/egress/stats$4_14", AID_ROOT, AID_SYSTEM,
                                 bpf_cgroup_egress_4_14, KVER_NONE, KVER(4, 19, 0))
 (struct __sk_buff* skb) {
-    return bpf_traffic_account(skb, EGRESS, KVER_NONE);
+    return bpf_traffic_account(skb, EGRESS, TRACE_OFF, KVER_NONE);
 }
 
 // WARNING: Android T's non-updatable netd depends on the name of this program.