Merge "Cleanup OWNERS"
diff --git a/bpf_progs/bpf_net_helpers.h b/bpf_progs/bpf_net_helpers.h
index c39269e..b7ca3af 100644
--- a/bpf_progs/bpf_net_helpers.h
+++ b/bpf_progs/bpf_net_helpers.h
@@ -21,6 +21,18 @@
 #include <stdbool.h>
 #include <stdint.h>
 
+// bionic kernel uapi linux/udp.h header is munged...
+#define __kernel_udphdr udphdr
+#include <linux/udp.h>
+
+// Offsets from beginning of L4 (TCP/UDP) header
+#define TCP_OFFSET(field) offsetof(struct tcphdr, field)
+#define UDP_OFFSET(field) offsetof(struct udphdr, field)
+
+// Offsets from beginning of L3 (IPv4/IPv6) header
+#define IP4_OFFSET(field) offsetof(struct iphdr, field)
+#define IP6_OFFSET(field) offsetof(struct ipv6hdr, field)
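+//
+// Example (illustrative, not part of this header): once the start of the L4 header is
+// known (l4_offset below is a hypothetical local), a field can be read with:
+//   __be16 dport;
+//   bpf_skb_load_bytes(skb, l4_offset + UDP_OFFSET(dest), &dport, sizeof(dport));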
+
 // this returns 0 iff skb->sk is NULL
 static uint64_t (*bpf_get_socket_cookie)(struct __sk_buff* skb) = (void*)BPF_FUNC_get_socket_cookie;
 
diff --git a/bpf_progs/netd.c b/bpf_progs/netd.c
index 1272f96..84da79d 100644
--- a/bpf_progs/netd.c
+++ b/bpf_progs/netd.c
@@ -46,8 +46,9 @@
 static const bool INGRESS = false;
 static const bool EGRESS = true;
 
-#define IP_PROTO_OFF offsetof(struct iphdr, protocol)
-#define IPV6_PROTO_OFF offsetof(struct ipv6hdr, nexthdr)
+// Used for 'bool enable_tracing'
+static const bool TRACE_ON = true;
+static const bool TRACE_OFF = false;
 
 // offsetof(struct iphdr, ihl) -- but that's a bitfield
 #define IPPROTO_IHL_OFF 0
@@ -99,6 +100,19 @@
 /* never actually used from ebpf */
 DEFINE_BPF_MAP_NO_NETD(iface_index_name_map, HASH, uint32_t, IfaceValue, IFACE_INDEX_NAME_MAP_SIZE)
 
+// A single-element configuration array; packet tracing is enabled when the value is 'true'.
+DEFINE_BPF_MAP_EXT(packet_trace_enabled_map, ARRAY, uint32_t, bool, 1,
+                   AID_ROOT, AID_SYSTEM, 0060, "fs_bpf_net_shared", "", false,
+                   BPFLOADER_IGNORED_ON_VERSION, BPFLOADER_MAX_VER, /*ignore_on_eng*/false,
+                   /*ignore_on_user*/true, /*ignore_on_userdebug*/false)
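+// Userspace toggles tracing by writing index 0 of this pinned map (see
+// PACKET_TRACE_ENABLED_MAP_PATH in netd.h); mode 0060 gives the AID_SYSTEM group
+// read/write access while keeping the map root-owned.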
+
+// A ring buffer on which packet information is pushed. This map will only be loaded
+// on eng and userdebug devices. User devices won't load this to save memory.
+DEFINE_BPF_RINGBUF_EXT(packet_trace_ringbuf, PacketTrace, PACKET_TRACE_BUF_SIZE,
+                       AID_ROOT, AID_SYSTEM, 0060, "fs_bpf_net_shared", "", false,
+                       BPFLOADER_IGNORED_ON_VERSION, BPFLOADER_MAX_VER, /*ignore_on_eng*/false,
+                       /*ignore_on_user*/true, /*ignore_on_userdebug*/false);
+
 // iptables xt_bpf programs need to be usable by both netd and netutils_wrappers
 // selinux contexts, because even non-xt_bpf iptables mutations are implemented as
 // a full table dump, followed by an update in userspace, and then a reload into the kernel,
@@ -226,12 +240,72 @@
         : bpf_skb_load_bytes(skb, L3_off, to, len);
 }
 
+static __always_inline inline void do_packet_tracing(
+        const struct __sk_buff* const skb, const bool egress, const uint32_t uid,
+        const uint32_t tag, const bool enable_tracing, const unsigned kver) {
+    if (!enable_tracing) return;
+    if (kver < KVER(5, 8, 0)) return;
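+    // Both enable_tracing and kver are compile-time constants in every caller, and this
+    // function is always inlined, so the two checks above are eliminated entirely in the
+    // non-tracing and pre-5.8 program variants.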
+
+    uint32_t mapKey = 0;
+    bool* traceConfig = bpf_packet_trace_enabled_map_lookup_elem(&mapKey);
+    if (traceConfig == NULL) return;
+    if (*traceConfig == false) return;
+
+    PacketTrace* pkt = bpf_packet_trace_ringbuf_reserve();
+    if (pkt == NULL) return;
+
+    // Errors from bpf_skb_load_bytes_net are ignored to favor returning something
+    // over returning nothing. In the event of an error, the kernel will fill in
+    // zero for the destination memory. Do not change the default '= 0' below.
+
+    uint8_t proto = 0;
+    uint8_t L4_off = 0;
+    uint8_t ipVersion = 0;
+    if (skb->protocol == htons(ETH_P_IP)) {
+        (void)bpf_skb_load_bytes_net(skb, IP4_OFFSET(protocol), &proto, sizeof(proto), kver);
+        (void)bpf_skb_load_bytes_net(skb, IPPROTO_IHL_OFF, &L4_off, sizeof(L4_off), kver);
+        L4_off = (L4_off & 0x0F) * 4;  // IHL is the low nibble, in units of 32-bit words.
+        ipVersion = 4;
+    } else if (skb->protocol == htons(ETH_P_IPV6)) {
+        (void)bpf_skb_load_bytes_net(skb, IP6_OFFSET(nexthdr), &proto, sizeof(proto), kver);
+        L4_off = sizeof(struct ipv6hdr);
+        ipVersion = 6;
+    }
+
+    uint8_t flags = 0;
+    __be16 sport = 0, dport = 0;
+    if (proto == IPPROTO_TCP && L4_off >= 20) {
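+        // TCP_FLAG32_OFF (defined earlier in this file) is assumed to be the offset of the
+        // 32-bit word of the TCP header holding the data offset and flag bits; the +1 below
+        // selects the byte that carries the CWR..FIN flags.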
+        (void)bpf_skb_load_bytes_net(skb, L4_off + TCP_FLAG32_OFF + 1, &flags, sizeof(flags), kver);
+        (void)bpf_skb_load_bytes_net(skb, L4_off + TCP_OFFSET(source), &sport, sizeof(sport), kver);
+        (void)bpf_skb_load_bytes_net(skb, L4_off + TCP_OFFSET(dest), &dport, sizeof(dport), kver);
+    } else if (proto == IPPROTO_UDP && L4_off >= 20) {
+        (void)bpf_skb_load_bytes_net(skb, L4_off + UDP_OFFSET(source), &sport, sizeof(sport), kver);
+        (void)bpf_skb_load_bytes_net(skb, L4_off + UDP_OFFSET(dest), &dport, sizeof(dport), kver);
+    }
+
+    pkt->timestampNs = bpf_ktime_get_boot_ns();
+    pkt->ifindex = skb->ifindex;
+    pkt->length = skb->len;
+
+    pkt->uid = uid;
+    pkt->tag = tag;
+    pkt->sport = sport;
+    pkt->dport = dport;
+
+    pkt->egress = egress;
+    pkt->ipProto = proto;
+    pkt->tcpFlags = flags;
+    pkt->ipVersion = ipVersion;
+
+    bpf_packet_trace_ringbuf_submit(pkt);
+}
+
 static __always_inline inline bool skip_owner_match(struct __sk_buff* skb, const unsigned kver) {
     uint32_t flag = 0;
     if (skb->protocol == htons(ETH_P_IP)) {
         uint8_t proto;
         // no need to check for success, proto will be zeroed if bpf_skb_load_bytes_net() fails
-        (void)bpf_skb_load_bytes_net(skb, IP_PROTO_OFF, &proto, sizeof(proto), kver);
+        (void)bpf_skb_load_bytes_net(skb, IP4_OFFSET(protocol), &proto, sizeof(proto), kver);
         if (proto == IPPROTO_ESP) return true;
         if (proto != IPPROTO_TCP) return false;  // handles read failure above
         uint8_t ihl;
@@ -247,7 +321,7 @@
     } else if (skb->protocol == htons(ETH_P_IPV6)) {
         uint8_t proto;
         // no need to check for success, proto will be zeroed if bpf_skb_load_bytes_net() fails
-        (void)bpf_skb_load_bytes_net(skb, IPV6_PROTO_OFF, &proto, sizeof(proto), kver);
+        (void)bpf_skb_load_bytes_net(skb, IP6_OFFSET(nexthdr), &proto, sizeof(proto), kver);
         if (proto == IPPROTO_ESP) return true;
         if (proto != IPPROTO_TCP) return false;  // handles read failure above
         // if the read below fails, we'll just assume no TCP flags are set, which is fine.
@@ -319,6 +393,7 @@
 }
 
 static __always_inline inline int bpf_traffic_account(struct __sk_buff* skb, bool egress,
+                                                      const bool enable_tracing,
                                                       const unsigned kver) {
     uint32_t sock_uid = bpf_get_socket_uid(skb);
     uint64_t cookie = bpf_get_socket_cookie(skb);
@@ -378,34 +453,51 @@
         key.tag = 0;
     }
 
+    do_packet_tracing(skb, egress, uid, tag, enable_tracing, kver);
     update_stats_with_config(skb, egress, &key, *selectedMap);
     update_app_uid_stats_map(skb, egress, &uid);
     asm("%0 &= 1" : "+r"(match));
     return match;
 }
 
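+// Tracing-enabled variants of the stats programs, restricted to 5.8+ kernels since the
+// ring buffer requires 5.8. The trailing false/true/false arguments presumably mirror the
+// ignore_on_eng/user/userdebug flags used for the ringbuf above, so user builds skip them.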
+DEFINE_BPF_PROG_EXT("cgroupskb/ingress/stats$trace", AID_ROOT, AID_SYSTEM,
+                    bpf_cgroup_ingress_trace, KVER(5, 8, 0), KVER_INF,
+                    BPFLOADER_IGNORED_ON_VERSION, BPFLOADER_MAX_VER, false,
+                    "fs_bpf_netd_readonly", "", false, true, false)
+(struct __sk_buff* skb) {
+    return bpf_traffic_account(skb, INGRESS, TRACE_ON, KVER(5, 8, 0));
+}
+
 DEFINE_NETD_BPF_PROG_KVER_RANGE("cgroupskb/ingress/stats$4_19", AID_ROOT, AID_SYSTEM,
                                 bpf_cgroup_ingress_4_19, KVER(4, 19, 0), KVER_INF)
 (struct __sk_buff* skb) {
-    return bpf_traffic_account(skb, INGRESS, KVER(4, 19, 0));
+    return bpf_traffic_account(skb, INGRESS, TRACE_OFF, KVER(4, 19, 0));
 }
 
 DEFINE_NETD_BPF_PROG_KVER_RANGE("cgroupskb/ingress/stats$4_14", AID_ROOT, AID_SYSTEM,
                                 bpf_cgroup_ingress_4_14, KVER_NONE, KVER(4, 19, 0))
 (struct __sk_buff* skb) {
-    return bpf_traffic_account(skb, INGRESS, KVER_NONE);
+    return bpf_traffic_account(skb, INGRESS, TRACE_OFF, KVER_NONE);
+}
+
+DEFINE_BPF_PROG_EXT("cgroupskb/egress/stats$trace", AID_ROOT, AID_SYSTEM,
+                    bpf_cgroup_egress_trace, KVER(5, 8, 0), KVER_INF,
+                    BPFLOADER_IGNORED_ON_VERSION, BPFLOADER_MAX_VER, false,
+                    "fs_bpf_netd_readonly", "", false, true, false)
+(struct __sk_buff* skb) {
+    return bpf_traffic_account(skb, EGRESS, TRACE_ON, KVER(5, 8, 0));
 }
 
 DEFINE_NETD_BPF_PROG_KVER_RANGE("cgroupskb/egress/stats$4_19", AID_ROOT, AID_SYSTEM,
                                 bpf_cgroup_egress_4_19, KVER(4, 19, 0), KVER_INF)
 (struct __sk_buff* skb) {
-    return bpf_traffic_account(skb, EGRESS, KVER(4, 19, 0));
+    return bpf_traffic_account(skb, EGRESS, TRACE_OFF, KVER(4, 19, 0));
 }
 
 DEFINE_NETD_BPF_PROG_KVER_RANGE("cgroupskb/egress/stats$4_14", AID_ROOT, AID_SYSTEM,
                                 bpf_cgroup_egress_4_14, KVER_NONE, KVER(4, 19, 0))
 (struct __sk_buff* skb) {
-    return bpf_traffic_account(skb, EGRESS, KVER_NONE);
+    return bpf_traffic_account(skb, EGRESS, TRACE_OFF, KVER_NONE);
 }
 
 // WARNING: Android T's non-updatable netd depends on the name of this program.
diff --git a/bpf_progs/netd.h b/bpf_progs/netd.h
index cc88680..be604f9 100644
--- a/bpf_progs/netd.h
+++ b/bpf_progs/netd.h
@@ -69,6 +69,24 @@
     uint64_t tcpTxPackets;
 } Stats;
 
+typedef struct {
+    uint64_t timestampNs;
+    uint32_t ifindex;
+    uint32_t length;
+
+    uint32_t uid;
+    uint32_t tag;
+
+    __be16 sport;
+    __be16 dport;
+
+    bool egress;
+    uint8_t ipProto;
+    uint8_t tcpFlags;
+    uint8_t ipVersion;  // 4=IPv4, 6=IPv6, 0=unknown
+} PacketTrace;
+STRUCT_SIZE(PacketTrace, 8+4+4 + 4+4 + 2+2 + 1+1+1+1);
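+// The fields above are ordered largest-to-smallest so the struct packs without padding
+// holes; the STRUCT_SIZE assertion guards the expected 32-byte layout.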
+
 // Since we cannot garbage collect the stats maps at any point after device boot, we need to make
 // these maps as large as possible. The maximum number of map entries we can have depends on the
 // rlimit of MEM_LOCK granted to netd. The memory space needed by each map can be calculated by the
@@ -87,7 +105,8 @@
 // dozable_uid_map:     key:  4 bytes, value:  1 bytes, cost:  145216 bytes    =   145Kbytes
 // standby_uid_map:     key:  4 bytes, value:  1 bytes, cost:  145216 bytes    =   145Kbytes
 // powersave_uid_map:   key:  4 bytes, value:  1 bytes, cost:  145216 bytes    =   145Kbytes
-// total:                                                                         4930Kbytes
+// packet_trace_ringbuf:key:  0 bytes, value: 32 bytes, cost:   32768 bytes    =    32Kbytes
+//   (the ringbuf's cost is its total buffer size, PACKET_TRACE_BUF_SIZE, not entries * value)
+// total:                                                                         4962Kbytes
 // It takes at most 4.9MB of kernel memory if all maps are full, which requires any device
 // running this module to have a memlock rlimit larger than 5MB. In the old qtaguid module,
 // there was no total limit on data entries, only a limit on how many tags each uid could have.
@@ -102,6 +121,7 @@
 static const int IFACE_STATS_MAP_SIZE = 1000;
 static const int CONFIGURATION_MAP_SIZE = 2;
 static const int UID_OWNER_MAP_SIZE = 4000;
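+// Note: the kernel requires a BPF ringbuf's size to be a multiple of the page size and a
+// power of two; 32KiB satisfies both on systems with 4KiB pages.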
+static const int PACKET_TRACE_BUF_SIZE = 32 * 1024;
 
 #ifdef __cplusplus
 
@@ -145,6 +165,8 @@
 #define CONFIGURATION_MAP_PATH BPF_NETD_PATH "map_netd_configuration_map"
 #define UID_OWNER_MAP_PATH BPF_NETD_PATH "map_netd_uid_owner_map"
 #define UID_PERMISSION_MAP_PATH BPF_NETD_PATH "map_netd_uid_permission_map"
+#define PACKET_TRACE_RINGBUF_PATH BPF_NETD_PATH "map_netd_packet_trace_ringbuf"
+#define PACKET_TRACE_ENABLED_MAP_PATH BPF_NETD_PATH "map_netd_packet_trace_enabled_map"
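+// A userspace consumer would poll the ringbuf pinned at PACKET_TRACE_RINGBUF_PATH and
+// decode each record as a PacketTrace (e.g. via an Android BpfRingbuf-style wrapper or
+// libbpf's ring_buffer API -- named here as assumptions, not requirements of this change).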
 
 #endif // __cplusplus