Merge "Cleanup OWNERS"
diff --git a/bpf_progs/bpf_net_helpers.h b/bpf_progs/bpf_net_helpers.h
index c39269e..b7ca3af 100644
--- a/bpf_progs/bpf_net_helpers.h
+++ b/bpf_progs/bpf_net_helpers.h
@@ -21,6 +21,18 @@
#include <stdbool.h>
#include <stdint.h>
+// bionic's kernel uapi linux/udp.h is munged: it names the UDP header struct
+// '__kernel_udphdr' rather than 'udphdr', so alias it back before including the header.
+#define __kernel_udphdr udphdr
+#include <linux/udp.h>
+
+// Offsets from beginning of L4 (TCP/UDP) header
+#define TCP_OFFSET(field) offsetof(struct tcphdr, field)
+#define UDP_OFFSET(field) offsetof(struct udphdr, field)
+
+// Offsets from beginning of L3 (IPv4/IPv6) header
+#define IP4_OFFSET(field) offsetof(struct iphdr, field)
+#define IP6_OFFSET(field) offsetof(struct ipv6hdr, field)
+
// this returns 0 iff skb->sk is NULL
static uint64_t (*bpf_get_socket_cookie)(struct __sk_buff* skb) = (void*)BPF_FUNC_get_socket_cookie;
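
Note (not part of this change): the new offset macros are intended to be paired with
bpf_skb_load_bytes_net(), exactly as netd.c does below. A minimal usage sketch, assuming
skb, l4_off and kver are in scope as in those callers:

    uint8_t proto = 0;   // zeroed by the kernel if the load fails
    __be16 dport = 0;
    (void)bpf_skb_load_bytes_net(skb, IP4_OFFSET(protocol), &proto, sizeof(proto), kver);
    if (proto == IPPROTO_TCP) {
        (void)bpf_skb_load_bytes_net(skb, l4_off + TCP_OFFSET(dest), &dport, sizeof(dport), kver);
    }
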
diff --git a/bpf_progs/netd.c b/bpf_progs/netd.c
index 1272f96..84da79d 100644
--- a/bpf_progs/netd.c
+++ b/bpf_progs/netd.c
@@ -46,8 +46,9 @@
static const bool INGRESS = false;
static const bool EGRESS = true;
-#define IP_PROTO_OFF offsetof(struct iphdr, protocol)
-#define IPV6_PROTO_OFF offsetof(struct ipv6hdr, nexthdr)
+// Used for 'bool enable_tracing'
+static const bool TRACE_ON = true;
+static const bool TRACE_OFF = false;
// offsetof(struct iphdr, ihl) -- but that's a bitfield
#define IPPROTO_IHL_OFF 0
@@ -99,6 +100,19 @@
/* never actually used from ebpf */
DEFINE_BPF_MAP_NO_NETD(iface_index_name_map, HASH, uint32_t, IfaceValue, IFACE_INDEX_NAME_MAP_SIZE)
+// A single-element configuration array; packet tracing is enabled when its entry is 'true'.
+DEFINE_BPF_MAP_EXT(packet_trace_enabled_map, ARRAY, uint32_t, bool, 1,
+ AID_ROOT, AID_SYSTEM, 0060, "fs_bpf_net_shared", "", false,
+ BPFLOADER_IGNORED_ON_VERSION, BPFLOADER_MAX_VER, /*ignore_on_eng*/false,
+ /*ignore_on_user*/true, /*ignore_on_userdebug*/false)
+
+// A ring buffer onto which packet information is pushed. This map is only loaded
+// on eng and userdebug devices; user builds skip it to save memory.
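+// Note: the kernel requires a ring buffer's size to be a page-aligned power of two;
+// PACKET_TRACE_BUF_SIZE (32KiB) satisfies this.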
+DEFINE_BPF_RINGBUF_EXT(packet_trace_ringbuf, PacketTrace, PACKET_TRACE_BUF_SIZE,
+ AID_ROOT, AID_SYSTEM, 0060, "fs_bpf_net_shared", "", false,
+ BPFLOADER_IGNORED_ON_VERSION, BPFLOADER_MAX_VER, /*ignore_on_eng*/false,
+ /*ignore_on_user*/true, /*ignore_on_userdebug*/false);
+
// iptables xt_bpf programs need to be usable by both netd and netutils_wrappers
// selinux contexts, because even non-xt_bpf iptables mutations are implemented as
// a full table dump, followed by an update in userspace, and then a reload into the kernel,
@@ -226,12 +240,72 @@
: bpf_skb_load_bytes(skb, L3_off, to, len);
}
+static __always_inline inline void do_packet_tracing(
+ const struct __sk_buff* const skb, const bool egress, const uint32_t uid,
+ const uint32_t tag, const bool enable_tracing, const unsigned kver) {
+ if (!enable_tracing) return;
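+    // The BPF ring buffer (and bpf_ktime_get_boot_ns) require kernel 5.8+,
+    // so this silently no-ops on older kernels.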
+ if (kver < KVER(5, 8, 0)) return;
+
+ uint32_t mapKey = 0;
+ bool* traceConfig = bpf_packet_trace_enabled_map_lookup_elem(&mapKey);
+ if (traceConfig == NULL) return;
+ if (*traceConfig == false) return;
+
+ PacketTrace* pkt = bpf_packet_trace_ringbuf_reserve();
+ if (pkt == NULL) return;
+
+ // Errors from bpf_skb_load_bytes_net are ignored to favor returning something
+ // over returning nothing. In the event of an error, the kernel will fill in
+ // zero for the destination memory. Do not change the default '= 0' below.
+
+ uint8_t proto = 0;
+ uint8_t L4_off = 0;
+ uint8_t ipVersion = 0;
+ if (skb->protocol == htons(ETH_P_IP)) {
+ (void)bpf_skb_load_bytes_net(skb, IP4_OFFSET(protocol), &proto, sizeof(proto), kver);
+ (void)bpf_skb_load_bytes_net(skb, IPPROTO_IHL_OFF, &L4_off, sizeof(L4_off), kver);
+        L4_off = (L4_off & 0x0F) * 4;  // IHL is in 32-bit words (low nibble of the version/IHL byte).
+ ipVersion = 4;
+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ (void)bpf_skb_load_bytes_net(skb, IP6_OFFSET(nexthdr), &proto, sizeof(proto), kver);
+ L4_off = sizeof(struct ipv6hdr);
+ ipVersion = 6;
+ }
+
+ uint8_t flags = 0;
+ __be16 sport = 0, dport = 0;
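+    // L4_off is 0 if the L3 header wasn't recognized (or the IHL read failed), and at least
+    // 20 (IPv4) / exactly 40 (IPv6) otherwise, so '>= 20' also guards the reads below.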
+ if (proto == IPPROTO_TCP && L4_off >= 20) {
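+        // TCP_FLAG32_OFF + 1 selects the byte carrying the CWR..FIN flag bits
+        // (the second byte of the 32-bit data-offset/flags word).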
+ (void)bpf_skb_load_bytes_net(skb, L4_off + TCP_FLAG32_OFF + 1, &flags, sizeof(flags), kver);
+ (void)bpf_skb_load_bytes_net(skb, L4_off + TCP_OFFSET(source), &sport, sizeof(sport), kver);
+ (void)bpf_skb_load_bytes_net(skb, L4_off + TCP_OFFSET(dest), &dport, sizeof(dport), kver);
+ } else if (proto == IPPROTO_UDP && L4_off >= 20) {
+ (void)bpf_skb_load_bytes_net(skb, L4_off + UDP_OFFSET(source), &sport, sizeof(sport), kver);
+ (void)bpf_skb_load_bytes_net(skb, L4_off + UDP_OFFSET(dest), &dport, sizeof(dport), kver);
+ }
+
+ pkt->timestampNs = bpf_ktime_get_boot_ns();
+ pkt->ifindex = skb->ifindex;
+ pkt->length = skb->len;
+
+ pkt->uid = uid;
+ pkt->tag = tag;
+ pkt->sport = sport;
+ pkt->dport = dport;
+
+ pkt->egress = egress;
+ pkt->ipProto = proto;
+ pkt->tcpFlags = flags;
+ pkt->ipVersion = ipVersion;
+
+ bpf_packet_trace_ringbuf_submit(pkt);
+}
+
static __always_inline inline bool skip_owner_match(struct __sk_buff* skb, const unsigned kver) {
uint32_t flag = 0;
if (skb->protocol == htons(ETH_P_IP)) {
uint8_t proto;
// no need to check for success, proto will be zeroed if bpf_skb_load_bytes_net() fails
- (void)bpf_skb_load_bytes_net(skb, IP_PROTO_OFF, &proto, sizeof(proto), kver);
+ (void)bpf_skb_load_bytes_net(skb, IP4_OFFSET(protocol), &proto, sizeof(proto), kver);
if (proto == IPPROTO_ESP) return true;
if (proto != IPPROTO_TCP) return false; // handles read failure above
uint8_t ihl;
@@ -247,7 +321,7 @@
} else if (skb->protocol == htons(ETH_P_IPV6)) {
uint8_t proto;
// no need to check for success, proto will be zeroed if bpf_skb_load_bytes_net() fails
- (void)bpf_skb_load_bytes_net(skb, IPV6_PROTO_OFF, &proto, sizeof(proto), kver);
+ (void)bpf_skb_load_bytes_net(skb, IP6_OFFSET(nexthdr), &proto, sizeof(proto), kver);
if (proto == IPPROTO_ESP) return true;
if (proto != IPPROTO_TCP) return false; // handles read failure above
// if the read below fails, we'll just assume no TCP flags are set, which is fine.
@@ -319,6 +393,7 @@
}
static __always_inline inline int bpf_traffic_account(struct __sk_buff* skb, bool egress,
+ const bool enable_tracing,
const unsigned kver) {
uint32_t sock_uid = bpf_get_socket_uid(skb);
uint64_t cookie = bpf_get_socket_cookie(skb);
@@ -378,34 +453,51 @@
key.tag = 0;
}
+ do_packet_tracing(skb, egress, uid, tag, enable_tracing, kver);
update_stats_with_config(skb, egress, &key, *selectedMap);
update_app_uid_stats_map(skb, egress, &uid);
asm("%0 &= 1" : "+r"(match));
return match;
}
+DEFINE_BPF_PROG_EXT("cgroupskb/ingress/stats$trace", AID_ROOT, AID_SYSTEM,
+ bpf_cgroup_ingress_trace, KVER(5, 8, 0), KVER_INF,
+ BPFLOADER_IGNORED_ON_VERSION, BPFLOADER_MAX_VER, false,
+ "fs_bpf_netd_readonly", "", false, true, false)
+(struct __sk_buff* skb) {
+ return bpf_traffic_account(skb, INGRESS, TRACE_ON, KVER(5, 8, 0));
+}
+
DEFINE_NETD_BPF_PROG_KVER_RANGE("cgroupskb/ingress/stats$4_19", AID_ROOT, AID_SYSTEM,
bpf_cgroup_ingress_4_19, KVER(4, 19, 0), KVER_INF)
(struct __sk_buff* skb) {
- return bpf_traffic_account(skb, INGRESS, KVER(4, 19, 0));
+ return bpf_traffic_account(skb, INGRESS, TRACE_OFF, KVER(4, 19, 0));
}
DEFINE_NETD_BPF_PROG_KVER_RANGE("cgroupskb/ingress/stats$4_14", AID_ROOT, AID_SYSTEM,
bpf_cgroup_ingress_4_14, KVER_NONE, KVER(4, 19, 0))
(struct __sk_buff* skb) {
- return bpf_traffic_account(skb, INGRESS, KVER_NONE);
+ return bpf_traffic_account(skb, INGRESS, TRACE_OFF, KVER_NONE);
+}
+
+DEFINE_BPF_PROG_EXT("cgroupskb/egress/stats$trace", AID_ROOT, AID_SYSTEM,
+ bpf_cgroup_egress_trace, KVER(5, 8, 0), KVER_INF,
+ BPFLOADER_IGNORED_ON_VERSION, BPFLOADER_MAX_VER, false,
+ "fs_bpf_netd_readonly", "", false, true, false)
+(struct __sk_buff* skb) {
+ return bpf_traffic_account(skb, EGRESS, TRACE_ON, KVER(5, 8, 0));
}
DEFINE_NETD_BPF_PROG_KVER_RANGE("cgroupskb/egress/stats$4_19", AID_ROOT, AID_SYSTEM,
bpf_cgroup_egress_4_19, KVER(4, 19, 0), KVER_INF)
(struct __sk_buff* skb) {
- return bpf_traffic_account(skb, EGRESS, KVER(4, 19, 0));
+ return bpf_traffic_account(skb, EGRESS, TRACE_OFF, KVER(4, 19, 0));
}
DEFINE_NETD_BPF_PROG_KVER_RANGE("cgroupskb/egress/stats$4_14", AID_ROOT, AID_SYSTEM,
bpf_cgroup_egress_4_14, KVER_NONE, KVER(4, 19, 0))
(struct __sk_buff* skb) {
- return bpf_traffic_account(skb, EGRESS, KVER_NONE);
+ return bpf_traffic_account(skb, EGRESS, TRACE_OFF, KVER_NONE);
}
// WARNING: Android T's non-updatable netd depends on the name of this program.
diff --git a/bpf_progs/netd.h b/bpf_progs/netd.h
index cc88680..be604f9 100644
--- a/bpf_progs/netd.h
+++ b/bpf_progs/netd.h
@@ -69,6 +69,24 @@
uint64_t tcpTxPackets;
} Stats;
+typedef struct {
+ uint64_t timestampNs;
+ uint32_t ifindex;
+ uint32_t length;
+
+ uint32_t uid;
+ uint32_t tag;
+
+ __be16 sport;
+ __be16 dport;
+
+ bool egress;
+ uint8_t ipProto;
+ uint8_t tcpFlags;
+ uint8_t ipVersion; // 4=IPv4, 6=IPv6, 0=unknown
+} PacketTrace;
+STRUCT_SIZE(PacketTrace, 8+4+4 + 4+4 + 2+2 + 1+1+1+1);
+
// Since we cannot garbage collect the stats map since device boot, we need to make these maps as
// large as possible. The maximum number of map entries we can have depends on the rlimit
// of MEM_LOCK granted to netd. The memory space needed by each map can be calculated by the
@@ -87,7 +105,8 @@
// dozable_uid_map: key: 4 bytes, value: 1 bytes, cost: 145216 bytes = 145Kbytes
// standby_uid_map: key: 4 bytes, value: 1 bytes, cost: 145216 bytes = 145Kbytes
// powersave_uid_map: key: 4 bytes, value: 1 bytes, cost: 145216 bytes = 145Kbytes
-// total: 4930Kbytes
+// packet_trace_ringbuf: key: 0 bytes, value: 32 bytes, cost: 32768 bytes = 32Kbytes
+// total: 4962Kbytes
// It takes at most ~4.9MB of kernel memory if all maps are full, which requires any device
// running this module to have a memlock rlimit larger than 5MB. In the old qtaguid module,
// we didn't have a total limit on data entries, only a limit on how many tags each uid could have.
@@ -102,6 +121,7 @@
static const int IFACE_STATS_MAP_SIZE = 1000;
static const int CONFIGURATION_MAP_SIZE = 2;
static const int UID_OWNER_MAP_SIZE = 4000;
+static const int PACKET_TRACE_BUF_SIZE = 32 * 1024;
#ifdef __cplusplus
@@ -145,6 +165,8 @@
#define CONFIGURATION_MAP_PATH BPF_NETD_PATH "map_netd_configuration_map"
#define UID_OWNER_MAP_PATH BPF_NETD_PATH "map_netd_uid_owner_map"
#define UID_PERMISSION_MAP_PATH BPF_NETD_PATH "map_netd_uid_permission_map"
+#define PACKET_TRACE_RINGBUF_PATH BPF_NETD_PATH "map_netd_packet_trace_ringbuf"
+#define PACKET_TRACE_ENABLED_MAP_PATH BPF_NETD_PATH "map_netd_packet_trace_enabled_map"
#endif // __cplusplus
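
Note (illustrative only, not part of this change): a userspace consumer of the new ring buffer
could look roughly like the sketch below. It assumes libbpf's generic ring_buffer API, that the
pinned map path (e.g. the PACKET_TRACE_RINGBUF_PATH pin above) is passed on the command line, and
that netd.h is includable from C for the PacketTrace layout; the actual in-tree consumer is wired
up separately.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <arpa/inet.h>   // ntohs()
    #include <bpf/bpf.h>     // bpf_obj_get()
    #include <bpf/libbpf.h>  // ring_buffer__new()/__poll()/__free()
    #include "netd.h"        // PacketTrace layout

    // Called once per record pushed by do_packet_tracing().
    static int handle_packet(void* ctx, void* data, size_t len) {
        if (len < sizeof(PacketTrace)) return 0;
        const PacketTrace* pkt = data;
        printf("%s uid=%u ifindex=%u len=%u proto=%u sport=%u dport=%u\n",
               pkt->egress ? "egress" : "ingress", pkt->uid, pkt->ifindex, pkt->length,
               pkt->ipProto, ntohs(pkt->sport), ntohs(pkt->dport));
        return 0;  // keep consuming
    }

    int main(int argc, char** argv) {
        if (argc != 2) return 1;
        int map_fd = bpf_obj_get(argv[1]);  // fd of the pinned ring buffer map
        if (map_fd < 0) return 1;
        struct ring_buffer* rb = ring_buffer__new(map_fd, handle_packet, NULL, NULL);
        if (!rb) return 1;
        while (ring_buffer__poll(rb, /*timeout_ms=*/-1) >= 0) {}  // block until records arrive
        ring_buffer__free(rb);
        return 0;
    }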