initial policy for traced_perf daemon (perf profiler)

The steps involved in setting up profiling and stack unwinding are
described in detail at go/perfetto-perf-android.

To summarize the interesting case: the daemon uses cpu-wide
perf_event_open, with userspace stack and register sampling on. For each
sample, it identifies whether the process is profileable, and obtains
the FDs for /proc/[pid]/{maps,mem} using a dedicated RT signal (with the
bionic signal handler handing over the FDs over a dedicated socket). It
then uses libunwindstack to unwind & symbolize the stacks, sending the
results to the central tracing daemon (traced).

This patch covers the app profiling use-cases. Splitting out the
"profile most things on debug builds" into a separate patch for easier
review.

Most of the exceptions in domain.te & coredomain.te come from the
"vendor_file_type" allow-rule. We want a subset of that (effectively all
libraries/executables), but I believe that in practice it's hard to use
just the specific subtypes, and we're better off allowing access to all
vendor_file_type files.

Bug: 137092007
Change-Id: I4aa482cfb3f9fb2fabf02e1dff92e2b5ce121a47
diff --git a/private/compat/29.0/29.0.ignore.cil b/private/compat/29.0/29.0.ignore.cil
index 38d980e..7a969c9 100644
--- a/private/compat/29.0/29.0.ignore.cil
+++ b/private/compat/29.0/29.0.ignore.cil
@@ -74,6 +74,8 @@
     system_passwd_file
     system_unsolzygote_socket
     tethering_service
+    traced_perf
+    traced_perf_socket
     timezonedetector_service
     untrusted_app_29
     usb_serial_device
diff --git a/private/coredomain.te b/private/coredomain.te
index dac061a..44052c3 100644
--- a/private/coredomain.te
+++ b/private/coredomain.te
@@ -29,6 +29,7 @@
         -postinstall_dexopt
         -rs # spawned by appdomain, so carryover the exception above
         -system_server
+        -traced_perf
     } vendor_app_file:dir { open read getattr search };
 ')
 
@@ -44,6 +45,7 @@
         -postinstall_dexopt
         -rs # spawned by appdomain, so carryover the exception above
         -system_server
+        -traced_perf
         -mediaserver
     } vendor_app_file:file r_file_perms;
 ')
@@ -60,6 +62,7 @@
         -postinstall_dexopt
         -rs # spawned by appdomain, so carryover the exception above
         -system_server
+        -traced_perf
         -app_zygote
         -webview_zygote
         -zygote
@@ -78,6 +81,7 @@
         -postinstall_dexopt
         -rs # spawned by appdomain, so carryover the exception above
         -system_server
+        -traced_perf
         -app_zygote
         -webview_zygote
         -zygote
diff --git a/private/domain.te b/private/domain.te
index 08d963c..dfd8038 100644
--- a/private/domain.te
+++ b/private/domain.te
@@ -296,6 +296,7 @@
 neverallow ~{
   dac_override_allowed
   iorap_prefetcherd
+  traced_perf
   traced_probes
   userdebug_or_eng(`heapprofd')
 } self:global_capability_class_set dac_read_search;
diff --git a/private/ephemeral_app.te b/private/ephemeral_app.te
index 508653c..56d4747 100644
--- a/private/ephemeral_app.te
+++ b/private/ephemeral_app.te
@@ -53,9 +53,9 @@
 # connecting to its producer socket and obtaining a (per-process) tmpfs fd.
 perfetto_producer(ephemeral_app)
 
-# Allow heap profiling if the app opts in by being marked
-# profileable/debuggable.
+# Allow profiling if the app opts in by being marked profileable/debuggable.
 can_profile_heap(ephemeral_app)
+can_profile_perf(ephemeral_app)
 
 # allow ephemeral apps to use UDP sockets provided by the system server but not
 # modify them other than to connect
diff --git a/private/file_contexts b/private/file_contexts
index c7729d8..3e36cd6 100644
--- a/private/file_contexts
+++ b/private/file_contexts
@@ -150,8 +150,9 @@
 /dev/socket/tombstoned_crash u:object_r:tombstoned_crash_socket:s0
 /dev/socket/tombstoned_java_trace u:object_r:tombstoned_java_trace_socket:s0
 /dev/socket/tombstoned_intercept u:object_r:tombstoned_intercept_socket:s0
-/dev/socket/traced_producer	u:object_r:traced_producer_socket:s0
 /dev/socket/traced_consumer	u:object_r:traced_consumer_socket:s0
+/dev/socket/traced_perf	u:object_r:traced_perf_socket:s0
+/dev/socket/traced_producer	u:object_r:traced_producer_socket:s0
 /dev/socket/heapprofd	u:object_r:heapprofd_socket:s0
 /dev/socket/uncrypt	u:object_r:uncrypt_socket:s0
 /dev/socket/wpa_eth[0-9] u:object_r:wpa_socket:s0
@@ -282,6 +283,7 @@
 /system/bin/rss_hwm_reset	u:object_r:rss_hwm_reset_exec:s0
 /system/bin/perfetto        u:object_r:perfetto_exec:s0
 /system/bin/traced        u:object_r:traced_exec:s0
+/system/bin/traced_perf        u:object_r:traced_perf_exec:s0
 /system/bin/traced_probes        u:object_r:traced_probes_exec:s0
 /system/bin/heapprofd        u:object_r:heapprofd_exec:s0
 /system/bin/uncrypt     u:object_r:uncrypt_exec:s0
diff --git a/private/isolated_app.te b/private/isolated_app.te
index 49e9065..4c6c5aa 100644
--- a/private/isolated_app.te
+++ b/private/isolated_app.te
@@ -62,9 +62,10 @@
 # connecting to its producer socket and obtaining a (per-process) tmpfs fd.
 perfetto_producer(isolated_app)
 
-# Allow heap profiling if the main app has been marked as profileable or
+# Allow profiling if the main app has been marked as profileable or
 # debuggable.
 can_profile_heap(isolated_app)
+can_profile_perf(isolated_app)
 
 #####
 ##### Neverallow
diff --git a/private/priv_app.te b/private/priv_app.te
index c879c33..16f651a 100644
--- a/private/priv_app.te
+++ b/private/priv_app.te
@@ -123,9 +123,9 @@
 binder_call(priv_app, incidentd)
 allow priv_app incidentd:fifo_file { read write };
 
-# Allow heap profiling if the app opts in by being marked
-# profileable/debuggable.
+# Allow profiling if the app opts in by being marked profileable/debuggable.
 can_profile_heap(priv_app)
+can_profile_perf(priv_app)
 
 # Allow priv_apps to check whether Dynamic System Update is enabled
 get_prop(priv_app, dynamic_system_prop)
diff --git a/private/traced_perf.te b/private/traced_perf.te
new file mode 100644
index 0000000..7a78d79
--- /dev/null
+++ b/private/traced_perf.te
@@ -0,0 +1,53 @@
+# Performance profiler, backed by perf_event_open(2).
+# See go/perfetto-perf-android.
+typeattribute traced_perf coredomain;
+typeattribute traced_perf mlstrustedsubject;
+
+type traced_perf_exec, system_file_type, exec_type, file_type;
+
+init_daemon_domain(traced_perf)
+perfetto_producer(traced_perf)
+
+# Allow traced_perf full use of perf_event_open(2). It will perform cpu-wide
+# profiling, but retain samples only for profileable processes.
+# Thread-specific profiling is still disallowed due to a PTRACE_MODE_ATTACH
+# check (which would require a process:attach SELinux allow-rule).
+allow traced_perf self:perf_event { open cpu kernel read write tracepoint };
+
+# Allow CAP_KILL for delivery of dedicated signal to obtain proc-fds from a
+# process. Allow CAP_DAC_READ_SEARCH for stack unwinding and symbolization of
+# sampled stacks, which requires opening the backing libraries/executables (as
+# symbols are usually not mapped into the process space). Not all such files
+# are world-readable, e.g. odex files that included user profiles during
+# profile-guided optimization.
+allow traced_perf self:capability { kill dac_read_search };
+
+# Allow reading /system/data/packages.list.
+allow traced_perf packages_list_file:file r_file_perms;
+
+# Allow reading files for stack unwinding and symbolization.
+r_dir_file(traced_perf, nativetest_data_file)
+r_dir_file(traced_perf, system_file_type)
+r_dir_file(traced_perf, apk_data_file)
+r_dir_file(traced_perf, dalvikcache_data_file)
+r_dir_file(traced_perf, vendor_file_type)
+
+# Do not audit the cases where traced_perf attempts to access /proc/[pid] for
+# domains that it cannot read.
+dontaudit traced_perf domain:dir { search getattr open };
+
+# Never allow access to app data files
+neverallow traced_perf { app_data_file privapp_data_file system_app_data_file }:file *;
+
+# Never allow profiling highly privileged processes.
+never_profile_heap(`{
+  bpfloader
+  init
+  kernel
+  keystore
+  llkd
+  logd
+  ueventd
+  vendor_init
+  vold
+}')
diff --git a/private/untrusted_app_all.te b/private/untrusted_app_all.te
index 769ddb0..d9fd5a1 100644
--- a/private/untrusted_app_all.te
+++ b/private/untrusted_app_all.te
@@ -137,9 +137,9 @@
 # connecting to its producer socket and obtaining a (per-process) tmpfs fd.
 perfetto_producer(untrusted_app_all)
 
-# Allow heap profiling if the app opts in by being marked
-# profileable/debuggable.
+# Allow profiling if the app opts in by being marked profileable/debuggable.
 can_profile_heap(untrusted_app_all)
+can_profile_perf(untrusted_app_all)
 
 # allow untrusted apps to use UDP sockets provided by the system server but not
 # modify them other than to connect
diff --git a/public/domain.te b/public/domain.te
index 604df89..1312ad9 100644
--- a/public/domain.te
+++ b/public/domain.te
@@ -987,6 +987,7 @@
     userdebug_or_eng(`-heapprofd')
     -shell
     -system_executes_vendor_violators
+    -traced_perf # library/binary access for symbolization
     -ueventd # reads /vendor/ueventd.rc
   } {
     vendor_file_type
@@ -1310,10 +1311,11 @@
     -appdomain
     -bootanim
     -crash_dump
+    -heapprofd
     -init
     -iorap_prefetcherd
     -kernel
-    -heapprofd
+    -traced_perf
     -ueventd
   } vendor_file:file { no_w_file_perms no_x_file_perms open };
 ')
diff --git a/public/file.te b/public/file.te
index ef30fc7..408d515 100644
--- a/public/file.te
+++ b/public/file.te
@@ -457,8 +457,9 @@
 type tombstoned_crash_socket, file_type, coredomain_socket, mlstrustedobject;
 type tombstoned_java_trace_socket, file_type, mlstrustedobject;
 type tombstoned_intercept_socket, file_type, coredomain_socket;
-type traced_producer_socket, file_type, coredomain_socket, mlstrustedobject;
 type traced_consumer_socket, file_type, coredomain_socket, mlstrustedobject;
+type traced_perf_socket, file_type, coredomain_socket, mlstrustedobject;
+type traced_producer_socket, file_type, coredomain_socket, mlstrustedobject;
 type uncrypt_socket, file_type, coredomain_socket;
 type wpa_socket, file_type, data_file_type, core_data_file_type;
 type zygote_socket, file_type, coredomain_socket;
diff --git a/public/te_macros b/public/te_macros
index b69c800..2d0e050 100644
--- a/public/te_macros
+++ b/public/te_macros
@@ -718,6 +718,34 @@
 ')
 
 ###################################
+# can_profile_perf(domain)
+# Allow processes within the domain to be profiled, and have their stacks
+# sampled, by traced_perf.
+define(`can_profile_perf', `
+  # Allow directory & file read to traced_perf, as it stat(2)s /proc/[pid], and
+  # reads /proc/[pid]/cmdline.
+  allow traced_perf $1:file r_file_perms;
+  allow traced_perf $1:dir r_dir_perms;
+
+  # Allow central daemon to send signal to request /proc/[pid]/maps and
+  # /proc/[pid]/mem fds from this process.
+  allow traced_perf $1:process signal;
+
+  # Allow connecting to the daemon.
+  unix_socket_connect($1, traced_perf, traced_perf)
+  # Allow daemon to use the passed fds.
+  allow traced_perf $1:fd use;
+')
+
+###################################
+# never_profile_perf(domain)
+# Opt out of profiling by traced_perf.
+define(`never_profile_perf', `
+  neverallow traced_perf $1:file read;
+  neverallow traced_perf $1:process signal;
+')
+
+###################################
 # perfetto_producer(domain)
 # Allow processes within the domain to write data to Perfetto.
 define(`perfetto_producer', `
diff --git a/public/traced_perf.te b/public/traced_perf.te
new file mode 100644
index 0000000..f9a0324
--- /dev/null
+++ b/public/traced_perf.te
@@ -0,0 +1 @@
+type traced_perf, domain;