Merge "change ownership of for-system subdirs to AID_CLAT" into main am: e9560e72d2 am: 746f715d6d

Original change: https://android-review.googlesource.com/c/platform/system/core/+/3543304

Change-Id: I3d4e81d5788dc44ad9d23d5a801492033b1c762b
Signed-off-by: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>
diff --git a/PREUPLOAD.cfg b/PREUPLOAD.cfg
index f47c317..ab6430f 100644
--- a/PREUPLOAD.cfg
+++ b/PREUPLOAD.cfg
@@ -8,4 +8,3 @@
 rustfmt = --config-path=rustfmt.toml
 
 [Hook Scripts]
-aosp_hook = ${REPO_ROOT}/frameworks/base/tools/aosp/aosp_sha.sh ${PREUPLOAD_COMMIT} "."
diff --git a/fs_mgr/README.overlayfs.md b/fs_mgr/README.overlayfs.md
index 94b2f8c..df5d775 100644
--- a/fs_mgr/README.overlayfs.md
+++ b/fs_mgr/README.overlayfs.md
@@ -79,16 +79,15 @@
   done file by file. Be mindful of wasted space. For example, defining
   **BOARD_IMAGE_PARTITION_RESERVED_SIZE** has a negative impact on the
   right-sizing of images and requires more free dynamic partition space.
-- The kernel requires **CONFIG_OVERLAY_FS=y**. If the kernel version is higher
-  than 4.4, it requires source to be in line with android-common kernels. 
-  The patch series is available on the upstream mailing list and the latest as
-  of Sep 5 2019 is https://www.spinics.net/lists/linux-mtd/msg08331.html
-  This patch adds an override_creds _mount_ option to OverlayFS that
-  permits legacy behavior for systems that do not have overlapping
-  sepolicy rules, principals of least privilege, which is how Android behaves.
-  For 4.19 and higher a rework of the xattr handling to deal with recursion
-  is required. https://patchwork.kernel.org/patch/11117145/ is a start of that
-  adjustment.
+- The kernel requires **CONFIG_OVERLAY_FS=y**. OverlayFS is used 'as is' as of
+  Android 16; no modifications are required.
+- In order for OverlayFS to work, overlays are mounted in the overlay_remounter
+  domain, defined in system/sepolicy/private/overlay_remounter.te. This domain
+  must have full access to the files on the underlying volumes; add any other
+  file and directory types there.
+- For devices with dynamic partitions, a simpler rule decides which partitions
+  to remount: all logical ones. In case this isn't correct, the **overlay=on**
+  and **overlay=off** fstab flags allow detailed, per-partition control.
 - _adb enable-verity_ frees up OverlayFS and reverts the device to the state
   prior to content updates. The update engine performs a full OTA.
 - _adb remount_ overrides are incompatible with OTA resources, so the update
diff --git a/fs_mgr/fs_mgr_overlayfs_mount.cpp b/fs_mgr/fs_mgr_overlayfs_mount.cpp
index 69d3161..762e70d 100644
--- a/fs_mgr/fs_mgr_overlayfs_mount.cpp
+++ b/fs_mgr/fs_mgr_overlayfs_mount.cpp
@@ -49,6 +49,10 @@
 #include "fs_mgr_overlayfs_mount.h"
 #include "fs_mgr_priv.h"
 
+// When true, candidate selection skips the finer-grained filtering and simply overlays
+// every dynamic (logical) partition; fstab overlay=on/overlay=off still take precedence.
+constexpr bool overlay_dynamic_partitions_only = true;
+
 using namespace std::literals;
 using namespace android::fs_mgr;
 using namespace android::storage_literals;
@@ -669,6 +673,19 @@
 
     Fstab candidates;
     for (const auto& entry : fstab) {
+        // fstab overlay flag overrides all other behavior
+        if (entry.fs_mgr_flags.overlay_off) continue;
+        if (entry.fs_mgr_flags.overlay_on) {
+            candidates.push_back(entry);
+            continue;
+        }
+
+        // overlay_dynamic_partitions_only simplifies logic to overlay exactly dynamic partitions
+        if (overlay_dynamic_partitions_only) {
+            if (entry.fs_mgr_flags.logical) candidates.push_back(entry);
+            continue;
+        }
+
         // Filter out partitions whose type doesn't match what's mounted.
         // This avoids spammy behavior on devices which can mount different
         // filesystems for each partition.
diff --git a/fs_mgr/libfstab/fstab.cpp b/fs_mgr/libfstab/fstab.cpp
index 010fbc8..ec23ce5 100644
--- a/fs_mgr/libfstab/fstab.cpp
+++ b/fs_mgr/libfstab/fstab.cpp
@@ -209,6 +209,8 @@
         CheckFlag("metadata_csum", ext_meta_csum);
         CheckFlag("fscompress", fs_compress);
         CheckFlag("overlayfs_remove_missing_lowerdir", overlayfs_remove_missing_lowerdir);
+        CheckFlag("overlay=on", overlay_on);
+        CheckFlag("overlay=off", overlay_off);
 
 #undef CheckFlag
 
diff --git a/fs_mgr/libfstab/include/fstab/fstab.h b/fs_mgr/libfstab/include/fstab/fstab.h
index 0ff3188..4924ae3 100644
--- a/fs_mgr/libfstab/include/fstab/fstab.h
+++ b/fs_mgr/libfstab/include/fstab/fstab.h
@@ -87,6 +87,8 @@
         bool fs_compress : 1;
         bool overlayfs_remove_missing_lowerdir : 1;
         bool is_zoned : 1;
+        bool overlay_on : 1;
+        bool overlay_off : 1;
     } fs_mgr_flags = {};
 
     bool is_encryptable() const { return fs_mgr_flags.crypt; }
diff --git a/healthd/BatteryMonitor.cpp b/healthd/BatteryMonitor.cpp
index b0a14bb..0e75033 100644
--- a/healthd/BatteryMonitor.cpp
+++ b/healthd/BatteryMonitor.cpp
@@ -715,49 +715,54 @@
     char vs[128];
     const HealthInfo& props = *mHealthInfo;
 
+    snprintf(vs, sizeof(vs), "Cached HealthInfo:\n");
+    write(fd, vs, strlen(vs));
     snprintf(vs, sizeof(vs),
-             "ac: %d usb: %d wireless: %d dock: %d current_max: %d voltage_max: %d\n",
+             "  ac: %d usb: %d wireless: %d dock: %d current_max: %d voltage_max: %d\n",
              props.chargerAcOnline, props.chargerUsbOnline, props.chargerWirelessOnline,
              props.chargerDockOnline, props.maxChargingCurrentMicroamps,
              props.maxChargingVoltageMicrovolts);
     write(fd, vs, strlen(vs));
-    snprintf(vs, sizeof(vs), "status: %d health: %d present: %d\n",
+    snprintf(vs, sizeof(vs), "  status: %d health: %d present: %d\n",
              props.batteryStatus, props.batteryHealth, props.batteryPresent);
     write(fd, vs, strlen(vs));
-    snprintf(vs, sizeof(vs), "level: %d voltage: %d temp: %d\n", props.batteryLevel,
+    snprintf(vs, sizeof(vs), "  level: %d voltage: %d temp: %d\n", props.batteryLevel,
              props.batteryVoltageMillivolts, props.batteryTemperatureTenthsCelsius);
     write(fd, vs, strlen(vs));
 
     if (!mHealthdConfig->batteryCurrentNowPath.empty()) {
+        snprintf(vs, sizeof(vs), "  current now: %d\n", props.batteryCurrentMicroamps);
+        write(fd, vs, strlen(vs));
+    }
+
+    if (!mHealthdConfig->batteryCycleCountPath.empty()) {
+        snprintf(vs, sizeof(vs), "  cycle count: %d\n", props.batteryCycleCount);
+        write(fd, vs, strlen(vs));
+    }
+
+    if (!mHealthdConfig->batteryFullChargePath.empty()) {
+        snprintf(vs, sizeof(vs), "  Full charge: %d\n", props.batteryFullChargeUah);
+        write(fd, vs, strlen(vs));
+    }
+
+    snprintf(vs, sizeof(vs), "Real-time Values:\n");
+    write(fd, vs, strlen(vs));
+
+    if (!mHealthdConfig->batteryCurrentNowPath.empty()) {
         v = getIntField(mHealthdConfig->batteryCurrentNowPath);
-        snprintf(vs, sizeof(vs), "current now: %d\n", v);
+        snprintf(vs, sizeof(vs), "  current now: %d\n", v);
         write(fd, vs, strlen(vs));
     }
 
     if (!mHealthdConfig->batteryCurrentAvgPath.empty()) {
         v = getIntField(mHealthdConfig->batteryCurrentAvgPath);
-        snprintf(vs, sizeof(vs), "current avg: %d\n", v);
+        snprintf(vs, sizeof(vs), "  current avg: %d\n", v);
         write(fd, vs, strlen(vs));
     }
 
     if (!mHealthdConfig->batteryChargeCounterPath.empty()) {
         v = getIntField(mHealthdConfig->batteryChargeCounterPath);
-        snprintf(vs, sizeof(vs), "charge counter: %d\n", v);
-        write(fd, vs, strlen(vs));
-    }
-
-    if (!mHealthdConfig->batteryCurrentNowPath.empty()) {
-        snprintf(vs, sizeof(vs), "current now: %d\n", props.batteryCurrentMicroamps);
-        write(fd, vs, strlen(vs));
-    }
-
-    if (!mHealthdConfig->batteryCycleCountPath.empty()) {
-        snprintf(vs, sizeof(vs), "cycle count: %d\n", props.batteryCycleCount);
-        write(fd, vs, strlen(vs));
-    }
-
-    if (!mHealthdConfig->batteryFullChargePath.empty()) {
-        snprintf(vs, sizeof(vs), "Full charge: %d\n", props.batteryFullChargeUah);
+        snprintf(vs, sizeof(vs), "  charge counter: %d\n", v);
         write(fd, vs, strlen(vs));
     }
 }
diff --git a/init/Android.bp b/init/Android.bp
index b209c47..9edbe9d 100644
--- a/init/Android.bp
+++ b/init/Android.bp
@@ -292,6 +292,9 @@
         "make_f2fs",
         "mke2fs",
         "sload_f2fs",
+
+        // TODO: Revert after go/android-memcgv2-exp b/386797433
+        "memcgv2_activation_depth",
     ],
 }
 
@@ -691,3 +694,10 @@
         default: ["init_first_stage"],
     }),
 }
+
+// TODO: Revert after go/android-memcgv2-exp b/386797433
+sh_binary {
+    name: "memcgv2_activation_depth",
+    src: "memcgv2_activation_depth.sh",
+    filename_from_src: true,
+}
diff --git a/init/memcgv2_activation_depth.sh b/init/memcgv2_activation_depth.sh
new file mode 100644
index 0000000..91d215d
--- /dev/null
+++ b/init/memcgv2_activation_depth.sh
@@ -0,0 +1,87 @@
+#!/bin/sh
+
+# This script adjusts overrides of the memcg v2 MaxActivationDepth value at runtime.
+# The override value needs to be accessible starting very early in the Android boot, where aconfig
+# flags and system properties do not work. A file on /metadata is used instead.
+
+# The kernel allows this to be as high as 65535, but our Android hierarchy is never that deep.
+MAX_ALLOWED_DEPTH=5
+
+# Store overridden MaxActivationDepths here for libprocessgroup to find them
+OVERRIDE_FILE_PATH="/metadata/libprocessgroup/memcg_v2_max_activation_depth"
+
+if [ "$#" -ne 1 ]
+then
+    echo "Usage: $0 <memcg v2 MaxActivationDepth value>"
+    exit 99
+fi
+
+max_activation_depth=$1
+# Digits only; 0 is a valid depth (init.rc passes it), hence "non-negative" rather than "positive".
+if [[ $max_activation_depth != +([0-9]) ]]
+then
+    echo "MaxActivationDepth value must be a non-negative integer: $max_activation_depth"
+    exit 98
+fi
+
+if [ $max_activation_depth -lt 0 ]
+then
+    echo "Negative MaxActivationDepth is invalid: $max_activation_depth"
+    exit 97
+fi
+
+if [ $max_activation_depth -gt $MAX_ALLOWED_DEPTH ]
+then
+    echo "MaxActivationDepth is too large: $max_activation_depth"
+    exit 96
+fi
+
+grep memory /sys/fs/cgroup/cgroup.controllers
+if [ $? -ne 0 ]
+then
+    echo "memcg v2 is not available on this device!"
+    exit 95
+fi
+
+current_activation_depth=$(cat $OVERRIDE_FILE_PATH)
+if [ $? -ne 0 ]
+then
+    # Find the default activation depth in the absence of any properties / overrides.
+    # To do this 100% correctly requires JSON parsing which we don't really want to do here.
+    # We know that this will be called only for Pixel (for a limited-duration experiment), and that
+    # Pixel does not override cgroups.json, therefore we can assume that the system cgroups.json has
+    # only a single MaxActivationDepth entry which corresponds to the v2 memory controller. So we
+    # can just grep for the default value. $? after the pipeline is tr's exit status, not grep's,
+    # so a missing entry is detected by the empty result rather than by $?.
+    default_activation_depth=$(grep MaxActivationDepth /system/etc/cgroups.json | tr -dc '0-9')
+    if [ -z "$default_activation_depth" ] || [ "$default_activation_depth" -gt $MAX_ALLOWED_DEPTH ]
+    then
+        # If MaxActivationDepth is not present, libprocessgroup does not limit how deep it will activate
+        default_activation_depth=$MAX_ALLOWED_DEPTH
+    fi
+    current_activation_depth=$default_activation_depth
+fi
+
+# libprocessgroup will pick this up for all future cgroup creations, including on the next boot
+echo $max_activation_depth > $OVERRIDE_FILE_PATH
+chmod ugo+r $OVERRIDE_FILE_PATH
+
+if [ $max_activation_depth -lt $current_activation_depth ]
+then
+    # We can deactivate memcgs which are deeper than the new depth value, however that would leave
+    # behind zombie memcgs which would ruin the metrics produced from this device. The only way to
+    # eliminate those zombies is to remove the entire cgroup, which we cannot do without killing
+    # all the contained processes. So the only real option we have is to reboot here, but that would
+    # look like a random reboot to users. So don't do anything now. Wait until the next reboot for
+    # the new setting to be applied.
+    :
+elif [ $max_activation_depth -gt $current_activation_depth ]
+then
+    for d in $(seq $max_activation_depth)
+    do
+        for f in $(find /sys/fs/cgroup/ -mindepth $d -maxdepth $d -name cgroup.subtree_control)
+        do
+            echo "+memory" > $f
+        done
+    done
+fi
diff --git a/libprocessgroup/util/util.cpp b/libprocessgroup/util/util.cpp
index c772bc5..a15a44f 100644
--- a/libprocessgroup/util/util.cpp
+++ b/libprocessgroup/util/util.cpp
@@ -18,15 +18,19 @@
 
 #include <algorithm>
 #include <iterator>
+#include <mutex>
 #include <optional>
 #include <string_view>
 
 #include <mntent.h>
+#include <unistd.h>
 
 #include <android-base/file.h>
 #include <android-base/logging.h>
+#include <android-base/parseint.h>
 #include <android-base/properties.h>
 #include <android-base/stringprintf.h>
+#include <android-base/strings.h>
 #include <json/reader.h>
 #include <json/value.h>
 
@@ -174,6 +178,38 @@
     return mounts;
 }
 
+// Reads the memcg v2 MaxActivationDepth override stored on /metadata. Returns std::nullopt when
+// the file cannot be opened, read, or parsed. The file is kept open to reduce open syscalls, but
+// it is re-read on every call. Note that memcgv2_activation_depth.sh can race with us here.
+std::optional<unsigned int> ReadMaxActivationDepthMetadataOverride() {
+    static const char* OVERRIDE_FILE_PATH =
+        "/metadata/libprocessgroup/memcg_v2_max_activation_depth";
+    static int override_fd = open(OVERRIDE_FILE_PATH, O_RDONLY | O_CLOEXEC);
+    static std::mutex mtx;
+
+    std::unique_lock lock(mtx);
+    if (override_fd < 0) {
+        override_fd = open(OVERRIDE_FILE_PATH, O_RDONLY | O_CLOEXEC);
+        if (override_fd < 0) return std::nullopt;
+    }
+    // Rewind before (not after) reading so errno from a failed read is still intact for PLOG.
+    lseek(override_fd, 0, SEEK_SET);
+    std::string depth_str;
+    const bool ret = android::base::ReadFdToString(override_fd, &depth_str);
+    lock.unlock();
+    if (!ret) {
+        PLOG(ERROR) << "Failed to read max activation depth override";
+        return std::nullopt;
+    }
+
+    unsigned int depth;
+    if (!android::base::ParseUint(android::base::Trim(depth_str), &depth)) {
+        PLOG(ERROR) << "Failed to convert max activation depth override (" << depth_str << ')';
+        return std::nullopt;
+    }
+    return depth;
+}
+
 }  // anonymous namespace
 
 
@@ -235,7 +271,10 @@
 bool ActivateControllers(const std::string& path, const CgroupDescriptorMap& descriptors) {
     for (const auto& [name, descriptor] : descriptors) {
         const uint32_t flags = descriptor.controller()->flags();
-        const uint32_t max_activation_depth = descriptor.controller()->max_activation_depth();
+        uint32_t max_activation_depth;
+        std::optional<unsigned int> metadataMaxDepth = ReadMaxActivationDepthMetadataOverride();
+        if (metadataMaxDepth) max_activation_depth = *metadataMaxDepth;
+        else max_activation_depth = descriptor.controller()->max_activation_depth();
         const unsigned int depth = GetCgroupDepth(descriptor.controller()->path(), path);
 
         if (flags & CGROUPRC_CONTROLLER_FLAG_NEEDS_ACTIVATION && depth < max_activation_depth) {
diff --git a/rootdir/init.rc b/rootdir/init.rc
index 54493d5..c25a9ac 100644
--- a/rootdir/init.rc
+++ b/rootdir/init.rc
@@ -614,6 +614,9 @@
 
     mkdir /metadata/staged-install 0770 root system
 
+    # TODO: Revert after go/android-memcgv2-exp b/386797433
+    mkdir /metadata/libprocessgroup 0775 root system
+
 on late-fs
     # Ensure that tracefs has the correct permissions.
     # This does not work correctly if it is called in post-fs.
@@ -1230,7 +1233,7 @@
 # and chown/chmod does not work for /proc/sys/ entries.
 # So proxy writes through init.
 on property:sys.sysctl.extra_free_kbytes=*
-    exec -- /system/bin/extra_free_kbytes.sh ${sys.sysctl.extra_free_kbytes}
+    exec_background -- /system/bin/extra_free_kbytes.sh ${sys.sysctl.extra_free_kbytes}
 
 # Allow users to drop caches
 on property:perf.drop_caches=3
@@ -1317,14 +1320,34 @@
 # Multi-Gen LRU Experiment
 on property:persist.device_config.mglru_native.lru_gen_config=none
   write /sys/kernel/mm/lru_gen/enabled 0
+  # Memcg v2 Experiment
+  # TODO: Revert after go/android-memcgv2-exp b/386797433
+  exec - system system -- /system/bin/memcgv2_activation_depth.sh 0
+  setprop persist.device_config.lmkd_native.psi_partial_stall_ms 70
 on property:persist.device_config.mglru_native.lru_gen_config=core
-  write /sys/kernel/mm/lru_gen/enabled 1
+  write /sys/kernel/mm/lru_gen/enabled y
+  # Memcg v2 Experiment
+  # TODO: Revert after go/android-memcgv2-exp b/386797433
+  exec - system system -- /system/bin/memcgv2_activation_depth.sh 1
+  setprop persist.device_config.lmkd_native.psi_partial_stall_ms 56
 on property:persist.device_config.mglru_native.lru_gen_config=core_and_mm_walk
-  write /sys/kernel/mm/lru_gen/enabled 3
+  write /sys/kernel/mm/lru_gen/enabled y
+  # Memcg v2 Experiment
+  # TODO: Revert after go/android-memcgv2-exp b/386797433
+  exec - system system -- /system/bin/memcgv2_activation_depth.sh 1
+  setprop persist.device_config.lmkd_native.psi_partial_stall_ms 70
 on property:persist.device_config.mglru_native.lru_gen_config=core_and_nonleaf_young
-  write /sys/kernel/mm/lru_gen/enabled 5
+  write /sys/kernel/mm/lru_gen/enabled y
+  # Memcg v2 Experiment
+  # TODO: Revert after go/android-memcgv2-exp b/386797433
+  exec - system system -- /system/bin/memcgv2_activation_depth.sh 2
+  setprop persist.device_config.lmkd_native.psi_partial_stall_ms 70
 on property:persist.device_config.mglru_native.lru_gen_config=all
-  write /sys/kernel/mm/lru_gen/enabled 7
+  write /sys/kernel/mm/lru_gen/enabled y
+  # Memcg v2 Experiment
+  # TODO: Revert after go/android-memcgv2-exp b/386797433
+  exec - system system -- /system/bin/memcgv2_activation_depth.sh 3
+  setprop persist.device_config.lmkd_native.psi_partial_stall_ms 70
 
 # Allow other processes to run `snapshotctl` through `init`. This requires
 # `set_prop` permission on `snapshotctl_prop`.
diff --git a/storaged/uid_info.cpp b/storaged/uid_info.cpp
index 0f718de..6f25898 100644
--- a/storaged/uid_info.cpp
+++ b/storaged/uid_info.cpp
@@ -23,13 +23,13 @@
 
 status_t UidInfo::writeToParcel(Parcel* parcel) const {
     parcel->writeInt32(uid);
-    parcel->writeCString(name.c_str());
+    parcel->writeString8(String8(name.c_str()));
     parcel->write(&io, sizeof(io));
 
     parcel->writeInt32(tasks.size());
     for (const auto& task_it : tasks) {
         parcel->writeInt32(task_it.first);
-        parcel->writeCString(task_it.second.comm.c_str());
+        parcel->writeString8(String8(task_it.second.comm.c_str()));
         parcel->write(&task_it.second.io, sizeof(task_it.second.io));
     }
     return OK;
@@ -37,14 +37,14 @@
 
 status_t UidInfo::readFromParcel(const Parcel* parcel) {
     uid = parcel->readInt32();
-    name = parcel->readCString();
+    name = parcel->readString8().c_str();
     parcel->read(&io, sizeof(io));
 
     uint32_t tasks_size = parcel->readInt32();
     for (uint32_t i = 0; i < tasks_size; i++) {
         task_info task;
         task.pid = parcel->readInt32();
-        task.comm = parcel->readCString();
+        task.comm = parcel->readString8().c_str();
         parcel->read(&task.io, sizeof(task.io));
         tasks[task.pid] = task;
     }