Merge "healthd: add annotations for dumpsys diagnostics" into main
diff --git a/debuggerd/debuggerd_test.cpp b/debuggerd/debuggerd_test.cpp
index 858956a..34f2c45 100644
--- a/debuggerd/debuggerd_test.cpp
+++ b/debuggerd/debuggerd_test.cpp
@@ -335,7 +335,7 @@
   ConsumeFd(std::move(output_fd), &result);
   ASSERT_MATCH(result, R"(signal 11 \(SIGSEGV\), code 1 \(SEGV_MAPERR\), fault addr 0x0+dead)");
 
-  if (mte_supported()) {
+  if (mte_supported() && mte_enabled()) {
     // Test that the default TAGGED_ADDR_CTRL value is set.
     ASSERT_MATCH(result, R"(tagged_addr_ctrl: 000000000007fff3)"
                          R"( \(PR_TAGGED_ADDR_ENABLE, PR_MTE_TCF_SYNC, mask 0xfffe\))");
@@ -443,7 +443,7 @@
 
 TEST_P(SizeParamCrasherTest, mte_uaf) {
 #if defined(__aarch64__)
-  if (!mte_supported()) {
+  if (!mte_supported() || !mte_enabled()) {
     GTEST_SKIP() << "Requires MTE";
   }
 
@@ -490,7 +490,7 @@
 
 TEST_P(SizeParamCrasherTest, mte_oob_uaf) {
 #if defined(__aarch64__)
-  if (!mte_supported()) {
+  if (!mte_supported() || !mte_enabled()) {
     GTEST_SKIP() << "Requires MTE";
   }
 
@@ -522,7 +522,7 @@
 
 TEST_P(SizeParamCrasherTest, mte_overflow) {
 #if defined(__aarch64__)
-  if (!mte_supported()) {
+  if (!mte_supported() || !mte_enabled()) {
     GTEST_SKIP() << "Requires MTE";
   }
 
@@ -565,7 +565,7 @@
 
 TEST_P(SizeParamCrasherTest, mte_underflow) {
 #if defined(__aarch64__)
-  if (!mte_supported()) {
+  if (!mte_supported() || !mte_enabled()) {
     GTEST_SKIP() << "Requires MTE";
   }
 
@@ -614,7 +614,7 @@
   //     unsubtle chaos is sure to result.
   // https://man7.org/linux/man-pages/man3/longjmp.3.html
 #if defined(__aarch64__)
-  if (!mte_supported()) {
+  if (!mte_supported() || !mte_enabled()) {
     GTEST_SKIP() << "Requires MTE";
   }
 
@@ -648,7 +648,7 @@
 
 TEST_F(CrasherTest, mte_async) {
 #if defined(__aarch64__)
-  if (!mte_supported()) {
+  if (!mte_supported() || !mte_enabled()) {
     GTEST_SKIP() << "Requires MTE";
   }
 
@@ -678,7 +678,7 @@
 
 TEST_F(CrasherTest, mte_multiple_causes) {
 #if defined(__aarch64__)
-  if (!mte_supported()) {
+  if (!mte_supported() || !mte_enabled()) {
     GTEST_SKIP() << "Requires MTE";
   }
 
@@ -764,7 +764,7 @@
 
 TEST_F(CrasherTest, mte_register_tag_dump) {
 #if defined(__aarch64__)
-  if (!mte_supported()) {
+  if (!mte_supported() || !mte_enabled()) {
     GTEST_SKIP() << "Requires MTE";
   }
 
@@ -797,7 +797,7 @@
 
 TEST_F(CrasherTest, mte_fault_tag_dump_front_truncated) {
 #if defined(__aarch64__)
-  if (!mte_supported()) {
+  if (!mte_supported() || !mte_enabled()) {
     GTEST_SKIP() << "Requires MTE";
   }
 
@@ -828,7 +828,7 @@
 
 TEST_F(CrasherTest, mte_fault_tag_dump) {
 #if defined(__aarch64__)
-  if (!mte_supported()) {
+  if (!mte_supported() || !mte_enabled()) {
     GTEST_SKIP() << "Requires MTE";
   }
 
@@ -862,7 +862,7 @@
 
 TEST_F(CrasherTest, mte_fault_tag_dump_rear_truncated) {
 #if defined(__aarch64__)
-  if (!mte_supported()) {
+  if (!mte_supported() || !mte_enabled()) {
     GTEST_SKIP() << "Requires MTE";
   }
 
diff --git a/debuggerd/libdebuggerd/test/tombstone_proto_to_text_test.cpp b/debuggerd/libdebuggerd/test/tombstone_proto_to_text_test.cpp
index aad209a..988ca0c 100644
--- a/debuggerd/libdebuggerd/test/tombstone_proto_to_text_test.cpp
+++ b/debuggerd/libdebuggerd/test/tombstone_proto_to_text_test.cpp
@@ -175,3 +175,8 @@
   ProtoToString();
   EXPECT_MATCH(text_, "\\(BuildId: 0123456789abcdef\\)\\nSYMBOLIZE 0123456789abcdef 12345\\n");
 }
+
+TEST_F(TombstoneProtoToTextTest, uid) {
+  ProtoToString();
+  EXPECT_MATCH(text_, "\\nLOG uid: 0\\n");
+}
diff --git a/fs_mgr/libsnapshot/include/libsnapshot/snapshot_stats.h b/fs_mgr/libsnapshot/include/libsnapshot/snapshot_stats.h
index 8a70400..79443b2 100644
--- a/fs_mgr/libsnapshot/include/libsnapshot/snapshot_stats.h
+++ b/fs_mgr/libsnapshot/include/libsnapshot/snapshot_stats.h
@@ -64,6 +64,7 @@
   public:
     // Not thread safe.
     static SnapshotMergeStats* GetInstance(SnapshotManager& manager);
+    explicit SnapshotMergeStats(const std::string& path);
 
     // ISnapshotMergeStats overrides
     bool Start() override;
@@ -88,7 +89,6 @@
   private:
     bool ReadState();
     bool DeleteState();
-    SnapshotMergeStats(const std::string& path);
 
     std::string path_;
     SnapshotMergeReport report_;
diff --git a/fs_mgr/libsnapshot/snapshot_stats.cpp b/fs_mgr/libsnapshot/snapshot_stats.cpp
index 8e9d9c5..e684d87 100644
--- a/fs_mgr/libsnapshot/snapshot_stats.cpp
+++ b/fs_mgr/libsnapshot/snapshot_stats.cpp
@@ -24,9 +24,12 @@
 namespace snapshot {
 
 SnapshotMergeStats* SnapshotMergeStats::GetInstance(SnapshotManager& parent) {
-    static SnapshotMergeStats g_instance(parent.GetMergeStateFilePath());
-    CHECK_EQ(g_instance.path_, parent.GetMergeStateFilePath());
-    return &g_instance;
+    static std::unique_ptr<SnapshotMergeStats> g_instance;
+    // A path change replaces (and frees) the old instance; earlier pointers then dangle.
+    if (auto path = parent.GetMergeStateFilePath(); !g_instance || g_instance->path_ != path) {
+        g_instance = std::make_unique<SnapshotMergeStats>(path);
+    }
+    return g_instance.get();
 }
 
 SnapshotMergeStats::SnapshotMergeStats(const std::string& path) : path_(path), running_(false) {}
diff --git a/fs_mgr/libsnapshot/snapuserd/Android.bp b/fs_mgr/libsnapshot/snapuserd/Android.bp
index 639116e..9972bc7 100644
--- a/fs_mgr/libsnapshot/snapuserd/Android.bp
+++ b/fs_mgr/libsnapshot/snapuserd/Android.bp
@@ -88,6 +88,7 @@
         "libprocessgroup",
         "libprocessgroup_util",
         "libjsoncpp",
+        "liburing_cpp",
     ],
     export_include_dirs: ["include"],
     header_libs: [
@@ -136,6 +137,7 @@
         "libext4_utils",
         "liburing",
         "libzstd",
+        "liburing_cpp",
     ],
 
     header_libs: [
@@ -222,6 +224,7 @@
         "libjsoncpp",
         "liburing",
         "libz",
+        "liburing_cpp",
     ],
     include_dirs: [
         ".",
@@ -319,6 +322,7 @@
         "libjsoncpp",
         "liburing",
         "libz",
+        "liburing_cpp",
     ],
     include_dirs: [
         ".",
diff --git a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_verify.cpp b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_verify.cpp
index 957c6a8..97f8df4 100644
--- a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_verify.cpp
+++ b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_verify.cpp
@@ -22,6 +22,7 @@
 
 #include "android-base/properties.h"
 #include "snapuserd_core.h"
+#include "utility.h"
 
 namespace android {
 namespace snapshot {
@@ -104,43 +105,108 @@
         return false;
     }
 
-    loff_t file_offset = offset;
-    auto verify_block_size = android::base::GetUintProperty<uint>("ro.virtual_ab.verify_block_size",
-                                                                  kBlockSizeVerify);
-    const uint64_t read_sz = verify_block_size;
+    int queue_depth = std::max(queue_depth_, 1);
+    int verify_block_size = verify_block_size_;
 
-    void* addr;
-    ssize_t page_size = getpagesize();
-    if (posix_memalign(&addr, page_size, read_sz) < 0) {
-        SNAP_PLOG(ERROR) << "posix_memalign failed "
-                         << " page_size: " << page_size << " read_sz: " << read_sz;
+    // Smaller partitions don't need a bigger queue-depth.
+    // This is required for low-memory devices.
+    if (dev_sz < threshold_size_) {
+        queue_depth = std::max(queue_depth / 2, 1);
+        verify_block_size >>= 2;
+    }
+
+    if (!IsBlockAligned(verify_block_size)) {
+        verify_block_size = EXT4_ALIGN(verify_block_size, BLOCK_SZ);
+    }
+
+    std::unique_ptr<io_uring_cpp::IoUringInterface> ring =
+            io_uring_cpp::IoUringInterface::CreateLinuxIoUring(queue_depth, 0);
+    if (ring == nullptr) {
+        SNAP_PLOG(ERROR) << "Verify: io_uring_queue_init failed for queue_depth: " << queue_depth;
         return false;
     }
 
-    std::unique_ptr<void, decltype(&::free)> buffer(addr, ::free);
-
-    uint64_t bytes_read = 0;
-
-    while (true) {
-        size_t to_read = std::min((dev_sz - file_offset), read_sz);
-
-        if (!android::base::ReadFullyAtOffset(fd.get(), buffer.get(), to_read, file_offset)) {
-            SNAP_PLOG(ERROR) << "Failed to read block from block device: " << dm_block_device
-                             << " partition-name: " << partition_name
-                             << " at offset: " << file_offset << " read-size: " << to_read
-                             << " block-size: " << dev_sz;
+    std::unique_ptr<struct iovec[]> vecs = std::make_unique<struct iovec[]>(queue_depth);
+    std::vector<std::unique_ptr<void, decltype(&::free)>> buffers;
+    for (int i = 0; i < queue_depth; i++) {
+        void* addr = nullptr;
+        // posix_memalign() returns a positive errno value on failure, never a negative one.
+        if (int err = posix_memalign(&addr, getpagesize(), verify_block_size); err != 0) {
+            SNAP_LOG(ERROR) << "posix_memalign failed: " << err;
             return false;
         }
 
-        bytes_read += to_read;
-        file_offset += (skip_blocks * verify_block_size);
-        if (file_offset >= dev_sz) {
+        buffers.emplace_back(addr, ::free);
+        vecs[i].iov_base = addr;
+        vecs[i].iov_len = verify_block_size;
+    }
+
+    auto ret = ring->RegisterBuffers(vecs.get(), queue_depth);
+    if (!ret.IsOk()) {
+        SNAP_LOG(ERROR) << "io_uring_register_buffers failed: " << ret.ErrCode();
+        return false;
+    }
+
+    loff_t file_offset = offset;
+    const uint64_t read_sz = verify_block_size;
+    uint64_t total_read = 0;
+    int num_submitted = 0;
+
+    SNAP_LOG(DEBUG) << "VerifyBlocks: queue_depth: " << queue_depth
+                    << " verify_block_size: " << verify_block_size << " dev_sz: " << dev_sz
+                    << " file_offset: " << file_offset << " skip_blocks: " << skip_blocks;
+
+    while (file_offset < dev_sz) {
+        for (size_t i = 0; i < queue_depth; i++) {
+            uint64_t to_read = std::min((dev_sz - file_offset), read_sz);
+            if (to_read <= 0) break;
+
+            const auto sqe =
+                    ring->PrepReadFixed(fd.get(), vecs[i].iov_base, to_read, file_offset, i);
+            if (!sqe.IsOk()) {
+                SNAP_PLOG(ERROR) << "PrepReadFixed failed";
+                return false;
+            }
+            file_offset += (skip_blocks * to_read);
+            total_read += to_read;
+            num_submitted += 1;
+            if (file_offset >= dev_sz) {
+                break;
+            }
+        }
+
+        if (num_submitted == 0) {
             break;
         }
+
+        const auto io_submit = ring->SubmitAndWait(num_submitted);
+        if (!io_submit.IsOk()) {
+            SNAP_LOG(ERROR) << "SubmitAndWait failed: " << io_submit.ErrMsg()
+                            << " for: " << num_submitted << " entries.";
+            return false;
+        }
+
+        SNAP_LOG(DEBUG) << "io_uring_submit: total_read: " << total_read
+                        << " num_submitted: " << num_submitted;
+
+        const auto cqes = ring->PopCQE(num_submitted);
+        if (cqes.IsErr()) {
+            SNAP_LOG(ERROR) << "PopCqe failed for: " << num_submitted
+                            << " error: " << cqes.GetError().ErrMsg();
+            return false;
+        }
+        for (const auto& cqe : cqes.GetResult()) {
+            if (cqe.res < 0) {
+                SNAP_LOG(ERROR) << "I/O failed: cqe->res: " << cqe.res;
+                return false;
+            }
+            num_submitted -= 1;
+        }
     }
 
-    SNAP_LOG(DEBUG) << "Verification success with bytes-read: " << bytes_read
-                    << " dev_sz: " << dev_sz << " partition_name: " << partition_name;
+    SNAP_LOG(DEBUG) << "Verification success with io_uring: "
+                    << " dev_sz: " << dev_sz << " partition_name: " << partition_name
+                    << " total_read: " << total_read;
 
     return true;
 }
@@ -175,21 +241,14 @@
         return false;
     }
 
-    /*
-     * Not all partitions are of same size. Some partitions are as small as
-     * 100Mb. We can just finish them in a single thread. For bigger partitions
-     * such as product, 4 threads are sufficient enough.
-     *
-     * TODO: With io_uring SQ_POLL support, we can completely cut this
-     * down to just single thread for all partitions and potentially verify all
-     * the partitions with zero syscalls. Additionally, since block layer
-     * supports polling, IO_POLL could be used which will further cut down
-     * latency.
-     */
+    if (!KernelSupportsIoUring()) {
+        // Returning false makes this fall back to update_verifier to do the verification.
+        SNAP_LOG(INFO) << "Kernel does not support io_uring. Skipping verification.";
+        return false;
+    }
+
     int num_threads = kMinThreadsToVerify;
-    auto verify_threshold_size = android::base::GetUintProperty<uint>(
-            "ro.virtual_ab.verify_threshold_size", kThresholdSize);
-    if (dev_sz > verify_threshold_size) {
+    if (dev_sz > threshold_size_) {
         num_threads = kMaxThreadsToVerify;
     }
 
@@ -197,13 +256,11 @@
     off_t start_offset = 0;
     const int skip_blocks = num_threads;
 
-    auto verify_block_size =
-            android::base::GetUintProperty("ro.virtual_ab.verify_block_size", kBlockSizeVerify);
     while (num_threads) {
         threads.emplace_back(std::async(std::launch::async, &UpdateVerify::VerifyBlocks, this,
                                         partition_name, dm_block_device, start_offset, skip_blocks,
                                         dev_sz));
-        start_offset += verify_block_size;
+        start_offset += verify_block_size_;
         num_threads -= 1;
         if (start_offset >= dev_sz) {
             break;
@@ -218,9 +275,9 @@
     if (ret) {
         succeeded = true;
         UpdatePartitionVerificationState(UpdateVerifyState::VERIFY_SUCCESS);
-        SNAP_LOG(INFO) << "Partition: " << partition_name << " Block-device: " << dm_block_device
-                       << " Size: " << dev_sz
-                       << " verification success. Duration : " << timer.duration().count() << " ms";
+        SNAP_LOG(INFO) << "Partition verification success: " << partition_name
+                       << " Block-device: " << dm_block_device << " Size: " << dev_sz
+                       << " Duration : " << timer.duration().count() << " ms";
         return true;
     }
 
diff --git a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_verify.h b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_verify.h
index b300a70..69a334b 100644
--- a/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_verify.h
+++ b/fs_mgr/libsnapshot/snapuserd/user-space-merge/snapuserd_verify.h
@@ -15,6 +15,7 @@
 
 #pragma once
 
+#include <liburing.h>
 #include <stdint.h>
 #include <sys/types.h>
 
@@ -22,6 +23,7 @@
 #include <mutex>
 #include <string>
 
+#include <liburing_cpp/IoUring.h>
 #include <snapuserd/snapuserd_kernel.h>
 #include <storage_literals/storage_literals.h>
 
@@ -48,27 +50,23 @@
     std::mutex m_lock_;
     std::condition_variable m_cv_;
 
+    int kMinThreadsToVerify = 1;
+    int kMaxThreadsToVerify = 3;
+
     /*
-     * Scanning of partitions is an expensive operation both in terms of memory
-     * and CPU usage. The goal here is to scan the partitions fast enough without
-     * significant increase in the boot time.
-     *
-     * Partitions such as system, product which may be huge and may need multiple
-     * threads to speed up the verification process. Using multiple threads for
-     * all partitions may increase CPU usage significantly. Hence, limit that to
-     * 1 thread per partition.
+     * To optimize partition scanning speed without significantly impacting boot time,
+     * we employ O_DIRECT, bypassing the page-cache. However, O_DIRECT's memory
+     * allocation from CMA can be problematic on devices with restricted CMA space.
+     * To address this, io_uring_register_buffers() pre-registers I/O buffers,
+     * preventing CMA usage. See b/401952955 for more details.
      *
      * These numbers were derived by monitoring the memory and CPU pressure
      * (/proc/pressure/{cpu,memory}; and monitoring the Inactive(file) and
      * Active(file) pages from /proc/meminfo.
-     *
-     * Additionally, for low memory devices, it is advisable to use O_DIRECT
-     * functionality for source block device.
      */
-    int kMinThreadsToVerify = 1;
-    int kMaxThreadsToVerify = 3;
-    uint64_t kThresholdSize = 750_MiB;
-    uint64_t kBlockSizeVerify = 2_MiB;
+    uint64_t verify_block_size_ = 1_MiB;
+    uint64_t threshold_size_ = 2_GiB;
+    int queue_depth_ = 4;
 
     bool IsBlockAligned(uint64_t read_size) { return ((read_size & (BLOCK_SZ - 1)) == 0); }
     void UpdatePartitionVerificationState(UpdateVerifyState state);
diff --git a/init/README.md b/init/README.md
index c9742ad..6a66f14 100644
--- a/init/README.md
+++ b/init/README.md
@@ -971,26 +971,13 @@
 
 Bootcharting
 ------------
-This version of init contains code to perform "bootcharting": generating log
-files that can be later processed by the tools provided by <http://www.bootchart.org/>.
+Bootchart provides a CPU and I/O load breakdown of all processes for the whole system.
+Refer to the instructions at
+<https://source.android.com/docs/core/perf/boot-times#bootchart>.
 
 On the emulator, use the -bootchart _timeout_ option to boot with bootcharting
 activated for _timeout_ seconds.
 
-On a device:
-
-    adb shell 'touch /data/bootchart/enabled'
-
-Don't forget to delete this file when you're done collecting data!
-
-The log files are written to /data/bootchart/. A script is provided to
-retrieve them and create a bootchart.tgz file that can be used with the
-bootchart command-line utility:
-
-    sudo apt-get install pybootchartgui
-    # grab-bootchart.sh uses $ANDROID_SERIAL.
-    $ANDROID_BUILD_TOP/system/core/init/grab-bootchart.sh
-
 One thing to watch for is that the bootchart will show init as if it started
 running at 0s. You'll have to look at dmesg to work out when the kernel
 actually started init.
diff --git a/init/compare-bootcharts.py b/init/compare-bootcharts.py
index 009b639..b299b7d 100755
--- a/init/compare-bootcharts.py
+++ b/init/compare-bootcharts.py
@@ -47,7 +47,7 @@
 def analyze_process_maps(process_map1, process_map2, jiffy_record):
     # List interesting processes here
     processes_of_interest = [
-        '/init',
+        '/system/bin/init',
         '/system/bin/surfaceflinger',
         '/system/bin/bootanimation',
         'zygote64',
diff --git a/init/service_utils.cpp b/init/service_utils.cpp
index f8821a0..8d9a046 100644
--- a/init/service_utils.cpp
+++ b/init/service_utils.cpp
@@ -98,7 +98,17 @@
         // Look up the filesystems that were mounted under /sys before we wiped
         // it and attempt to restore them.
         for (const auto& entry : mounts) {
-            if (entry.mount_point.starts_with("/sys/")) {
+            // Never mount /sys/kernel/debug/tracing. This is the *one* mount
+            // that is special within the Linux kernel: for backward compatibility
+            // tracefs gets auto-mounted there whenever one mounts debugfs [1].
+            //
+            // Attempting to mount the filesystem here will cause SELinux
+            // denials, because unlike *all other* filesystems in Android, it's
+            // not init who mounted it so there's no policy that would allow it.
+            //
+            // [1] https://lore.kernel.org/lkml/20150204143755.694479564@goodmis.org/
+            if (entry.mount_point.starts_with("/sys/") &&
+                entry.mount_point != "/sys/kernel/debug/tracing") {
                 if (mount(entry.blk_device.c_str(), entry.mount_point.c_str(),
                           entry.fs_type.c_str(), entry.flags, "")) {
                     LOG(WARNING) << "Could not mount(" << entry.mount_point
diff --git a/libprocessgroup/profiles/task_profiles.json b/libprocessgroup/profiles/task_profiles.json
index 720cb30..42cdb91 100644
--- a/libprocessgroup/profiles/task_profiles.json
+++ b/libprocessgroup/profiles/task_profiles.json
@@ -597,7 +597,7 @@
           "Params":
           {
             "Name": "MemSoftLimit",
-            "Value": "16MB"
+            "Value": "16M"
           }
         },
         {
@@ -619,7 +619,7 @@
           "Params":
           {
             "Name": "MemSoftLimit",
-            "Value": "512MB"
+            "Value": "512M"
           }
         },
         {
diff --git a/rootdir/init.rc b/rootdir/init.rc
index 1545d09..c25a9ac 100644
--- a/rootdir/init.rc
+++ b/rootdir/init.rc
@@ -1233,7 +1233,7 @@
 # and chown/chmod does not work for /proc/sys/ entries.
 # So proxy writes through init.
 on property:sys.sysctl.extra_free_kbytes=*
-    exec -- /system/bin/extra_free_kbytes.sh ${sys.sysctl.extra_free_kbytes}
+    exec_background -- /system/bin/extra_free_kbytes.sh ${sys.sysctl.extra_free_kbytes}
 
 # Allow users to drop caches
 on property:perf.drop_caches=3