Add present bytes when recording allocations.

When an allocation is freed, record the total number of bytes in the
allocation that are present (resident) in physical memory. This makes
it possible to tell how much of an allocation was actually used. This
also applies to realloc operations that free the previous pointer.
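
Present bytes are determined from /proc/self/pagemap: every page
spanned by the allocation contributes only if bit 63 (page present)
of its pagemap entry is set, with the first and last pages trimmed to
the portion actually covered by the allocation. Below is a minimal,
simplified sketch of the technique; it counts whole pages only and
does not batch reads like the implementation in this change, and the
helper name CountPresentBytes is hypothetical:

  #include <fcntl.h>
  #include <stdint.h>
  #include <unistd.h>

  // Rough resident-byte count for [ptr, ptr + size), whole pages only.
  static int64_t CountPresentBytes(void* ptr, size_t size) {
    int fd = open("/proc/self/pagemap", O_RDONLY | O_CLOEXEC);
    if (fd == -1) return -1;
    uintptr_t addr = reinterpret_cast<uintptr_t>(ptr);
    size_t page_size = getpagesize();
    int64_t present = 0;
    for (size_t page = addr / page_size; page <= (addr + size - 1) / page_size; page++) {
      uint64_t entry;
      // Each pagemap entry is 8 bytes, indexed by virtual page number.
      if (pread64(fd, &entry, sizeof(entry), page * sizeof(entry)) != sizeof(entry)) {
        close(fd);
        return -1;
      }
      if (entry & (1ULL << 63)) present += page_size;
    }
    close(fd);
    return present;
  }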

Add new unit tests for this functionality.

Test: All unit tests pass.
Change-Id: Id3a253e4beb1c7342711137a2bf7ebed4e25d973
diff --git a/libc/malloc_debug/Android.bp b/libc/malloc_debug/Android.bp
index 5d61801..1864138 100644
--- a/libc/malloc_debug/Android.bp
+++ b/libc/malloc_debug/Android.bp
@@ -135,6 +135,7 @@
         "tests/log_fake.cpp",
         "tests/libc_fake.cpp",
         "tests/malloc_debug_config_tests.cpp",
+        "tests/malloc_debug_record_data_tests.cpp",
         "tests/malloc_debug_unit_tests.cpp",
     ],
 
diff --git a/libc/malloc_debug/RecordData.cpp b/libc/malloc_debug/RecordData.cpp
index f832f03..1df0b0c 100644
--- a/libc/malloc_debug/RecordData.cpp
+++ b/libc/malloc_debug/RecordData.cpp
@@ -38,7 +38,6 @@
 
 #include <mutex>
 
-#include <android-base/stringprintf.h>
 #include <memory_trace/MemoryTrace.h>
 
 #include "Config.h"
@@ -150,10 +149,20 @@
   cur_index_ = 0U;
   file_ = config.record_allocs_file();
 
+  pagemap_fd_ = TEMP_FAILURE_RETRY(open("/proc/self/pagemap", O_RDONLY | O_CLOEXEC));
+  if (pagemap_fd_ == -1) {
+    error_log("Unable to open /proc/self/pagemap: %s", strerror(errno));
+    return false;
+  }
+
   return true;
 }
 
 RecordData::~RecordData() {
+  if (pagemap_fd_ != -1) {
+    close(pagemap_fd_);
+  }
+
   pthread_key_delete(key_);
 }
 
@@ -180,3 +189,78 @@
 
   return InternalReserveEntry();
 }
+
+static inline bool IsPagePresent(uint64_t page_data) {
+  // Page Present is bit 63
+  return (page_data & (1ULL << 63)) != 0;
+}
+
+int64_t RecordData::GetPresentBytes(void* ptr, size_t alloc_size) {
+  uintptr_t addr = reinterpret_cast<uintptr_t>(ptr);
+  if (addr == 0 || alloc_size == 0) {
+    return -1;
+  }
+
+  uintptr_t page_size = getpagesize();
+  uintptr_t page_size_mask = page_size - 1;
+
+  size_t start_page = (addr & ~page_size_mask) / page_size;
+  size_t last_page = ((addr + alloc_size - 1) & ~page_size_mask) / page_size;
+
+  constexpr size_t kMaxReadPages = 1024;
+  uint64_t page_data[kMaxReadPages];
+
+  int64_t present_bytes = 0;
+  size_t cur_page = start_page;
+  while (cur_page <= last_page) {
+    size_t num_pages = last_page - cur_page + 1;
+    size_t last_page_index;
+    if (num_pages > kMaxReadPages) {
+      num_pages = kMaxReadPages;
+      last_page_index = num_pages;
+    } else {
+      // The last page is handled separately below, so exclude it from the loop.
+      last_page_index = num_pages - 1;
+    }
+    ssize_t bytes_read =
+        pread64(pagemap_fd_, page_data, num_pages * sizeof(uint64_t), cur_page * sizeof(uint64_t));
+    if (bytes_read <= 0) {
+      error_log("Failed to read page data: %s", strerror(errno));
+      return -1;
+    }
+
+    size_t page_index = 0;
+    // The first page may only be partially covered by the allocation, so handle it separately.
+    if (cur_page == start_page) {
+      if (IsPagePresent(page_data[0])) {
+        present_bytes = page_size - (addr & page_size_mask);
+        if (present_bytes >= alloc_size) {
+          // The allocation fits on a single page and that page is present.
+          return alloc_size;
+        }
+      } else if (start_page == last_page) {
+        // Only one page that isn't present.
+        return 0;
+      }
+      page_index = 1;
+    }
+
+    for (; page_index < last_page_index; page_index++) {
+      if (IsPagePresent(page_data[page_index])) {
+        present_bytes += page_size;
+      }
+    }
+
+    cur_page += last_page_index;
+
+    // Check the last page in the allocation.
+    if (cur_page == last_page) {
+      if (IsPagePresent(page_data[num_pages - 1])) {
+        present_bytes += ((addr + alloc_size - 1) & page_size_mask) + 1;
+      }
+      return present_bytes;
+    }
+  }
+
+  return present_bytes;
+}
diff --git a/libc/malloc_debug/RecordData.h b/libc/malloc_debug/RecordData.h
index ce71da1..bf5cc57 100644
--- a/libc/malloc_debug/RecordData.h
+++ b/libc/malloc_debug/RecordData.h
@@ -56,6 +56,8 @@
   const std::string& file() { return file_; }
   pthread_key_t key() { return key_; }
 
+  int64_t GetPresentBytes(void* pointer, size_t size);
+
   static void WriteEntriesOnExit();
 
  private:
@@ -74,6 +76,7 @@
   std::vector<memory_trace::Entry> entries_;
   size_t cur_index_;
   std::string file_;
+  int pagemap_fd_ = -1;
 
   BIONIC_DISALLOW_COPY_AND_ASSIGN(RecordData);
 };
diff --git a/libc/malloc_debug/malloc_debug.cpp b/libc/malloc_debug/malloc_debug.cpp
index fce6c24..7e96169 100644
--- a/libc/malloc_debug/malloc_debug.cpp
+++ b/libc/malloc_debug/malloc_debug.cpp
@@ -682,6 +682,13 @@
   if (DebugCallsDisabled() || pointer == nullptr) {
     return g_dispatch->free(pointer);
   }
+
+  size_t size;
+  if (g_debug->config().options() & RECORD_ALLOCS) {
+    // Need to get the size before disabling debug calls.
+    size = debug_malloc_usable_size(pointer);
+  }
+
   ScopedConcurrentLock lock;
   ScopedDisableDebugCalls disable;
   ScopedBacktraceSignalBlocker blocked;
@@ -690,11 +697,16 @@
     return;
   }
 
+  int64_t present_bytes = -1;
   memory_trace::Entry* entry = nullptr;
   if (g_debug->config().options() & RECORD_ALLOCS) {
     // In order to preserve the order of operations, reserve the entry before
     // performing the operation.
     entry = g_debug->record->ReserveEntry();
+
+    // Need to get the present bytes before the pointer is freed in case the
+    // memory is released during the free call.
+    present_bytes = g_debug->record->GetPresentBytes(pointer, size);
   }
 
   TimedResult result = InternalFree(pointer);
@@ -703,6 +715,7 @@
     *entry = memory_trace::Entry{.tid = gettid(),
                                  .type = memory_trace::FREE,
                                  .ptr = reinterpret_cast<uint64_t>(pointer),
+                                 .present_bytes = present_bytes,
                                  .start_ns = result.GetStartTimeNS(),
                                  .end_ns = result.GetEndTimeNS()};
   }
@@ -815,6 +828,13 @@
   if (DebugCallsDisabled()) {
     return g_dispatch->realloc(pointer, bytes);
   }
+
+  size_t old_size;
+  if (pointer != nullptr && g_debug->config().options() & RECORD_ALLOCS) {
+    // Need to get the size before disabling debug calls.
+    old_size = debug_malloc_usable_size(pointer);
+  }
+
   ScopedConcurrentLock lock;
   ScopedDisableDebugCalls disable;
   ScopedBacktraceSignalBlocker blocked;
@@ -845,6 +865,13 @@
     return nullptr;
   }
 
+  int64_t present_bytes = -1;
+  if (g_debug->config().options() & RECORD_ALLOCS) {
+    // Need to get the present bytes before the pointer is freed in case the
+    // memory is released during the free call.
+    present_bytes = g_debug->record->GetPresentBytes(pointer, old_size);
+  }
+
   if (bytes == 0) {
     TimedResult result = InternalFree(pointer);
 
@@ -854,6 +881,7 @@
                                    .ptr = 0,
                                    .size = 0,
                                    .u.old_ptr = reinterpret_cast<uint64_t>(pointer),
+                                   .present_bytes = present_bytes,
                                    .start_ns = result.GetStartTimeNS(),
                                    .end_ns = result.GetEndTimeNS()};
     }
@@ -957,6 +985,7 @@
                                  .ptr = reinterpret_cast<uint64_t>(new_pointer),
                                  .size = bytes,
                                  .u.old_ptr = reinterpret_cast<uint64_t>(pointer),
+                                 .present_bytes = present_bytes,
                                  .start_ns = result.GetStartTimeNS(),
                                  .end_ns = result.GetEndTimeNS()};
   }
diff --git a/libc/malloc_debug/tests/malloc_debug_record_data_tests.cpp b/libc/malloc_debug/tests/malloc_debug_record_data_tests.cpp
new file mode 100644
index 0000000..b94dc8f
--- /dev/null
+++ b/libc/malloc_debug/tests/malloc_debug_record_data_tests.cpp
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <gtest/gtest.h>
+
+#include "Config.h"
+#include "RecordData.h"
+
+#include "log_fake.h"
+
+class MallocDebugRecordDataTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    page_size_ = getpagesize();
+    Config config;
+    ASSERT_TRUE(config.Init("record_allocs"));
+    ASSERT_TRUE(record_.Initialize(config));
+  }
+
+  uint8_t* AllocPageAligned(size_t alloc_pages) {
+    uint8_t* ptr = reinterpret_cast<uint8_t*>(memalign(page_size_, alloc_pages * page_size_));
+    if (ptr == nullptr) {
+      return nullptr;
+    }
+    // Release all the pages so the test controls which ones become present.
+    EXPECT_EQ(0, madvise(ptr, page_size_ * alloc_pages, MADV_DONTNEED));
+    return ptr;
+  }
+
+  size_t page_size_;
+  RecordData record_;
+};
+
+TEST_F(MallocDebugRecordDataTest, get_present_bytes_error) {
+  EXPECT_EQ(-1, record_.GetPresentBytes(nullptr, 1000));
+  EXPECT_EQ(-1, record_.GetPresentBytes(reinterpret_cast<void*>(1000), 0));
+}
+
+TEST_F(MallocDebugRecordDataTest, get_present_bytes_edge_cases) {
+  // Need two pages to check allocations that cross a page boundary.
+  size_t alloc_pages = 2;
+  uint8_t* ptr = AllocPageAligned(alloc_pages);
+  ASSERT_TRUE(ptr != nullptr);
+  memset(ptr, 1, alloc_pages * page_size_);
+
+  EXPECT_EQ(20, record_.GetPresentBytes(ptr, 20));
+  EXPECT_EQ(page_size_ + 20, record_.GetPresentBytes(ptr, page_size_ + 20));
+  EXPECT_EQ(17, record_.GetPresentBytes(&ptr[page_size_ - 20], 17));
+  EXPECT_EQ(32, record_.GetPresentBytes(&ptr[page_size_ - 16], 32));
+  EXPECT_EQ(page_size_, record_.GetPresentBytes(ptr, page_size_));
+  EXPECT_EQ(page_size_ * 2, record_.GetPresentBytes(ptr, page_size_ * 2));
+}
+
+TEST_F(MallocDebugRecordDataTest, get_present_bytes_first_page_not_present) {
+  uint8_t* ptr = AllocPageAligned(2);
+  ASSERT_TRUE(ptr != nullptr);
+  ptr[page_size_] = 1;
+
+  EXPECT_EQ(0, record_.GetPresentBytes(ptr, page_size_));
+  EXPECT_EQ(page_size_ - 100, record_.GetPresentBytes(&ptr[100], page_size_ * 2 - 200));
+}
+
+TEST_F(MallocDebugRecordDataTest, get_present_bytes_last_page_not_present) {
+  uint8_t* ptr = AllocPageAligned(2);
+  ASSERT_TRUE(ptr != nullptr);
+  ptr[0] = 1;
+
+  EXPECT_EQ(page_size_ - 500, record_.GetPresentBytes(&ptr[500], page_size_ * 2 - 600));
+}
+
+TEST_F(MallocDebugRecordDataTest, get_present_bytes_large) {
+  // Must match kMaxReadPages in RecordData::GetPresentBytes.
+  constexpr size_t kMaxReadPages = 1024;
+  // Allocate large enough that it requires at least two preads.
+  size_t alloc_pages = 2 * kMaxReadPages;
+  uint8_t* ptr = AllocPageAligned(alloc_pages);
+  ASSERT_TRUE(ptr != nullptr);
+  // Make sure that a different number of pages is present in the first
+  // read than in the second read.
+  ptr[0] = 1;
+  ptr[page_size_] = 1;
+  ptr[page_size_ * 4] = 1;
+  // Should be in the second read.
+  size_t start = kMaxReadPages * page_size_;
+  ptr[start + page_size_ * 2] = 1;
+  ptr[start + page_size_ * 4] = 1;
+  ptr[start + page_size_ * 8] = 1;
+  ptr[start + page_size_ * 9] = 1;
+
+  EXPECT_EQ(page_size_ * 7, record_.GetPresentBytes(ptr, alloc_pages * page_size_));
+
+  // Make the entire allocation resident for the remaining checks.
+  for (size_t i = 0; i < alloc_pages; i++) {
+    ptr[i * page_size_] = 1;
+  }
+
+  EXPECT_EQ(page_size_ * kMaxReadPages, record_.GetPresentBytes(ptr, page_size_ * kMaxReadPages));
+  EXPECT_EQ(page_size_ * (kMaxReadPages + 1),
+            record_.GetPresentBytes(ptr, page_size_ * (kMaxReadPages + 1)));
+  EXPECT_EQ(page_size_ * kMaxReadPages - 50,
+            record_.GetPresentBytes(ptr, page_size_ * kMaxReadPages - 50));
+  EXPECT_EQ(page_size_ * (kMaxReadPages + 1) - 50,
+            record_.GetPresentBytes(ptr, page_size_ * (kMaxReadPages + 1) - 50));
+}
diff --git a/libc/malloc_debug/tests/malloc_debug_unit_tests.cpp b/libc/malloc_debug/tests/malloc_debug_unit_tests.cpp
index c808dc0..b24f4ad 100644
--- a/libc/malloc_debug/tests/malloc_debug_unit_tests.cpp
+++ b/libc/malloc_debug/tests/malloc_debug_unit_tests.cpp
@@ -45,6 +45,7 @@
 #include <platform/bionic/macros.h>
 #include <private/bionic_malloc_dispatch.h>
 
+#include <memory_trace/MemoryTrace.h>
 #include <unwindstack/Unwinder.h>
 
 #include "Config.h"
@@ -202,6 +203,44 @@
   }
 }
 
+static void VerifyRecordEntries(const std::vector<memory_trace::Entry>& expected,
+                                std::string& actual) {
+  ASSERT_TRUE(expected.size() != 0);
+  // Convert the text to entries.
+  std::vector<memory_trace::Entry> actual_entries;
+  for (const auto& line : android::base::Split(actual, "\n")) {
+    if (line.empty()) {
+      continue;
+    }
+    memory_trace::Entry entry;
+    std::string error;
+    ASSERT_TRUE(memory_trace::FillInEntryFromString(line, entry, error)) << error;
+    actual_entries.emplace_back(entry);
+  }
+  auto expected_iter = expected.begin();
+  for (const auto& actual_entry : actual_entries) {
+    if (actual_entry.type == memory_trace::THREAD_DONE) {
+      // Skip thread done entries.
+      continue;
+    }
+    ASSERT_NE(expected_iter, expected.end())
+        << "Found extra entry " << memory_trace::CreateStringFromEntry(actual_entry);
+    SCOPED_TRACE(testing::Message()
+                 << "\nExpected entry:\n  " << memory_trace::CreateStringFromEntry(*expected_iter)
+                 << "\nActual entry:\n  " << memory_trace::CreateStringFromEntry(actual_entry));
+    EXPECT_EQ(actual_entry.type, expected_iter->type);
+    EXPECT_EQ(actual_entry.ptr, expected_iter->ptr);
+    EXPECT_EQ(actual_entry.size, expected_iter->size);
+    EXPECT_EQ(actual_entry.u.old_ptr, expected_iter->u.old_ptr);
+    EXPECT_EQ(actual_entry.present_bytes, expected_iter->present_bytes);
+    // Verify the timestamps are non-zero.
+    EXPECT_NE(actual_entry.start_ns, 0U);
+    EXPECT_NE(actual_entry.end_ns, 0U);
+    ++expected_iter;
+  }
+  EXPECT_TRUE(expected_iter == expected.end()) << "Not all expected entries found.";
+}
+
 void VerifyAllocCalls(bool all_options) {
   size_t alloc_size = 1024;
 
@@ -2457,6 +2496,114 @@
   ASSERT_STREQ("", getFakeLogPrint().c_str());
 }
 
+TEST_F(MallocDebugTest, record_allocs_present_bytes_check) {
+  InitRecordAllocs("record_allocs record_allocs_on_exit");
+
+  // The filename created on exit always appends the pid.
+  // Modify the variable so the file is deleted at the end of the test.
+  record_filename += '.' + std::to_string(getpid());
+
+  std::vector<memory_trace::Entry> expected;
+  void* ptr = debug_malloc(100);
+  expected.push_back(memory_trace::Entry{
+      .type = memory_trace::MALLOC, .ptr = reinterpret_cast<uint64_t>(ptr), .size = 100});
+
+  // Make the entire allocation present.
+  memset(ptr, 1, 100);
+
+  int64_t real_size = debug_malloc_usable_size(ptr);
+  debug_free(ptr);
+  expected.push_back(memory_trace::Entry{.type = memory_trace::FREE,
+                                         .ptr = reinterpret_cast<uint64_t>(ptr),
+                                         .present_bytes = real_size});
+
+  ptr = debug_malloc(4096);
+  expected.push_back(memory_trace::Entry{
+      .type = memory_trace::MALLOC, .ptr = reinterpret_cast<uint64_t>(ptr), .size = 4096});
+
+  memset(ptr, 1, 4096);
+  real_size = debug_malloc_usable_size(ptr);
+  void* new_ptr = debug_realloc(ptr, 8192);
+  expected.push_back(memory_trace::Entry{.type = memory_trace::REALLOC,
+                                         .ptr = reinterpret_cast<uint64_t>(new_ptr),
+                                         .size = 8192,
+                                         .u.old_ptr = reinterpret_cast<uint64_t>(ptr),
+                                         .present_bytes = real_size});
+
+  memset(new_ptr, 1, 8192);
+  real_size = debug_malloc_usable_size(new_ptr);
+  debug_free(new_ptr);
+  expected.push_back(memory_trace::Entry{.type = memory_trace::FREE,
+                                         .ptr = reinterpret_cast<uint64_t>(new_ptr),
+                                         .present_bytes = real_size});
+
+  ptr = debug_malloc(4096);
+  expected.push_back(memory_trace::Entry{
+      .type = memory_trace::MALLOC, .ptr = reinterpret_cast<uint64_t>(ptr), .size = 4096});
+  memset(ptr, 1, 4096);
+
+  // Verify a realloc to size zero, which frees the pointer, also records the present bytes.
+  real_size = debug_malloc_usable_size(ptr);
+  EXPECT_TRUE(debug_realloc(ptr, 0) == nullptr);
+  expected.push_back(memory_trace::Entry{.type = memory_trace::REALLOC,
+                                         .ptr = 0,
+                                         .u.old_ptr = reinterpret_cast<uint64_t>(ptr),
+                                         .present_bytes = real_size});
+
+  // Call the exit function manually.
+  debug_finalize();
+
+  // Read all of the contents.
+  std::string actual;
+  ASSERT_TRUE(android::base::ReadFileToString(record_filename, &actual));
+  VerifyRecordEntries(expected, actual);
+
+  ASSERT_STREQ("", getFakeLogBuf().c_str());
+  ASSERT_STREQ("", getFakeLogPrint().c_str());
+}
+
+TEST_F(MallocDebugTest, record_allocs_not_all_bytes_present) {
+  InitRecordAllocs("record_allocs record_allocs_on_exit");
+
+  // The filename created on exit always appends the pid.
+  // Modify the variable so the file is deleted at the end of the test.
+  record_filename += '.' + std::to_string(getpid());
+
+  std::vector<memory_trace::Entry> expected;
+  size_t pagesize = getpagesize();
+  void* ptr = debug_memalign(pagesize, pagesize * 8);
+  ASSERT_TRUE(ptr != nullptr);
+  expected.push_back(memory_trace::Entry{.type = memory_trace::MEMALIGN,
+                                         .ptr = reinterpret_cast<uint64_t>(ptr),
+                                         .size = pagesize * 8,
+                                         .u.align = pagesize});
+
+  uint8_t* data = reinterpret_cast<uint8_t*>(ptr);
+  // Page out the entire allocation so that none of its pages are present.
+  ASSERT_EQ(0, madvise(ptr, pagesize * 8, MADV_PAGEOUT));
+  // Dirty three non-consecutive pages so that only those pages become
+  // present again.
+  data[0] = 1;
+  data[pagesize * 2] = 1;
+  data[pagesize * 4] = 1;
+
+  debug_free(ptr);
+  expected.push_back(memory_trace::Entry{.type = memory_trace::FREE,
+                                         .ptr = reinterpret_cast<uint64_t>(ptr),
+                                         .present_bytes = static_cast<int64_t>(pagesize) * 3});
+
+  // Call the exit function manually.
+  debug_finalize();
+
+  // Read all of the contents.
+  std::string actual;
+  ASSERT_TRUE(android::base::ReadFileToString(record_filename, &actual));
+  VerifyRecordEntries(expected, actual);
+
+  ASSERT_STREQ("", getFakeLogBuf().c_str());
+  ASSERT_STREQ("", getFakeLogPrint().c_str());
+}
+
 TEST_F(MallocDebugTest, verify_pointers) {
   Init("verify_pointers");