GPU Memory: ensure bpf program is attached to the tracepoint

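Previously, GpuMem initialization was gated on bpfloader having been
loaded. Recently, bpfloader started being initialized much earlier, which
in turn makes GpuMem initialize earlier. However, at that point the kernel
gfx driver may not have been loaded yet, so attaching the bpf program to
the tracepoint can fail. This change fixes the race by running GpuMem
initialization on a separate thread and retrying the attach once per
second until it succeeds or a 30-retry limit is reached.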

Bug: 159963505
Test: adb shell dumpsys gpu --gpumem
Test: atest gpuservice_unittest:GpuMemTest
Change-Id: If434c205ff3175354d85924075fc1de1e91151f9
Merged-In: If434c205ff3175354d85924075fc1de1e91151f9
diff --git a/services/gpuservice/GpuService.cpp b/services/gpuservice/GpuService.cpp
index 0ca8e5d..84ae608 100644
--- a/services/gpuservice/GpuService.cpp
+++ b/services/gpuservice/GpuService.cpp
@@ -31,6 +31,8 @@
 #include <utils/Trace.h>
 #include <vkjson.h>
 
+#include <thread>
+
 namespace android {
 
 using base::StringAppendF;
@@ -47,7 +49,8 @@
 
 GpuService::GpuService()
       : mGpuMem(std::make_unique<GpuMem>()), mGpuStats(std::make_unique<GpuStats>()) {
-    mGpuMem->initialize();
+    std::thread asyncInitThread([this]() { mGpuMem->initialize(); });
+    asyncInitThread.detach();
 };
 
 void GpuService::setGpuStats(const std::string& driverPackageName,
diff --git a/services/gpuservice/gpumem/GpuMem.cpp b/services/gpuservice/gpumem/GpuMem.cpp
index 1d4b524..9b4053b 100644
--- a/services/gpuservice/gpumem/GpuMem.cpp
+++ b/services/gpuservice/gpumem/GpuMem.cpp
@@ -24,6 +24,7 @@
 #include <libbpf.h>
 #include <libbpf_android.h>
 #include <log/log.h>
+#include <unistd.h>
 #include <utils/Trace.h>
 
 #include <unordered_map>
@@ -47,11 +48,17 @@
         return;
     }
 
+    // TODO(http://b/159963505): Figure out a nicer way to wait until GPU driver loaded.
     // Attach the program to the tracepoint, and the tracepoint is automatically enabled here.
-    if (bpf_attach_tracepoint(fd, kGpuMemTraceGroup, kGpuMemTotalTracepoint) < 0) {
-        ALOGE("Failed to attach bpf program to %s/%s tracepoint", kGpuMemTraceGroup,
-              kGpuMemTotalTracepoint);
-        return;
+    int count = 0;
+    while (bpf_attach_tracepoint(fd, kGpuMemTraceGroup, kGpuMemTotalTracepoint) < 0) {
+        if (++count > kGpuWaitTimeout) {
+            ALOGE("Failed to attach bpf program to %s/%s tracepoint", kGpuMemTraceGroup,
+                  kGpuMemTotalTracepoint);
+            return;
+        }
+        // Retry until GPU driver loaded or timeout.
+        sleep(1);
     }
 
     // Use the read-only wrapper BpfMapRO to properly retrieve the read-only map.
@@ -61,6 +68,8 @@
         return;
     }
     setGpuMemTotalMap(map);
+
+    mInitialized.store(true);
 }
 
 void GpuMem::setGpuMemTotalMap(bpf::BpfMap<uint64_t, uint64_t>& map) {
@@ -71,7 +80,7 @@
 void GpuMem::dump(const Vector<String16>& /* args */, std::string* result) {
     ATRACE_CALL();
 
-    if (!mGpuMemTotalMap.isValid()) {
+    if (!mInitialized.load() || !mGpuMemTotalMap.isValid()) {
         result->append("Failed to initialize GPU memory eBPF\n");
         return;
     }
diff --git a/services/gpuservice/gpumem/include/gpumem/GpuMem.h b/services/gpuservice/gpumem/include/gpumem/GpuMem.h
index 6d0322a..ff8b4bc 100644
--- a/services/gpuservice/gpumem/include/gpumem/GpuMem.h
+++ b/services/gpuservice/gpumem/include/gpumem/GpuMem.h
@@ -39,6 +39,8 @@
     // set gpu memory total map
     void setGpuMemTotalMap(bpf::BpfMap<uint64_t, uint64_t>& map);
 
+    // indicate whether ebpf has been initialized
+    std::atomic<bool> mInitialized = false;
     // bpf map for GPU memory total data
     android::bpf::BpfMap<uint64_t, uint64_t> mGpuMemTotalMap;
 
@@ -51,6 +53,8 @@
             "/sys/fs/bpf/prog_gpu_mem_tracepoint_gpu_mem_gpu_mem_total";
     // pinned gpu memory total bpf map path in bpf sysfs
     static constexpr char kGpuMemTotalMapPath[] = "/sys/fs/bpf/map_gpu_mem_gpu_mem_total_map";
+    // max number of retries, one second apart (~30 seconds), for attaching bpf program to tracepoint
+    static constexpr int kGpuWaitTimeout = 30;
 };
 
 } // namespace android
diff --git a/services/gpuservice/tests/unittests/GpuMemTest.cpp b/services/gpuservice/tests/unittests/GpuMemTest.cpp
index 6ba304c..abaf30a 100644
--- a/services/gpuservice/tests/unittests/GpuMemTest.cpp
+++ b/services/gpuservice/tests/unittests/GpuMemTest.cpp
@@ -62,6 +62,7 @@
 
         mGpuMem = std::make_unique<GpuMem>();
         mTestableGpuMem = TestableGpuMem(mGpuMem.get());
+        mTestableGpuMem.setInitialized();
         errno = 0;
         mTestMap = bpf::BpfMap<uint64_t, uint64_t>(BPF_MAP_TYPE_HASH, TEST_MAP_SIZE,
                                                    BPF_F_NO_PREALLOC);
diff --git a/services/gpuservice/tests/unittests/TestableGpuMem.h b/services/gpuservice/tests/unittests/TestableGpuMem.h
index 0e4b01c..6c8becb 100644
--- a/services/gpuservice/tests/unittests/TestableGpuMem.h
+++ b/services/gpuservice/tests/unittests/TestableGpuMem.h
@@ -26,6 +26,8 @@
     TestableGpuMem() = default;
     explicit TestableGpuMem(GpuMem *gpuMem) : mGpuMem(gpuMem) {}
 
+    void setInitialized() { mGpuMem->mInitialized.store(true); }
+
     void setGpuMemTotalMap(bpf::BpfMap<uint64_t, uint64_t>& map) {
         mGpuMem->setGpuMemTotalMap(map);
     }