binderRpcBenchmark: test many-proxy behavior

Run the benchmark, e.g. with --benchmark_filter=BM_collectProxies. Here
are the results on cuttlefish, grouped by transport:

--------------------------------------------------------------------
Benchmark                          Time             CPU   Iterations
--------------------------------------------------------------------
BM_collectProxies/0/10       1533424 ns       730683 ns          754 kernel
BM_collectProxies/0/100     11205890 ns      5137458 ns          100 kernel
BM_collectProxies/0/1000   142801788 ns     62453023 ns           13 kernel
BM_collectProxies/0/5000  1027138576 ns    343139318 ns            2 kernel
BM_collectProxies/0/10000 2693937199 ns    649060571 ns            1 kernel
BM_collectProxies/0/20000 6821069171 ns    993179895 ns            1 kernel

BM_collectProxies/1/10       1548213 ns       777264 ns          703 rpc
BM_collectProxies/1/100     11427798 ns      5646163 ns          100 rpc
BM_collectProxies/1/1000   139841963 ns     64293601 ns           10 rpc
BM_collectProxies/1/5000   704893121 ns    289427753 ns            2 rpc
BM_collectProxies/1/10000 2248307827 ns    646102929 ns            1 rpc
BM_collectProxies/1/20000 5540550470 ns   1045339220 ns            1 rpc

BM_collectProxies/2/10       1992236 ns      1131694 ns          641 rpc_tls
BM_collectProxies/2/100     17621061 ns      9583246 ns           73 rpc_tls
BM_collectProxies/2/1000   170796751 ns     93129790 ns           10 rpc_tls
BM_collectProxies/2/5000  1062981321 ns    473419293 ns            2 rpc_tls
BM_collectProxies/2/10000 2146783259 ns    782427997 ns            1 rpc_tls
BM_collectProxies/2/20000 7441237493 ns   2041843159 ns            1 rpc_tls

Note the super-linear scaling above 1000 binder objects.
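For example, with the kernel transport, going from 1000 to 20000
objects (20x) increases wall time from ~143 ms to ~6.8 s (~48x), and
with 'rpc' from ~140 ms to ~5.5 s (~40x).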

Because we see this scaling in both kernel binder and RPC binder (to a
lesser extent for 'rpc'; ignore 'rpc_tls', which adds a lot of extra
overhead), part of the nonlinearity likely comes from shared userspace
code, or RPC binder itself has a nonlinearity similar to the binder
driver's.

Fixes: 298049518
Test: binderRpcBenchmark

Change-Id: Ib43ac68a1b1f9c11df06af59e3c5572abeabbd5d
diff --git a/libs/binder/tests/binderRpcBenchmark.cpp b/libs/binder/tests/binderRpcBenchmark.cpp
index 9c96c41..4f10d74 100644
--- a/libs/binder/tests/binderRpcBenchmark.cpp
+++ b/libs/binder/tests/binderRpcBenchmark.cpp
@@ -74,6 +74,46 @@
         *out = bytes;
         return Status::ok();
     }
+
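+    // Binder object which counts itself in the parent service on creation
+    // and destruction, so the benchmark can wait for all of them to die.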
+    class CountedBinder : public BBinder {
+    public:
+        CountedBinder(const sp<MyBinderRpcBenchmark>& parent) : mParent(parent) {
+            std::lock_guard<std::mutex> l(mParent->mCountMutex);
+            mParent->mBinderCount++;
+            // std::cout << "Count + is now " << mParent->mBinderCount << std::endl;
+        }
+        ~CountedBinder() {
+            {
+                std::lock_guard<std::mutex> l(mParent->mCountMutex);
+                mParent->mBinderCount--;
+                // std::cout << "Count - is now " << mParent->mBinderCount << std::endl;
+
+                // skip notify
+                if (mParent->mBinderCount != 0) return;
+            }
+            mParent->mCountCv.notify_one();
+        }
+
+    private:
+        sp<MyBinderRpcBenchmark> mParent;
+    };
+
+    Status gimmeBinder(sp<IBinder>* out) override {
+        *out = sp<CountedBinder>::make(sp<MyBinderRpcBenchmark>::fromExisting(this));
+        return Status::ok();
+    }
+    Status waitGimmesDestroyed() override {
+        std::unique_lock<std::mutex> l(mCountMutex);
+        mCountCv.wait(l, [&] { return mBinderCount == 0; });
+        return Status::ok();
+    }
+
+    friend class CountedBinder;
+    std::mutex mCountMutex;
+    std::condition_variable mCountCv;
+    size_t mBinderCount = 0;
 };
 
 enum Transport {
@@ -212,6 +250,40 @@
         ->ArgsProduct({kTransportList,
                        {64, 1024, 2048, 4096, 8182, 16364, 32728, 65535, 65536, 65537}});
 
+void BM_collectProxies(benchmark::State& state) {
+    sp<IBinder> binder = getBinderForOptions(state);
+    sp<IBinderRpcBenchmark> iface = interface_cast<IBinderRpcBenchmark>(binder);
+    CHECK(iface != nullptr);
+
+    const size_t kNumIters = state.range(1);
+
+    while (state.KeepRunning()) {
+        std::vector<sp<IBinder>> out;
+        out.resize(kNumIters);
+
+        for (size_t i = 0; i < kNumIters; i++) {
+            Status ret = iface->gimmeBinder(&out[i]);
+            CHECK(ret.isOk()) << ret;
+        }
+
+        out.clear();
+
+        // The wait below ties up a thread, so make a call first to force
+        // all refcounts to be updated. With current binder behavior, we
+        // don't really need to wait at all here; the code which waits is
+        // only there to protect against any future changes which could
+        // delay destruction.
+        android::IInterface::asBinder(iface)->pingBinder();
+
+        iface->waitGimmesDestroyed();
+    }
+
+    SetLabel(state);
+}
+BENCHMARK(BM_collectProxies)->ArgsProduct({kTransportList, {10, 100, 1000, 5000, 10000, 20000}});
+
 void BM_repeatBinder(benchmark::State& state) {
     sp<IBinder> binder = getBinderForOptions(state);
     CHECK(binder != nullptr);