binderThroughputTest: compute percentiles exactly
Instead of using buckets to compute the percentiles, store all of the
data and compute percentiles exactly by sorting.
This avoids cases where the computed maximum latency is much much larger
than the average latency. For example, with `-w 64 -i 100000 -p -t`, I
got an average latency of 0.47ms, but the worst latency is 243ms. This
puts everything below 1.8ms in the same bucket.
Bug: 293593894
Test: Manually running the benchmark outputs the correct values
Change-Id: Ib215e4fb503e053294525d944f045555aadae97f
Signed-off-by: Alice Ryhl <aliceryhl@google.com>
diff --git a/libs/binder/tests/binderThroughputTest.cpp b/libs/binder/tests/binderThroughputTest.cpp
index 8f99295..d7f6318 100644
--- a/libs/binder/tests/binderThroughputTest.cpp
+++ b/libs/binder/tests/binderThroughputTest.cpp
@@ -49,6 +49,63 @@
}
};
+static uint64_t warn_latency = std::numeric_limits<uint64_t>::max();
+
+struct ProcResults {
+ vector<uint64_t> data;
+
+ ProcResults(size_t capacity) { data.reserve(capacity); }
+
+ void add_time(uint64_t time) { data.push_back(time); }
+ void combine_with(const ProcResults& append) {
+ data.insert(data.end(), append.data.begin(), append.data.end());
+ }
+ uint64_t worst() {
+ return *max_element(data.begin(), data.end());
+ }
+ void dump() {
+ if (data.size() == 0) {
+ // This avoids index-out-of-bounds below.
+ cout << "error: no data\n" << endl;
+ return;
+ }
+
+ size_t num_long_transactions = 0;
+ for (uint64_t elem : data) {
+ if (elem > warn_latency) {
+ num_long_transactions += 1;
+ }
+ }
+
+ if (num_long_transactions > 0) {
+ cout << (double)num_long_transactions / data.size() << "% of transactions took longer "
+ "than estimated max latency. Consider setting -m to be higher than "
+ << worst() / 1000 << " microseconds" << endl;
+ }
+
+ sort(data.begin(), data.end());
+
+ uint64_t total_time = 0;
+ for (uint64_t elem : data) {
+ total_time += elem;
+ }
+
+ double best = (double)data[0] / 1.0E6;
+ double worst = (double)data.back() / 1.0E6;
+ double average = (double)total_time / data.size() / 1.0E6;
+ cout << "average:" << average << "ms worst:" << worst << "ms best:" << best << "ms" << endl;
+
+ double percentile_50 = data[(50 * data.size()) / 100] / 1.0E6;
+ double percentile_90 = data[(90 * data.size()) / 100] / 1.0E6;
+ double percentile_95 = data[(95 * data.size()) / 100] / 1.0E6;
+ double percentile_99 = data[(99 * data.size()) / 100] / 1.0E6;
+ cout << "50%: " << percentile_50 << " ";
+ cout << "90%: " << percentile_90 << " ";
+ cout << "95%: " << percentile_95 << " ";
+ cout << "99%: " << percentile_99 << endl;
+ }
+};
+
class Pipe {
int m_readFd;
int m_writeFd;
@@ -79,13 +136,37 @@
int error = read(m_readFd, &val, sizeof(val));
ASSERT_TRUE(error >= 0);
}
- template <typename T> void send(const T& v) {
- int error = write(m_writeFd, &v, sizeof(T));
+ void send(const ProcResults& v) {
+ size_t num_elems = v.data.size();
+
+ int error = write(m_writeFd, &num_elems, sizeof(size_t));
ASSERT_TRUE(error >= 0);
+
+ char* to_write = (char*)v.data.data();
+ size_t num_bytes = sizeof(uint64_t) * num_elems;
+
+ while (num_bytes > 0) {
+ int ret = write(m_writeFd, to_write, num_bytes);
+ ASSERT_TRUE(ret >= 0);
+ num_bytes -= ret;
+ to_write += ret;
+ }
}
- template <typename T> void recv(T& v) {
- int error = read(m_readFd, &v, sizeof(T));
+ void recv(ProcResults& v) {
+ size_t num_elems = 0;
+ int error = read(m_readFd, &num_elems, sizeof(size_t));
ASSERT_TRUE(error >= 0);
+
+ v.data.resize(num_elems);
+ char* read_to = (char*)v.data.data();
+ size_t num_bytes = sizeof(uint64_t) * num_elems;
+
+ while (num_bytes > 0) {
+ int ret = read(m_readFd, read_to, num_bytes);
+ ASSERT_TRUE(ret >= 0);
+ num_bytes -= ret;
+ read_to += ret;
+ }
}
static tuple<Pipe, Pipe> createPipePair() {
int a[2];
@@ -100,74 +181,6 @@
}
};
-static const uint32_t num_buckets = 128;
-static uint64_t max_time_bucket = 50ull * 1000000;
-static uint64_t time_per_bucket = max_time_bucket / num_buckets;
-
-struct ProcResults {
- uint64_t m_worst = 0;
- uint32_t m_buckets[num_buckets] = {0};
- uint64_t m_transactions = 0;
- uint64_t m_long_transactions = 0;
- uint64_t m_total_time = 0;
- uint64_t m_best = max_time_bucket;
-
- void add_time(uint64_t time) {
- if (time > max_time_bucket) {
- m_long_transactions++;
- }
- m_buckets[min((uint32_t)(time / time_per_bucket), num_buckets - 1)] += 1;
- m_best = min(time, m_best);
- m_worst = max(time, m_worst);
- m_transactions += 1;
- m_total_time += time;
- }
- static ProcResults combine(const ProcResults& a, const ProcResults& b) {
- ProcResults ret;
- for (int i = 0; i < num_buckets; i++) {
- ret.m_buckets[i] = a.m_buckets[i] + b.m_buckets[i];
- }
- ret.m_worst = max(a.m_worst, b.m_worst);
- ret.m_best = min(a.m_best, b.m_best);
- ret.m_transactions = a.m_transactions + b.m_transactions;
- ret.m_long_transactions = a.m_long_transactions + b.m_long_transactions;
- ret.m_total_time = a.m_total_time + b.m_total_time;
- return ret;
- }
- void dump() {
- if (m_long_transactions > 0) {
- cout << (double)m_long_transactions / m_transactions << "% of transactions took longer "
- "than estimated max latency. Consider setting -m to be higher than "
- << m_worst / 1000 << " microseconds" << endl;
- }
-
- double best = (double)m_best / 1.0E6;
- double worst = (double)m_worst / 1.0E6;
- double average = (double)m_total_time / m_transactions / 1.0E6;
- cout << "average:" << average << "ms worst:" << worst << "ms best:" << best << "ms" << endl;
-
- uint64_t cur_total = 0;
- float time_per_bucket_ms = time_per_bucket / 1.0E6;
- for (int i = 0; i < num_buckets; i++) {
- float cur_time = time_per_bucket_ms * i + 0.5f * time_per_bucket_ms;
- if ((cur_total < 0.5f * m_transactions) && (cur_total + m_buckets[i] >= 0.5f * m_transactions)) {
- cout << "50%: " << cur_time << " ";
- }
- if ((cur_total < 0.9f * m_transactions) && (cur_total + m_buckets[i] >= 0.9f * m_transactions)) {
- cout << "90%: " << cur_time << " ";
- }
- if ((cur_total < 0.95f * m_transactions) && (cur_total + m_buckets[i] >= 0.95f * m_transactions)) {
- cout << "95%: " << cur_time << " ";
- }
- if ((cur_total < 0.99f * m_transactions) && (cur_total + m_buckets[i] >= 0.99f * m_transactions)) {
- cout << "99%: " << cur_time << " ";
- }
- cur_total += m_buckets[i];
- }
- cout << endl;
- }
-};
-
String16 generateServiceName(int num)
{
char num_str[32];
@@ -208,7 +221,8 @@
}
// Run the benchmark if client
- ProcResults results;
+ ProcResults results(iterations);
+
chrono::time_point<chrono::high_resolution_clock> start, end;
for (int i = 0; (!cs_pair || num >= server_count) && i < iterations; i++) {
Parcel data, reply;
@@ -302,11 +316,10 @@
// Collect all results from the workers.
cout << "collecting results" << endl;
signal_all(pipes);
- ProcResults tot_results;
+ ProcResults tot_results(0), tmp_results(0);
for (int i = 0; i < workers; i++) {
- ProcResults tmp_results;
pipes[i].recv(tmp_results);
- tot_results = ProcResults::combine(tot_results, tmp_results);
+ tot_results.combine_with(tmp_results);
}
// Kill all the workers.
@@ -320,13 +333,11 @@
}
}
if (training_round) {
- // sets max_time_bucket to 2 * m_worst from the training round.
- // Also needs to adjust time_per_bucket accordingly.
- max_time_bucket = 2 * tot_results.m_worst;
- time_per_bucket = max_time_bucket / num_buckets;
- cout << "Max latency during training: " << tot_results.m_worst / 1.0E6 << "ms" << endl;
+ // Sets warn_latency to 2 * worst from the training round.
+ warn_latency = 2 * tot_results.worst();
+ cout << "Max latency during training: " << tot_results.worst() / 1.0E6 << "ms" << endl;
} else {
- tot_results.dump();
+ tot_results.dump();
}
}
@@ -403,8 +414,7 @@
cout << "Max latency -m must be positive." << endl;
exit(EXIT_FAILURE);
}
- max_time_bucket = max_time_us * 1000ull;
- time_per_bucket = max_time_bucket / num_buckets;
+ warn_latency = max_time_us * 1000ull;
i++;
continue;
}