|  | #include <binder/Binder.h> | 
|  | #include <binder/IBinder.h> | 
|  | #include <binder/IPCThreadState.h> | 
|  | #include <binder/IServiceManager.h> | 
|  | #include <string> | 
|  | #include <cstring> | 
|  | #include <cstdlib> | 
|  | #include <cstdio> | 
|  |  | 
|  | #include <iostream> | 
|  | #include <vector> | 
|  | #include <tuple> | 
|  |  | 
|  | #include <unistd.h> | 
|  | #include <sys/wait.h> | 
|  |  | 
|  | using namespace std; | 
|  | using namespace android; | 
|  |  | 
|  | enum BinderWorkerServiceCode { | 
|  | BINDER_NOP = IBinder::FIRST_CALL_TRANSACTION, | 
|  | }; | 
|  |  | 
// Checks `cond` (evaluated exactly once). When it is false, reports the
// enclosing function, the line number and the failed expression on stderr and
// terminates the process with EXIT_FAILURE. Wrapped in do/while(0) so that the
// macro behaves like a single statement.
#define ASSERT_TRUE(cond)                                                    \
    do {                                                                     \
        if (!(cond)) {                                                       \
            std::cerr << __func__ << ":" << __LINE__ << " condition:"        \
                      << #cond << " failed\n" << std::endl;                  \
            std::exit(EXIT_FAILURE);                                         \
        }                                                                    \
    } while (0)
|  |  | 
|  | class BinderWorkerService : public BBinder | 
|  | { | 
|  | public: | 
|  | BinderWorkerService() {} | 
|  | ~BinderWorkerService() {} | 
|  | virtual status_t onTransact(uint32_t code, | 
|  | const Parcel& data, Parcel* reply, | 
|  | uint32_t flags = 0) { | 
|  | (void)flags; | 
|  | (void)data; | 
|  | (void)reply; | 
|  | switch (code) { | 
|  | case BINDER_NOP: | 
|  | return NO_ERROR; | 
|  | default: | 
|  | return UNKNOWN_TRANSACTION; | 
|  | }; | 
|  | } | 
|  | }; | 
|  |  | 
|  | class Pipe { | 
|  | int m_readFd; | 
|  | int m_writeFd; | 
|  | Pipe(int readFd, int writeFd) : m_readFd{readFd}, m_writeFd{writeFd} {} | 
|  | Pipe(const Pipe &) = delete; | 
|  | Pipe& operator=(const Pipe &) = delete; | 
|  | Pipe& operator=(const Pipe &&) = delete; | 
|  | public: | 
|  | Pipe(Pipe&& rval) noexcept { | 
|  | m_readFd = rval.m_readFd; | 
|  | m_writeFd = rval.m_writeFd; | 
|  | rval.m_readFd = 0; | 
|  | rval.m_writeFd = 0; | 
|  | } | 
|  | ~Pipe() { | 
|  | if (m_readFd) | 
|  | close(m_readFd); | 
|  | if (m_writeFd) | 
|  | close(m_writeFd); | 
|  | } | 
|  | void signal() { | 
|  | bool val = true; | 
|  | int error = write(m_writeFd, &val, sizeof(val)); | 
|  | ASSERT_TRUE(error >= 0); | 
|  | }; | 
|  | void wait() { | 
|  | bool val = false; | 
|  | int error = read(m_readFd, &val, sizeof(val)); | 
|  | ASSERT_TRUE(error >= 0); | 
|  | } | 
|  | template <typename T> void send(const T& v) { | 
|  | int error = write(m_writeFd, &v, sizeof(T)); | 
|  | ASSERT_TRUE(error >= 0); | 
|  | } | 
|  | template <typename T> void recv(T& v) { | 
|  | int error = read(m_readFd, &v, sizeof(T)); | 
|  | ASSERT_TRUE(error >= 0); | 
|  | } | 
|  | static tuple<Pipe, Pipe> createPipePair() { | 
|  | int a[2]; | 
|  | int b[2]; | 
|  |  | 
|  | int error1 = pipe(a); | 
|  | int error2 = pipe(b); | 
|  | ASSERT_TRUE(error1 >= 0); | 
|  | ASSERT_TRUE(error2 >= 0); | 
|  |  | 
|  | return make_tuple(Pipe(a[0], b[1]), Pipe(b[0], a[1])); | 
|  | } | 
|  | }; | 
|  |  | 
// Number of buckets in the latency histogram.
static const uint32_t num_buckets = 128;
// Latency (in nanoseconds) covered by the histogram; anything above it is
// counted as a "long" transaction and lands in the last bucket.
static uint64_t max_time_bucket = 50ull * 1000000;
// Width of a single histogram bucket in nanoseconds.
static uint64_t time_per_bucket = max_time_bucket / num_buckets;

// Latency statistics of one worker (or the combination of several workers).
// All times are in nanoseconds.
struct ProcResults {
    uint64_t m_worst = 0;
    uint32_t m_buckets[num_buckets] = {0};
    uint64_t m_transactions = 0;
    uint64_t m_long_transactions = 0;
    uint64_t m_total_time = 0;
    // Starts at the largest representable value so that the first sample
    // always becomes the best one, even when it exceeds max_time_bucket.
    uint64_t m_best = ~static_cast<uint64_t>(0);

    // Records the latency of one transaction.
    void add_time(uint64_t time) {
        if (time > max_time_bucket) {
            m_long_transactions++;
        }
        // Guard against a zero bucket width (division by zero) and clamp in
        // 64 bits before indexing so that huge quotients cannot wrap around
        // into a low bucket.
        const uint64_t width = time_per_bucket > 0 ? time_per_bucket : 1;
        const uint64_t index = std::min<uint64_t>(time / width, num_buckets - 1);
        m_buckets[index] += 1;
        m_best = std::min(time, m_best);
        m_worst = std::max(time, m_worst);
        m_transactions += 1;
        m_total_time += time;
    }

    // Returns the statistics of the union of the samples in `a` and `b`.
    static ProcResults combine(const ProcResults& a, const ProcResults& b) {
        ProcResults ret;
        for (uint32_t i = 0; i < num_buckets; i++) {
            ret.m_buckets[i] = a.m_buckets[i] + b.m_buckets[i];
        }
        ret.m_worst = std::max(a.m_worst, b.m_worst);
        ret.m_best = std::min(a.m_best, b.m_best);
        ret.m_transactions = a.m_transactions + b.m_transactions;
        ret.m_long_transactions = a.m_long_transactions + b.m_long_transactions;
        ret.m_total_time = a.m_total_time + b.m_total_time;
        return ret;
    }

    // Prints average/worst/best latency and the 50/90/95/99 percentile
    // estimates (bucket mid-points, in milliseconds) to stdout.
    void dump() {
        if (m_transactions == 0) {
            // Nothing was measured; every ratio below would be meaningless.
            std::cout << "no transactions recorded" << std::endl;
            return;
        }
        if (m_long_transactions > 0) {
            // Scale the ratio by 100 so the number matches the "%" label.
            std::cout << 100.0 * m_long_transactions / m_transactions << "% of transactions took longer "
                "than estimated max latency. Consider setting -m to be higher than "
                 << m_worst / 1000 << " microseconds" << std::endl;
        }

        const double best = (double)m_best / 1.0E6;
        const double worst = (double)m_worst / 1.0E6;
        const double average = (double)m_total_time / m_transactions / 1.0E6;
        std::cout << "average:" << average << "ms worst:" << worst << "ms best:" << best << "ms" << std::endl;

        struct Percentile {
            float fraction;
            const char* label;
        };
        static const Percentile kPercentiles[] = {
            {0.5f, "50%: "}, {0.9f, "90%: "}, {0.95f, "95%: "}, {0.99f, "99%: "},
        };

        uint64_t cur_total = 0;
        const float time_per_bucket_ms = time_per_bucket / 1.0E6;
        for (uint32_t i = 0; i < num_buckets; i++) {
            const float cur_time = time_per_bucket_ms * i + 0.5f * time_per_bucket_ms;
            // A percentile is reported for the bucket in which the running
            // sample count first reaches its threshold.
            for (const Percentile& pct : kPercentiles) {
                const float threshold = pct.fraction * m_transactions;
                if ((cur_total < threshold) && (cur_total + m_buckets[i] >= threshold)) {
                    std::cout << pct.label << cur_time << " ";
                }
            }
            cur_total += m_buckets[i];
        }
        std::cout << std::endl;
    }
};
|  |  | 
|  | String16 generateServiceName(int num) | 
|  | { | 
|  | char num_str[32]; | 
|  | snprintf(num_str, sizeof(num_str), "%d", num); | 
|  | String16 serviceName = String16("binderWorker") + String16(num_str); | 
|  | return serviceName; | 
|  | } | 
|  |  | 
|  | void worker_fx(int num, | 
|  | int worker_count, | 
|  | int iterations, | 
|  | int payload_size, | 
|  | bool cs_pair, | 
|  | Pipe p) | 
|  | { | 
|  | // Create BinderWorkerService and for go. | 
|  | ProcessState::self()->startThreadPool(); | 
|  | sp<IServiceManager> serviceMgr = defaultServiceManager(); | 
|  | sp<BinderWorkerService> service = new BinderWorkerService; | 
|  | serviceMgr->addService(generateServiceName(num), service); | 
|  |  | 
|  | srand(num); | 
|  | p.signal(); | 
|  | p.wait(); | 
|  |  | 
|  | // If client/server pairs, then half the workers are | 
|  | // servers and half are clients | 
|  | int server_count = cs_pair ? worker_count / 2 : worker_count; | 
|  |  | 
|  | // Get references to other binder services. | 
|  | cout << "Created BinderWorker" << num << endl; | 
|  | (void)worker_count; | 
|  | vector<sp<IBinder> > workers; | 
|  | for (int i = 0; i < server_count; i++) { | 
|  | if (num == i) | 
|  | continue; | 
|  | workers.push_back(serviceMgr->getService(generateServiceName(i))); | 
|  | } | 
|  |  | 
|  | // Run the benchmark if client | 
|  | ProcResults results; | 
|  | chrono::time_point<chrono::high_resolution_clock> start, end; | 
|  | for (int i = 0; (!cs_pair || num >= server_count) && i < iterations; i++) { | 
|  | Parcel data, reply; | 
|  | int target = cs_pair ? num % server_count : rand() % workers.size(); | 
|  | int sz = payload_size; | 
|  |  | 
|  | while (sz >= sizeof(uint32_t)) { | 
|  | data.writeInt32(0); | 
|  | sz -= sizeof(uint32_t); | 
|  | } | 
|  | start = chrono::high_resolution_clock::now(); | 
|  | status_t ret = workers[target]->transact(BINDER_NOP, data, &reply); | 
|  | end = chrono::high_resolution_clock::now(); | 
|  |  | 
|  | uint64_t cur_time = uint64_t(chrono::duration_cast<chrono::nanoseconds>(end - start).count()); | 
|  | results.add_time(cur_time); | 
|  |  | 
|  | if (ret != NO_ERROR) { | 
|  | cout << "thread " << num << " failed " << ret << "i : " << i << endl; | 
|  | exit(EXIT_FAILURE); | 
|  | } | 
|  | } | 
|  |  | 
|  | // Signal completion to master and wait. | 
|  | p.signal(); | 
|  | p.wait(); | 
|  |  | 
|  | // Send results to master and wait for go to exit. | 
|  | p.send(results); | 
|  | p.wait(); | 
|  |  | 
|  | exit(EXIT_SUCCESS); | 
|  | } | 
|  |  | 
|  | Pipe make_worker(int num, int iterations, int worker_count, int payload_size, bool cs_pair) | 
|  | { | 
|  | auto pipe_pair = Pipe::createPipePair(); | 
|  | pid_t pid = fork(); | 
|  | if (pid) { | 
|  | /* parent */ | 
|  | return move(get<0>(pipe_pair)); | 
|  | } else { | 
|  | /* child */ | 
|  | worker_fx(num, worker_count, iterations, payload_size, cs_pair, move(get<1>(pipe_pair))); | 
|  | /* never get here */ | 
|  | return move(get<0>(pipe_pair)); | 
|  | } | 
|  |  | 
|  | } | 
|  |  | 
|  | void wait_all(vector<Pipe>& v) | 
|  | { | 
|  | for (int i = 0; i < v.size(); i++) { | 
|  | v[i].wait(); | 
|  | } | 
|  | } | 
|  |  | 
|  | void signal_all(vector<Pipe>& v) | 
|  | { | 
|  | for (int i = 0; i < v.size(); i++) { | 
|  | v[i].signal(); | 
|  | } | 
|  | } | 
|  |  | 
|  | void run_main(int iterations, | 
|  | int workers, | 
|  | int payload_size, | 
|  | int cs_pair, | 
|  | bool training_round=false) | 
|  | { | 
|  | vector<Pipe> pipes; | 
|  | // Create all the workers and wait for them to spawn. | 
|  | for (int i = 0; i < workers; i++) { | 
|  | pipes.push_back(make_worker(i, iterations, workers, payload_size, cs_pair)); | 
|  | } | 
|  | wait_all(pipes); | 
|  |  | 
|  | // Run the workers and wait for completion. | 
|  | chrono::time_point<chrono::high_resolution_clock> start, end; | 
|  | cout << "waiting for workers to complete" << endl; | 
|  | start = chrono::high_resolution_clock::now(); | 
|  | signal_all(pipes); | 
|  | wait_all(pipes); | 
|  | end = chrono::high_resolution_clock::now(); | 
|  |  | 
|  | // Calculate overall throughput. | 
|  | double iterations_per_sec = double(iterations * workers) / (chrono::duration_cast<chrono::nanoseconds>(end - start).count() / 1.0E9); | 
|  | cout << "iterations per sec: " << iterations_per_sec << endl; | 
|  |  | 
|  | // Collect all results from the workers. | 
|  | cout << "collecting results" << endl; | 
|  | signal_all(pipes); | 
|  | ProcResults tot_results; | 
|  | for (int i = 0; i < workers; i++) { | 
|  | ProcResults tmp_results; | 
|  | pipes[i].recv(tmp_results); | 
|  | tot_results = ProcResults::combine(tot_results, tmp_results); | 
|  | } | 
|  |  | 
|  | // Kill all the workers. | 
|  | cout << "killing workers" << endl; | 
|  | signal_all(pipes); | 
|  | for (int i = 0; i < workers; i++) { | 
|  | int status; | 
|  | wait(&status); | 
|  | if (status != 0) { | 
|  | cout << "nonzero child status" << status << endl; | 
|  | } | 
|  | } | 
|  | if (training_round) { | 
|  | // sets max_time_bucket to 2 * m_worst from the training round. | 
|  | // Also needs to adjust time_per_bucket accordingly. | 
|  | max_time_bucket = 2 * tot_results.m_worst; | 
|  | time_per_bucket = max_time_bucket / num_buckets; | 
|  | cout << "Max latency during training: " << tot_results.m_worst / 1.0E6 << "ms" << endl; | 
|  | } else { | 
|  | tot_results.dump(); | 
|  | } | 
|  | } | 
|  |  | 
|  | int main(int argc, char *argv[]) | 
|  | { | 
|  | int workers = 2; | 
|  | int iterations = 10000; | 
|  | int payload_size = 0; | 
|  | bool cs_pair = false; | 
|  | bool training_round = false; | 
|  | (void)argc; | 
|  | (void)argv; | 
|  |  | 
|  | // Parse arguments. | 
|  | for (int i = 1; i < argc; i++) { | 
|  | if (string(argv[i]) == "--help") { | 
|  | cout << "Usage: binderThroughputTest [OPTIONS]" << endl; | 
|  | cout << "\t-i N    : Specify number of iterations." << endl; | 
|  | cout << "\t-m N    : Specify expected max latency in microseconds." << endl; | 
|  | cout << "\t-p      : Split workers into client/server pairs." << endl; | 
|  | cout << "\t-s N    : Specify payload size." << endl; | 
|  | cout << "\t-t N    : Run training round." << endl; | 
|  | cout << "\t-w N    : Specify total number of workers." << endl; | 
|  | return 0; | 
|  | } | 
|  | if (string(argv[i]) == "-w") { | 
|  | workers = atoi(argv[i+1]); | 
|  | i++; | 
|  | continue; | 
|  | } | 
|  | if (string(argv[i]) == "-i") { | 
|  | iterations = atoi(argv[i+1]); | 
|  | i++; | 
|  | continue; | 
|  | } | 
|  | if (string(argv[i]) == "-s") { | 
|  | payload_size = atoi(argv[i+1]); | 
|  | i++; | 
|  | } | 
|  | if (string(argv[i]) == "-p") { | 
|  | // client/server pairs instead of spreading | 
|  | // requests to all workers. If true, half | 
|  | // the workers become clients and half servers | 
|  | cs_pair = true; | 
|  | } | 
|  | if (string(argv[i]) == "-t") { | 
|  | // Run one training round before actually collecting data | 
|  | // to get an approximation of max latency. | 
|  | training_round = true; | 
|  | } | 
|  | if (string(argv[i]) == "-m") { | 
|  | // Caller specified the max latency in microseconds. | 
|  | // No need to run training round in this case. | 
|  | if (atoi(argv[i+1]) > 0) { | 
|  | max_time_bucket = strtoull(argv[i+1], (char **)nullptr, 10) * 1000; | 
|  | time_per_bucket = max_time_bucket / num_buckets; | 
|  | i++; | 
|  | } else { | 
|  | cout << "Max latency -m must be positive." << endl; | 
|  | exit(EXIT_FAILURE); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | if (training_round) { | 
|  | cout << "Start training round" << endl; | 
|  | run_main(iterations, workers, payload_size, cs_pair, training_round=true); | 
|  | cout << "Completed training round" << endl << endl; | 
|  | } | 
|  |  | 
|  | run_main(iterations, workers, payload_size, cs_pair); | 
|  | return 0; | 
|  | } |