Alex Vakulenko | e4eec20 | 2017-01-27 14:41:04 -0800 | [diff] [blame^] | 1 | #include "include/private/dvr/graphics/gpu_profiler.h" |
| 2 | |
| 3 | #include <cutils/log.h> |
| 4 | |
| 5 | #include <private/dvr/clock_ns.h> |
| 6 | |
| 7 | namespace android { |
| 8 | namespace dvr { |
| 9 | |
| 10 | static int64_t AdjustTimerQueryToNs(int64_t gpu_time) { return gpu_time; } |
| 11 | |
| 12 | void GpuProfiler::TimerData::reset() { |
| 13 | total_elapsed_ns = 0; |
| 14 | num_events = 0; |
| 15 | } |
| 16 | |
| 17 | void GpuProfiler::TimerData::print(const char* name) const { |
| 18 | ALOGI("GPU_TIME[%s]: %f ms", name, |
| 19 | (float)((double)total_elapsed_ns / 1000000.0 / (double)num_events)); |
| 20 | } |
| 21 | |
| 22 | // Enter a scope, records the timestamp for later matching with leave. |
| 23 | void GpuProfiler::TimerData::enter(int64_t timestamp_ns) { |
| 24 | enter_timestamp_ns = timestamp_ns; |
| 25 | } |
| 26 | |
| 27 | // Compute the elapsed time for the scope. |
| 28 | void GpuProfiler::TimerData::leave(int64_t timestamp_ns, const char* name, |
| 29 | std::weak_ptr<int64_t> duration_ns, |
| 30 | int print_period) { |
| 31 | int64_t elapsed = timestamp_ns - enter_timestamp_ns; |
| 32 | if (elapsed > 1000 * 1000 * 1000) { |
| 33 | // More than one second, drop it as invalid data. |
| 34 | return; |
| 35 | } |
| 36 | if (auto out_ns = duration_ns.lock()) { |
| 37 | *out_ns = elapsed; |
| 38 | } |
| 39 | total_elapsed_ns += elapsed; |
| 40 | if (print_period > 0 && ++num_events >= print_period) { |
| 41 | print(name); |
| 42 | reset(); |
| 43 | } |
| 44 | } |
| 45 | |
| 46 | GpuProfiler* GpuProfiler::Get() { |
| 47 | static GpuProfiler* profiler = new GpuProfiler(); |
| 48 | return profiler; |
| 49 | } |
| 50 | |
| 51 | GpuProfiler::GpuProfiler() |
| 52 | : enable_gpu_tracing_(true), |
| 53 | sync_with_cpu_time_(false), |
| 54 | gl_timer_offset_ns_(0) { |
| 55 | SyncGlTimebase(); |
| 56 | } |
| 57 | |
| 58 | GpuProfiler::~GpuProfiler() {} |
| 59 | |
| 60 | bool GpuProfiler::IsGpuProfilingSupported() const { |
| 61 | // TODO(jbates) check for GL_EXT_disjoint_timer_query |
| 62 | return true; |
| 63 | } |
| 64 | |
| 65 | GLuint GpuProfiler::TryAllocateGlQueryId() { |
| 66 | GLuint query_id = 0; |
| 67 | if (gl_timer_query_id_pool_.empty()) { |
| 68 | glGenQueries(1, &query_id); |
| 69 | } else { |
| 70 | query_id = gl_timer_query_id_pool_.top(); |
| 71 | gl_timer_query_id_pool_.pop(); |
| 72 | } |
| 73 | return query_id; |
| 74 | } |
| 75 | |
| 76 | void GpuProfiler::EnterGlScope(const char* scope_name) { |
| 77 | GLuint query_id = TryAllocateGlQueryId(); |
| 78 | if (query_id != 0) { |
| 79 | glQueryCounter(query_id, GL_TIMESTAMP_EXT); |
| 80 | pending_gpu_queries_.push_back( |
| 81 | GpuTimerQuery(GetSystemClockNs(), scope_name, std::weak_ptr<int64_t>(), |
| 82 | -1, query_id, GpuTimerQuery::kQueryBeginScope)); |
| 83 | } |
| 84 | } |
| 85 | |
| 86 | void GpuProfiler::LeaveGlScope(const char* scope_name, |
| 87 | std::weak_ptr<int64_t> duration_ns, |
| 88 | int print_period) { |
| 89 | GLuint query_id = TryAllocateGlQueryId(); |
| 90 | if (query_id != 0) { |
| 91 | glQueryCounter(query_id, GL_TIMESTAMP_EXT); |
| 92 | pending_gpu_queries_.push_back( |
| 93 | GpuTimerQuery(GetSystemClockNs(), scope_name, duration_ns, print_period, |
| 94 | query_id, GpuTimerQuery::kQueryEndScope)); |
| 95 | } |
| 96 | } |
| 97 | |
| 98 | void GpuProfiler::SyncGlTimebase() { |
| 99 | if (!sync_with_cpu_time_) { |
| 100 | return; |
| 101 | } |
| 102 | |
| 103 | // Clear disjoint error status. |
| 104 | // This error status indicates that we need to ignore the result of the |
| 105 | // timer query because of some kind of disjoint GPU event such as heat |
| 106 | // throttling. |
| 107 | GLint disjoint = 0; |
| 108 | glGetIntegerv(GL_GPU_DISJOINT_EXT, &disjoint); |
| 109 | |
| 110 | // Try to get the current GL timestamp. Since the GPU can supposedly fail to |
| 111 | // produce a timestamp occasionally we try a few times before giving up. |
| 112 | int attempts_remaining = 3; |
| 113 | do { |
| 114 | GLint64 gl_timestamp = 0; |
| 115 | glGetInteger64v(GL_TIMESTAMP_EXT, &gl_timestamp); |
| 116 | gl_timestamp = AdjustTimerQueryToNs(gl_timestamp); |
| 117 | |
| 118 | // Now get the CPU timebase. |
| 119 | int64_t cpu_timebase_ns = static_cast<int64_t>(GetSystemClockNs()); |
| 120 | |
| 121 | disjoint = 0; |
| 122 | glGetIntegerv(GL_GPU_DISJOINT_EXT, &disjoint); |
| 123 | if (!disjoint) { |
| 124 | gl_timer_offset_ns_ = cpu_timebase_ns - gl_timestamp; |
| 125 | break; |
| 126 | } |
| 127 | ALOGW("WARNING: Skipping disjoint GPU timestamp"); |
| 128 | } while (--attempts_remaining > 0); |
| 129 | |
| 130 | if (attempts_remaining == 0) { |
| 131 | ALOGE("ERROR: Failed to sync GL timebase due to disjoint results\n"); |
| 132 | gl_timer_offset_ns_ = 0; |
| 133 | } |
| 134 | } |
| 135 | |
| 136 | void GpuProfiler::QueryFrameBegin() { |
| 137 | GLuint begin_frame_id = TryAllocateGlQueryId(); |
| 138 | if (begin_frame_id != 0) { |
| 139 | glQueryCounter(begin_frame_id, GL_TIMESTAMP_EXT); |
| 140 | pending_gpu_queries_.push_back( |
| 141 | GpuTimerQuery(GetSystemClockNs(), 0, std::weak_ptr<int64_t>(), -1, |
| 142 | begin_frame_id, GpuTimerQuery::kQueryBeginFrame)); |
| 143 | } |
| 144 | } |
| 145 | |
| 146 | void GpuProfiler::PollGlTimerQueries() { |
| 147 | if (!enabled()) { |
| 148 | return; |
| 149 | } |
| 150 | |
| 151 | #ifdef ENABLE_DISJOINT_TIMER_IGNORING |
| 152 | bool has_checked_disjoint = false; |
| 153 | bool was_disjoint = false; |
| 154 | #endif |
| 155 | for (;;) { |
| 156 | if (pending_gpu_queries_.empty()) { |
| 157 | // No queries pending. |
| 158 | return; |
| 159 | } |
| 160 | |
| 161 | GpuTimerQuery query = pending_gpu_queries_.front(); |
| 162 | |
| 163 | GLint available = 0; |
| 164 | glGetQueryObjectiv(query.query_id, GL_QUERY_RESULT_AVAILABLE_EXT, |
| 165 | &available); |
| 166 | if (!available) { |
| 167 | // No queries available. |
| 168 | return; |
| 169 | } |
| 170 | |
| 171 | // Found an available query, remove it from pending queue. |
| 172 | pending_gpu_queries_.pop_front(); |
| 173 | gl_timer_query_id_pool_.push(query.query_id); |
| 174 | |
| 175 | #ifdef ENABLE_DISJOINT_TIMER_IGNORING |
| 176 | if (!has_checked_disjoint) { |
| 177 | // Check if we need to ignore the result of the timer query because |
| 178 | // of some kind of disjoint GPU event such as heat throttling. |
| 179 | // If so, we ignore all events that are available during this loop. |
| 180 | has_checked_disjoint = true; |
| 181 | GLint disjoint_occurred = 0; |
| 182 | glGetIntegerv(GL_GPU_DISJOINT_EXT, &disjoint_occurred); |
| 183 | was_disjoint = !!disjoint_occurred; |
| 184 | if (was_disjoint) { |
| 185 | ALOGW("Skipping disjoint GPU events"); |
| 186 | } |
| 187 | } |
| 188 | |
| 189 | if (was_disjoint) { |
| 190 | continue; |
| 191 | } |
| 192 | #endif |
| 193 | |
| 194 | GLint64 timestamp_ns = 0; |
| 195 | glGetQueryObjecti64v(query.query_id, GL_QUERY_RESULT_EXT, ×tamp_ns); |
| 196 | timestamp_ns = AdjustTimerQueryToNs(timestamp_ns); |
| 197 | |
| 198 | int64_t adjusted_timestamp_ns; |
| 199 | |
| 200 | if (sync_with_cpu_time_) { |
| 201 | adjusted_timestamp_ns = timestamp_ns + gl_timer_offset_ns_; |
| 202 | |
| 203 | if (query.type == GpuTimerQuery::kQueryBeginFrame || |
| 204 | query.type == GpuTimerQuery::kQueryBeginScope) { |
| 205 | if (adjusted_timestamp_ns < query.timestamp_ns) { |
| 206 | // GPU clock is behind, adjust our offset to correct it. |
| 207 | gl_timer_offset_ns_ += query.timestamp_ns - adjusted_timestamp_ns; |
| 208 | adjusted_timestamp_ns = query.timestamp_ns; |
| 209 | } |
| 210 | } |
| 211 | } else { |
| 212 | adjusted_timestamp_ns = timestamp_ns; |
| 213 | } |
| 214 | |
| 215 | switch (query.type) { |
| 216 | case GpuTimerQuery::kQueryBeginFrame: |
| 217 | break; |
| 218 | case GpuTimerQuery::kQueryBeginScope: |
| 219 | events_[query.scope_name].enter(adjusted_timestamp_ns); |
| 220 | break; |
| 221 | case GpuTimerQuery::kQueryEndScope: |
| 222 | events_[query.scope_name].leave(adjusted_timestamp_ns, query.scope_name, |
| 223 | query.duration_ns, query.print_period); |
| 224 | break; |
| 225 | } |
| 226 | } |
| 227 | } |
| 228 | |
| 229 | void GpuProfiler::FinishGlTimerQueries() { |
| 230 | if (!enabled()) { |
| 231 | return; |
| 232 | } |
| 233 | |
| 234 | glFlush(); |
| 235 | PollGlTimerQueries(); |
| 236 | int max_iterations = 100; |
| 237 | while (!pending_gpu_queries_.empty()) { |
| 238 | if (--max_iterations <= 0) { |
| 239 | ALOGE("Error: GL timer queries failed to finish."); |
| 240 | break; |
| 241 | } |
| 242 | PollGlTimerQueries(); |
| 243 | usleep(1000); |
| 244 | } |
| 245 | } |
| 246 | |
| 247 | } // namespace dvr |
| 248 | } // namespace android |