Alex Vakulenko | e4eec20 | 2017-01-27 14:41:04 -0800 | [diff] [blame] | 1 | #include "include/private/dvr/graphics/gpu_profiler.h" |
| 2 | |
Alex Vakulenko | 4fe6058 | 2017-02-02 11:35:59 -0800 | [diff] [blame] | 3 | #include <log/log.h> |
Alex Vakulenko | e4eec20 | 2017-01-27 14:41:04 -0800 | [diff] [blame] | 4 | |
| 5 | #include <private/dvr/clock_ns.h> |
| 6 | |
| 7 | namespace android { |
| 8 | namespace dvr { |
| 9 | |
Steven Thomas | 28f85d6 | 2017-04-17 11:55:34 -0700 | [diff] [blame] | 10 | namespace { |
| 11 | |
| 12 | constexpr int kMaxPendingQueries = 32; |
| 13 | |
| 14 | } // anonynmous namespace |
| 15 | |
Alex Vakulenko | e4eec20 | 2017-01-27 14:41:04 -0800 | [diff] [blame] | 16 | static int64_t AdjustTimerQueryToNs(int64_t gpu_time) { return gpu_time; } |
| 17 | |
| 18 | void GpuProfiler::TimerData::reset() { |
| 19 | total_elapsed_ns = 0; |
| 20 | num_events = 0; |
| 21 | } |
| 22 | |
| 23 | void GpuProfiler::TimerData::print(const char* name) const { |
| 24 | ALOGI("GPU_TIME[%s]: %f ms", name, |
| 25 | (float)((double)total_elapsed_ns / 1000000.0 / (double)num_events)); |
| 26 | } |
| 27 | |
| 28 | // Enter a scope, records the timestamp for later matching with leave. |
| 29 | void GpuProfiler::TimerData::enter(int64_t timestamp_ns) { |
Steven Thomas | 28f85d6 | 2017-04-17 11:55:34 -0700 | [diff] [blame] | 30 | entered = true; |
Alex Vakulenko | e4eec20 | 2017-01-27 14:41:04 -0800 | [diff] [blame] | 31 | enter_timestamp_ns = timestamp_ns; |
| 32 | } |
| 33 | |
| 34 | // Compute the elapsed time for the scope. |
| 35 | void GpuProfiler::TimerData::leave(int64_t timestamp_ns, const char* name, |
| 36 | std::weak_ptr<int64_t> duration_ns, |
| 37 | int print_period) { |
Steven Thomas | 28f85d6 | 2017-04-17 11:55:34 -0700 | [diff] [blame] | 38 | if (!entered) { |
| 39 | // We got the leave event but are missing the enter. This can happen if |
| 40 | // OnPendingQueryOverflow() is called, or if the calls to enter()/leave() |
| 41 | // aren't properly balanced. Ignore the call but print a warning. |
| 42 | ALOGW("Ignoring GpuProfiler::TimerData::leave event with no enter event"); |
| 43 | return; |
| 44 | } |
| 45 | entered = false; |
| 46 | |
Alex Vakulenko | e4eec20 | 2017-01-27 14:41:04 -0800 | [diff] [blame] | 47 | int64_t elapsed = timestamp_ns - enter_timestamp_ns; |
| 48 | if (elapsed > 1000 * 1000 * 1000) { |
| 49 | // More than one second, drop it as invalid data. |
| 50 | return; |
| 51 | } |
| 52 | if (auto out_ns = duration_ns.lock()) { |
| 53 | *out_ns = elapsed; |
| 54 | } |
| 55 | total_elapsed_ns += elapsed; |
| 56 | if (print_period > 0 && ++num_events >= print_period) { |
| 57 | print(name); |
| 58 | reset(); |
| 59 | } |
| 60 | } |
| 61 | |
| 62 | GpuProfiler* GpuProfiler::Get() { |
| 63 | static GpuProfiler* profiler = new GpuProfiler(); |
| 64 | return profiler; |
| 65 | } |
| 66 | |
| 67 | GpuProfiler::GpuProfiler() |
| 68 | : enable_gpu_tracing_(true), |
Steven Thomas | 28f85d6 | 2017-04-17 11:55:34 -0700 | [diff] [blame] | 69 | has_gl_context_(false), |
Alex Vakulenko | e4eec20 | 2017-01-27 14:41:04 -0800 | [diff] [blame] | 70 | sync_with_cpu_time_(false), |
| 71 | gl_timer_offset_ns_(0) { |
Alex Vakulenko | e4eec20 | 2017-01-27 14:41:04 -0800 | [diff] [blame] | 72 | } |
| 73 | |
Steven Thomas | 28f85d6 | 2017-04-17 11:55:34 -0700 | [diff] [blame] | 74 | GpuProfiler::~GpuProfiler() { Clear(); } |
Alex Vakulenko | e4eec20 | 2017-01-27 14:41:04 -0800 | [diff] [blame] | 75 | |
| 76 | bool GpuProfiler::IsGpuProfilingSupported() const { |
| 77 | // TODO(jbates) check for GL_EXT_disjoint_timer_query |
| 78 | return true; |
| 79 | } |
| 80 | |
| 81 | GLuint GpuProfiler::TryAllocateGlQueryId() { |
Steven Thomas | 28f85d6 | 2017-04-17 11:55:34 -0700 | [diff] [blame] | 82 | if (pending_gpu_queries_.size() >= kMaxPendingQueries) |
| 83 | OnPendingQueryOverflow(); |
| 84 | |
Alex Vakulenko | e4eec20 | 2017-01-27 14:41:04 -0800 | [diff] [blame] | 85 | GLuint query_id = 0; |
| 86 | if (gl_timer_query_id_pool_.empty()) { |
| 87 | glGenQueries(1, &query_id); |
| 88 | } else { |
| 89 | query_id = gl_timer_query_id_pool_.top(); |
| 90 | gl_timer_query_id_pool_.pop(); |
| 91 | } |
| 92 | return query_id; |
| 93 | } |
| 94 | |
| 95 | void GpuProfiler::EnterGlScope(const char* scope_name) { |
| 96 | GLuint query_id = TryAllocateGlQueryId(); |
| 97 | if (query_id != 0) { |
| 98 | glQueryCounter(query_id, GL_TIMESTAMP_EXT); |
| 99 | pending_gpu_queries_.push_back( |
| 100 | GpuTimerQuery(GetSystemClockNs(), scope_name, std::weak_ptr<int64_t>(), |
| 101 | -1, query_id, GpuTimerQuery::kQueryBeginScope)); |
| 102 | } |
| 103 | } |
| 104 | |
| 105 | void GpuProfiler::LeaveGlScope(const char* scope_name, |
| 106 | std::weak_ptr<int64_t> duration_ns, |
| 107 | int print_period) { |
| 108 | GLuint query_id = TryAllocateGlQueryId(); |
| 109 | if (query_id != 0) { |
| 110 | glQueryCounter(query_id, GL_TIMESTAMP_EXT); |
| 111 | pending_gpu_queries_.push_back( |
| 112 | GpuTimerQuery(GetSystemClockNs(), scope_name, duration_ns, print_period, |
| 113 | query_id, GpuTimerQuery::kQueryEndScope)); |
| 114 | } |
| 115 | } |
| 116 | |
Steven Thomas | 28f85d6 | 2017-04-17 11:55:34 -0700 | [diff] [blame] | 117 | void GpuProfiler::OnGlContextCreated() { |
| 118 | has_gl_context_ = true; |
| 119 | gl_timer_offset_ns_ = 0; |
| 120 | SyncGlTimebase(); |
| 121 | } |
| 122 | |
| 123 | void GpuProfiler::OnGlContextDestroyed() { |
| 124 | has_gl_context_ = false; |
| 125 | Clear(); |
| 126 | } |
| 127 | |
| 128 | void GpuProfiler::Clear() { |
| 129 | events_.clear(); |
| 130 | for (auto& query : pending_gpu_queries_) |
| 131 | glDeleteQueries(1, &query.query_id); |
| 132 | pending_gpu_queries_.clear(); |
| 133 | while (!gl_timer_query_id_pool_.empty()) { |
| 134 | GLuint id = gl_timer_query_id_pool_.top(); |
| 135 | gl_timer_query_id_pool_.pop(); |
| 136 | glDeleteQueries(1, &id); |
| 137 | } |
| 138 | } |
| 139 | |
| 140 | void GpuProfiler::OnPendingQueryOverflow() { |
| 141 | ALOGW("Reached limit of %d pending queries in GpuProfiler." |
| 142 | " Clearing all queries.", kMaxPendingQueries); |
| 143 | Clear(); |
| 144 | } |
| 145 | |
Alex Vakulenko | e4eec20 | 2017-01-27 14:41:04 -0800 | [diff] [blame] | 146 | void GpuProfiler::SyncGlTimebase() { |
| 147 | if (!sync_with_cpu_time_) { |
| 148 | return; |
| 149 | } |
| 150 | |
| 151 | // Clear disjoint error status. |
| 152 | // This error status indicates that we need to ignore the result of the |
| 153 | // timer query because of some kind of disjoint GPU event such as heat |
| 154 | // throttling. |
| 155 | GLint disjoint = 0; |
| 156 | glGetIntegerv(GL_GPU_DISJOINT_EXT, &disjoint); |
| 157 | |
| 158 | // Try to get the current GL timestamp. Since the GPU can supposedly fail to |
| 159 | // produce a timestamp occasionally we try a few times before giving up. |
| 160 | int attempts_remaining = 3; |
| 161 | do { |
| 162 | GLint64 gl_timestamp = 0; |
| 163 | glGetInteger64v(GL_TIMESTAMP_EXT, &gl_timestamp); |
| 164 | gl_timestamp = AdjustTimerQueryToNs(gl_timestamp); |
| 165 | |
| 166 | // Now get the CPU timebase. |
| 167 | int64_t cpu_timebase_ns = static_cast<int64_t>(GetSystemClockNs()); |
| 168 | |
| 169 | disjoint = 0; |
| 170 | glGetIntegerv(GL_GPU_DISJOINT_EXT, &disjoint); |
| 171 | if (!disjoint) { |
| 172 | gl_timer_offset_ns_ = cpu_timebase_ns - gl_timestamp; |
| 173 | break; |
| 174 | } |
| 175 | ALOGW("WARNING: Skipping disjoint GPU timestamp"); |
| 176 | } while (--attempts_remaining > 0); |
| 177 | |
| 178 | if (attempts_remaining == 0) { |
| 179 | ALOGE("ERROR: Failed to sync GL timebase due to disjoint results\n"); |
| 180 | gl_timer_offset_ns_ = 0; |
| 181 | } |
| 182 | } |
| 183 | |
| 184 | void GpuProfiler::QueryFrameBegin() { |
| 185 | GLuint begin_frame_id = TryAllocateGlQueryId(); |
| 186 | if (begin_frame_id != 0) { |
| 187 | glQueryCounter(begin_frame_id, GL_TIMESTAMP_EXT); |
| 188 | pending_gpu_queries_.push_back( |
| 189 | GpuTimerQuery(GetSystemClockNs(), 0, std::weak_ptr<int64_t>(), -1, |
| 190 | begin_frame_id, GpuTimerQuery::kQueryBeginFrame)); |
| 191 | } |
| 192 | } |
| 193 | |
| 194 | void GpuProfiler::PollGlTimerQueries() { |
| 195 | if (!enabled()) { |
| 196 | return; |
| 197 | } |
| 198 | |
| 199 | #ifdef ENABLE_DISJOINT_TIMER_IGNORING |
| 200 | bool has_checked_disjoint = false; |
| 201 | bool was_disjoint = false; |
| 202 | #endif |
| 203 | for (;;) { |
| 204 | if (pending_gpu_queries_.empty()) { |
| 205 | // No queries pending. |
| 206 | return; |
| 207 | } |
| 208 | |
| 209 | GpuTimerQuery query = pending_gpu_queries_.front(); |
| 210 | |
| 211 | GLint available = 0; |
| 212 | glGetQueryObjectiv(query.query_id, GL_QUERY_RESULT_AVAILABLE_EXT, |
| 213 | &available); |
| 214 | if (!available) { |
| 215 | // No queries available. |
| 216 | return; |
| 217 | } |
| 218 | |
| 219 | // Found an available query, remove it from pending queue. |
| 220 | pending_gpu_queries_.pop_front(); |
| 221 | gl_timer_query_id_pool_.push(query.query_id); |
| 222 | |
| 223 | #ifdef ENABLE_DISJOINT_TIMER_IGNORING |
| 224 | if (!has_checked_disjoint) { |
| 225 | // Check if we need to ignore the result of the timer query because |
| 226 | // of some kind of disjoint GPU event such as heat throttling. |
| 227 | // If so, we ignore all events that are available during this loop. |
| 228 | has_checked_disjoint = true; |
| 229 | GLint disjoint_occurred = 0; |
| 230 | glGetIntegerv(GL_GPU_DISJOINT_EXT, &disjoint_occurred); |
| 231 | was_disjoint = !!disjoint_occurred; |
| 232 | if (was_disjoint) { |
| 233 | ALOGW("Skipping disjoint GPU events"); |
| 234 | } |
| 235 | } |
| 236 | |
| 237 | if (was_disjoint) { |
| 238 | continue; |
| 239 | } |
| 240 | #endif |
| 241 | |
| 242 | GLint64 timestamp_ns = 0; |
| 243 | glGetQueryObjecti64v(query.query_id, GL_QUERY_RESULT_EXT, ×tamp_ns); |
| 244 | timestamp_ns = AdjustTimerQueryToNs(timestamp_ns); |
| 245 | |
| 246 | int64_t adjusted_timestamp_ns; |
| 247 | |
| 248 | if (sync_with_cpu_time_) { |
| 249 | adjusted_timestamp_ns = timestamp_ns + gl_timer_offset_ns_; |
| 250 | |
| 251 | if (query.type == GpuTimerQuery::kQueryBeginFrame || |
| 252 | query.type == GpuTimerQuery::kQueryBeginScope) { |
| 253 | if (adjusted_timestamp_ns < query.timestamp_ns) { |
| 254 | // GPU clock is behind, adjust our offset to correct it. |
| 255 | gl_timer_offset_ns_ += query.timestamp_ns - adjusted_timestamp_ns; |
| 256 | adjusted_timestamp_ns = query.timestamp_ns; |
| 257 | } |
| 258 | } |
| 259 | } else { |
| 260 | adjusted_timestamp_ns = timestamp_ns; |
| 261 | } |
| 262 | |
| 263 | switch (query.type) { |
| 264 | case GpuTimerQuery::kQueryBeginFrame: |
| 265 | break; |
| 266 | case GpuTimerQuery::kQueryBeginScope: |
| 267 | events_[query.scope_name].enter(adjusted_timestamp_ns); |
| 268 | break; |
| 269 | case GpuTimerQuery::kQueryEndScope: |
| 270 | events_[query.scope_name].leave(adjusted_timestamp_ns, query.scope_name, |
| 271 | query.duration_ns, query.print_period); |
| 272 | break; |
| 273 | } |
| 274 | } |
| 275 | } |
| 276 | |
| 277 | void GpuProfiler::FinishGlTimerQueries() { |
| 278 | if (!enabled()) { |
| 279 | return; |
| 280 | } |
| 281 | |
| 282 | glFlush(); |
| 283 | PollGlTimerQueries(); |
| 284 | int max_iterations = 100; |
| 285 | while (!pending_gpu_queries_.empty()) { |
| 286 | if (--max_iterations <= 0) { |
| 287 | ALOGE("Error: GL timer queries failed to finish."); |
| 288 | break; |
| 289 | } |
| 290 | PollGlTimerQueries(); |
| 291 | usleep(1000); |
| 292 | } |
| 293 | } |
| 294 | |
| 295 | } // namespace dvr |
| 296 | } // namespace android |