blob: b1a15896cd26a546d71ca20c3113a20a683f4174 [file] [log] [blame]
Alex Vakulenkoe4eec202017-01-27 14:41:04 -08001#include "include/private/dvr/late_latch.h"
2
3#include <unistd.h>
4
5#include <fstream>
6#include <iostream>
7#include <string>
8
Alex Vakulenko4fe60582017-02-02 11:35:59 -08009#include <log/log.h>
Alex Vakulenkoe4eec202017-01-27 14:41:04 -080010#include <private/dvr/clock_ns.h>
11#include <private/dvr/debug.h>
12#include <private/dvr/graphics/gpu_profiler.h>
13#include <private/dvr/pose_client_internal.h>
14#include <private/dvr/sensor_constants.h>
15#include <private/dvr/types.h>
16
// Set to 1 to dump the late-latch output matrices and pose to stderr and the
// Android log after every dispatch (see the PRINT_MATRIX sections in
// AddLateLatch and AddEdsLateLatch).
#define PRINT_MATRIX 0

#if PRINT_MATRIX
// NOTE(review): this fallback is defined after <log/log.h> has already been
// included and only exists when PRINT_MATRIX is enabled -- confirm that the
// build supplies LOG_TAG for the unconditional ALOGE calls in this file.
#ifndef LOG_TAG
#define LOG_TAG "latelatch"
#endif

// Prints a printf-style message, prefixed with the source file and line, to
// both stderr and the Android log. The do/while(0) wrapper makes the two
// calls behave as a single statement, so PE(...) is safe inside an unbraced
// if/else.
#define PE(str, ...)                                                    \
  do {                                                                  \
    fprintf(stderr, "[%s:%d] " str, __FILE__, __LINE__, ##__VA_ARGS__); \
    ALOGI("[%s:%d] " str, __FILE__, __LINE__, ##__VA_ARGS__);           \
  } while (0)

// Prints the four components of an expression indexable with [0]..[3].
// No trailing semicolon: the call site provides it.
#define PV4(v) PE(#v "=%f,%f,%f,%f\n", (v)[0], (v)[1], (v)[2], (v)[3])

// Prints a 4x4 matrix expression that supports (row, column) element access.
#define PM4(m)                                                             \
  PE(#m ":\n %f,%f,%f,%f\n %f,%f,%f,%f\n %f,%f,%f,%f\n %f,%f,%f,%f\n",     \
     (m)(0, 0), (m)(0, 1), (m)(0, 2), (m)(0, 3), (m)(1, 0), (m)(1, 1),     \
     (m)(1, 2), (m)(1, 3), (m)(2, 0), (m)(2, 1), (m)(2, 2), (m)(2, 3),     \
     (m)(3, 0), (m)(3, 1), (m)(3, 2), (m)(3, 3))
#endif  // PRINT_MATRIX
34
// Compute shader storage-buffer binding points. These are pasted into the
// shader source as text, so they must remain plain macros.
// GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS must be at least 8 for GLES 3.1.
#define POSE_BINDING 0
#define RENDER_POSE_BINDING 1
#define INPUT_BINDING 2
#define OUTPUT_BINDING 3

// Two-step stringification: STRINGIFY(X) expands X first (when X is itself a
// macro) and then turns the expansion into a string literal via STRINGIFY2.
#define STRINGIFY2(s) #s
#define STRINGIFY(s) STRINGIFY2(s)
44
45using android::pdx::LocalHandle;
46
47namespace {
48
// GLSL compute shader source for late latching. It is not compilable on its
// own: LoadLateLatchShader() prepends #define lines for the *_BINDING values,
// kPoseAsyncBufferTotalCount, kSurfaceBufferMaxCount, kSurfaceViewMaxCount and
// IS_APP_LATE_LATCH before linking.
//
// Stored as a constexpr character array rather than a std::string so that no
// dynamic initializer or destructor runs for this file-local constant.
constexpr char kShaderLateLatch[] = R"(  // NOLINT
  struct Pose {
    vec4 quat;
    vec3 pos;
  };

  // Must match DvrPoseAsync C struct.
  struct DvrPoseAsync {
    vec4 orientation;
    vec4 translation;
    vec4 right_orientation;
    vec4 right_translation;
    vec4 angular_velocity;
    vec4 velocity;
    vec4 reserved[2];
  };

  // Must match the LateLatchInput C struct.
  layout(binding = INPUT_BINDING, std140)
  buffer InputData {
    mat4 uEyeFromHeadMat[kSurfaceViewMaxCount];
    mat4 uProjMat[kSurfaceViewMaxCount];
    mat4 uPoseOffset[kSurfaceViewMaxCount];
    mat4 uEdsMat1[kSurfaceViewMaxCount];
    mat4 uEdsMat2[kSurfaceViewMaxCount];
    uint uPoseIndex;
    uint uRenderPoseIndex;
  } bIn;

  // std140 is to layout the structure in a consistent, standard way so we
  // can access it from C++.
  // This structure exactly matches the pose ring buffer in pose_client.h.
  layout(binding = POSE_BINDING, std140)
  buffer PoseBuffer {
    DvrPoseAsync data[kPoseAsyncBufferTotalCount];
  } bPose;

  // Must stay in sync with DisplaySurfaceMetadata C struct.
  // GPU thread 0 will exclusively read in a pose and capture it
  // into this array.
  layout(binding = RENDER_POSE_BINDING, std140)
  buffer DisplaySurfaceMetadata {
    vec4 orientation[kSurfaceBufferMaxCount];
    vec4 translation[kSurfaceBufferMaxCount];
  } bSurfaceData;

  // Must stay in sync with the LateLatchOutput C struct.
  // Each thread writes the matrices for its own view index.
  layout(binding = OUTPUT_BINDING, std140)
  buffer Output {
    mat4 viewProjMatrix[kSurfaceViewMaxCount];
    mat4 viewMatrix[kSurfaceViewMaxCount];
    vec4 quaternion;
    vec4 translation;
  } bOut;

  // Thread 0 will also store the single quat/pos pair in shared variables
  // for the other threads to use (left and right eye in this array).
  shared Pose sharedPose[2];

  // Rotate v1 by the given quaternion. This is based on mathfu's
  // Quaternion::Rotate function. It is the typical implementation of this
  // operation. Eigen has a similar method (Quaternion::_transformVector) that
  // supposedly requires fewer operations, but I am skeptical of optimizing
  // shader code without proper profiling first.
  vec3 rotate(vec4 quat, vec3 v1) {
    float ss = 2.0 * quat.w;
    vec3 v = quat.xyz;
    return ss * cross(v, v1) + (ss * quat.w - 1.0) * v1 +
           2.0 * dot(v, v1) * v;
  }

  // See Eigen Quaternion::conjugate;
  // Note that this isn't a true multiplicative inverse unless you can guarantee
  // quat is also normalized, but that typically isn't an issue for our
  // purposes.
  vec4 quatInvert(vec4 quat) {
    return vec4(-quat.xyz, quat.w);
  }

  // This is based on mathfu's Quaternion::operator*(Quaternion)
  // Eigen's version is mathematically equivalent, just notationally different.
  vec4 quatMul(vec4 q1, vec4 q2) {
    return vec4(q1.w * q2.xyz + q2.w * q1.xyz + cross(q1.xyz, q2.xyz),
                q1.w * q2.w - dot(q1.xyz, q2.xyz));
  }

  // Equivalent to pose.h GetObjectFromReferenceMatrix.
  mat4 getInverseMatrix(Pose pose) {
    // Invert quaternion and store fields the way Eigen does so we can
    // keep in sync with Eigen methods easier.
    vec4 quatInv = quatInvert(pose.quat);
    vec3 v = quatInv.xyz;
    float s = quatInv.w;
    // Convert quaternion to matrix. See Eigen Quaternion::toRotationMatrix()
    float x2 = v.x * v.x, y2 = v.y * v.y, z2 = v.z * v.z;
    float sx = s * v.x, sy = s * v.y, sz = s * v.z;
    float xz = v.x * v.z, yz = v.y * v.z, xy = v.x * v.y;
    // Inverse translation.
    vec3 point = -pose.pos;

    return
      mat4(1.0 - 2.0 * (y2 + z2), 2.0 * (xy + sz), 2.0 * (xz - sy), 0.0,
           2.0 * (xy - sz), 1.0 - 2.0 * (x2 + z2), 2.0 * (sx + yz), 0.0,
           2.0 * (sy + xz), 2.0 * (yz - sx), 1.0 - 2.0 * (x2 + y2), 0.0,
           0.0, 0.0, 0.0, 1.0)*
      mat4(1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0,
           point.x, point.y, point.z, 1.0);
  }

  void appLateLatch() {
    uint poseIndex = (gl_LocalInvocationIndex & uint(1));
    mat4 head_from_center = getInverseMatrix(sharedPose[poseIndex]);
    bOut.viewMatrix[gl_LocalInvocationIndex] =
        bIn.uEyeFromHeadMat[gl_LocalInvocationIndex] *
        head_from_center * bIn.uPoseOffset[gl_LocalInvocationIndex];
    bOut.viewProjMatrix[gl_LocalInvocationIndex] =
        bIn.uProjMat[gl_LocalInvocationIndex] *
        bOut.viewMatrix[gl_LocalInvocationIndex];
  }

  // Extract the app frame's pose.
  Pose getPoseFromApp() {
    Pose p;
    p.quat = bSurfaceData.orientation[bIn.uRenderPoseIndex];
    p.pos = bSurfaceData.translation[bIn.uRenderPoseIndex].xyz;
    return p;
  }

  // See Posef::GetPoseOffset.
  Pose getPoseOffset(Pose p1, Pose p2) {
    Pose p;
    p.quat = quatMul(quatInvert(p2.quat), p1.quat);
    // TODO(jbates) Consider enabling positional EDS when it is better
    // tested.
    // p.pos = p2.pos - p1.pos;
    p.pos = vec3(0.0);
    return p;
  }

  void edsLateLatch() {
    Pose pose1 = getPoseFromApp();
    Pose correction;
    // Ignore the texture pose if the quat is not unit-length.
    float tex_quat_length = length(pose1.quat);
    uint poseIndex = (gl_LocalInvocationIndex & uint(1));
    if (abs(tex_quat_length - 1.0) < 0.001)
      correction = getPoseOffset(pose1, sharedPose[poseIndex]);
    else
      correction = Pose(vec4(0, 0, 0, 1), vec3(0, 0, 0));
    mat4 eye_old_from_eye_new_matrix = getInverseMatrix(correction);
    bOut.viewProjMatrix[gl_LocalInvocationIndex] =
        bIn.uEdsMat1[gl_LocalInvocationIndex] *
        eye_old_from_eye_new_matrix * bIn.uEdsMat2[gl_LocalInvocationIndex];
    // Currently unused, except for debugging:
    bOut.viewMatrix[gl_LocalInvocationIndex] = eye_old_from_eye_new_matrix;
  }

  // One thread per surface view.
  layout (local_size_x = kSurfaceViewMaxCount, local_size_y = 1,
          local_size_z = 1) in;

  void main() {
    // First, thread 0 late latches pose and stores it into various places.
    if (gl_LocalInvocationIndex == uint(0)) {
      sharedPose[0].quat = bPose.data[bIn.uPoseIndex].orientation;
      sharedPose[0].pos = bPose.data[bIn.uPoseIndex].translation.xyz;
      sharedPose[1].quat = bPose.data[bIn.uPoseIndex].right_orientation;
      sharedPose[1].pos = bPose.data[bIn.uPoseIndex].right_translation.xyz;
      if (IS_APP_LATE_LATCH) {
        bSurfaceData.orientation[bIn.uRenderPoseIndex] = sharedPose[0].quat;
        bSurfaceData.translation[bIn.uRenderPoseIndex] = vec4(sharedPose[0].pos, 0.0);
        // TODO(jbates) implement app late-latch support for separate eye poses.
        // App late latch currently uses the same pose for both eye views.
        sharedPose[1] = sharedPose[0];
      }
      bOut.quaternion = sharedPose[0].quat;
      bOut.translation = vec4(sharedPose[0].pos, 0.0);
    }

    // Memory barrier to make sure all threads can see prior writes.
    memoryBarrierShared();

    // Execution barrier to block all threads here until all threads have
    // reached this point -- ensures the late latching is done.
    barrier();

    if (IS_APP_LATE_LATCH)
      appLateLatch();
    else
      edsLateLatch();
  }
)";
242
243} // anonymous namespace
244
245namespace android {
246namespace dvr {
247
248LateLatch::LateLatch(bool is_app_late_latch)
249 : LateLatch(is_app_late_latch, LocalHandle()) {}
250
251LateLatch::LateLatch(bool is_app_late_latch,
252 LocalHandle&& surface_metadata_fd)
253 : is_app_late_latch_(is_app_late_latch),
254 app_late_latch_output_(NULL),
255 eds_late_latch_output_(NULL) {
256 CHECK_GL();
257 glGenBuffers(1, &input_buffer_id_);
258 glBindBuffer(GL_SHADER_STORAGE_BUFFER, input_buffer_id_);
259 glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(LateLatchInput), nullptr,
260 GL_DYNAMIC_DRAW);
261 glGenBuffers(1, &output_buffer_id_);
262 glBindBuffer(GL_SHADER_STORAGE_BUFFER, output_buffer_id_);
263 glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(LateLatchOutput), nullptr,
264 GL_DYNAMIC_COPY);
265 CHECK_GL();
266
267 LocalHandle pose_buffer_fd;
268 pose_client_ = dvrPoseCreate();
269 if (!pose_client_) {
Alex Vakulenko4fe60582017-02-02 11:35:59 -0800270 ALOGE("LateLatch Error: failed to create pose client");
Alex Vakulenkoe4eec202017-01-27 14:41:04 -0800271 } else {
272 int ret = privateDvrPoseGetRingBufferFd(pose_client_, &pose_buffer_fd);
273 if (ret < 0) {
Alex Vakulenko4fe60582017-02-02 11:35:59 -0800274 ALOGE("LateLatch Error: failed to get pose ring buffer");
Alex Vakulenkoe4eec202017-01-27 14:41:04 -0800275 }
276 }
277
278 glGenBuffers(1, &pose_buffer_object_);
279 glGenBuffers(1, &metadata_buffer_id_);
280 if (!glBindSharedBufferQCOM) {
Alex Vakulenko4fe60582017-02-02 11:35:59 -0800281 ALOGE("Error: Missing gralloc buffer extension, no pose data");
Alex Vakulenkoe4eec202017-01-27 14:41:04 -0800282 } else {
283 if (pose_buffer_fd) {
284 glBindBuffer(GL_SHADER_STORAGE_BUFFER, pose_buffer_object_);
285 glBindSharedBufferQCOM(GL_SHADER_STORAGE_BUFFER,
286 kPoseAsyncBufferTotalCount * sizeof(DvrPoseAsync),
287 pose_buffer_fd.Release());
288 }
289 CHECK_GL();
290 }
291
292 glBindBuffer(GL_SHADER_STORAGE_BUFFER, metadata_buffer_id_);
293 if (surface_metadata_fd && glBindSharedBufferQCOM) {
294 glBindSharedBufferQCOM(GL_SHADER_STORAGE_BUFFER,
295 sizeof(DisplaySurfaceMetadata),
296 surface_metadata_fd.Release());
297 } else {
298 // Fall back on internal metadata buffer when none provided, for example
299 // when distortion is done in the application process.
300 glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(DisplaySurfaceMetadata),
301 nullptr, GL_DYNAMIC_COPY);
302 }
303 CHECK_GL();
304 glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
305
306 CHECK_GL();
307 LoadLateLatchShader();
308}
309
310LateLatch::~LateLatch() {
311 glDeleteBuffers(1, &metadata_buffer_id_);
312 glDeleteBuffers(1, &input_buffer_id_);
313 glDeleteBuffers(1, &output_buffer_id_);
314 glDeleteBuffers(1, &pose_buffer_object_);
315 dvrPoseDestroy(pose_client_);
316}
317
318void LateLatch::LoadLateLatchShader() {
319 std::string str;
320 str += "\n#define POSE_BINDING " STRINGIFY(POSE_BINDING);
321 str += "\n#define RENDER_POSE_BINDING " STRINGIFY(RENDER_POSE_BINDING);
322 str += "\n#define INPUT_BINDING " STRINGIFY(INPUT_BINDING);
323 str += "\n#define OUTPUT_BINDING " STRINGIFY(OUTPUT_BINDING);
324 str += "\n#define kPoseAsyncBufferTotalCount " STRINGIFY(
325 kPoseAsyncBufferTotalCount);
326 str += "\n#define kSurfaceBufferMaxCount " STRINGIFY(kSurfaceBufferMaxCount);
327 str += "\n#define kSurfaceBufferMaxCount " STRINGIFY(kSurfaceBufferMaxCount);
328 str += "\n#define kSurfaceViewMaxCount " STRINGIFY(kSurfaceViewMaxCount);
329 str += "\n#define IS_APP_LATE_LATCH ";
330 str += is_app_late_latch_ ? "true" : "false";
331 str += "\n";
332 str += kShaderLateLatch;
333 late_latch_program_.Link(str);
334 CHECK_GL();
335}
336
337void LateLatch::CaptureOutputData(LateLatchOutput* data) const {
338 glBindBuffer(GL_SHADER_STORAGE_BUFFER, output_buffer_id_);
339 LateLatchOutput* out_data = static_cast<LateLatchOutput*>(glMapBufferRange(
340 GL_SHADER_STORAGE_BUFFER, 0, sizeof(LateLatchOutput), GL_MAP_READ_BIT));
341 *data = *out_data;
342 glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
343 glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
344 CHECK_GL();
345}
346
347void LateLatch::AddLateLatch(const LateLatchInput& data) const {
Alex Vakulenko4fe60582017-02-02 11:35:59 -0800348 LOG_ALWAYS_FATAL_IF(!is_app_late_latch_);
Alex Vakulenkoe4eec202017-01-27 14:41:04 -0800349 CHECK_GL();
350 late_latch_program_.Use();
351
352 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, RENDER_POSE_BINDING,
353 metadata_buffer_id_);
354 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, POSE_BINDING, pose_buffer_object_);
355 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, OUTPUT_BINDING, output_buffer_id_);
356 glBindBuffer(GL_SHADER_STORAGE_BUFFER, input_buffer_id_);
357 LateLatchInput* adata = (LateLatchInput*)glMapBufferRange(
358 GL_SHADER_STORAGE_BUFFER, 0, sizeof(LateLatchInput),
359 GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);
360 if (adata)
361 *adata = data;
362 else
Alex Vakulenko4fe60582017-02-02 11:35:59 -0800363 ALOGE("Error: LateLatchInput gl mapping is null");
Alex Vakulenkoe4eec202017-01-27 14:41:04 -0800364 glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
365 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, INPUT_BINDING, input_buffer_id_);
366 glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
367 CHECK_GL();
368
369 // The output buffer is going to be written but it may be read by
370 // earlier shaders, so we need a shader storage memory barrier.
371 glMemoryBarrier(GL_SHADER_STORAGE_BUFFER);
372
373 glDispatchCompute(1, 1, 1);
374 CHECK_GL();
375
376 // The transform feedback buffer is going to be read as a uniform by the app,
377 // so we need a uniform memory barrier.
378 glMemoryBarrier(GL_UNIFORM_BARRIER_BIT);
379
380 if (app_late_latch_output_) {
381 // Capture the output data:
382 CaptureOutputData(app_late_latch_output_);
383 }
384#if PRINT_MATRIX
385 // Print the composed matrix to stderr:
386 LateLatchOutput out_data;
387 CaptureOutputData(&out_data);
388 CHECK_GL();
389 PE("LL APP slot:%d\n", data.render_pose_index);
390 PM4(data.proj_mat[0]);
391 PM4(out_data.view_proj_matrix[0]);
392 PM4(out_data.view_proj_matrix[1]);
393 PM4(out_data.view_proj_matrix[2]);
394 PM4(out_data.view_proj_matrix[3]);
395 PM4(out_data.view_matrix[0]);
396 PM4(out_data.view_matrix[1]);
397 PM4(out_data.view_matrix[2]);
398 PM4(out_data.view_matrix[3]);
399 PV4(out_data.pose_quaternion);
400 PV4(out_data.pose_translation);
401#endif
402
403 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, RENDER_POSE_BINDING, 0);
404 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, POSE_BINDING, 0);
405 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, OUTPUT_BINDING, 0);
406 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, INPUT_BINDING, 0);
407 glUseProgram(0);
408}
409
410void LateLatch::AddEdsLateLatch(const LateLatchInput& data,
411 GLuint render_pose_buffer_object) const {
Alex Vakulenko4fe60582017-02-02 11:35:59 -0800412 LOG_ALWAYS_FATAL_IF(is_app_late_latch_);
Alex Vakulenkoe4eec202017-01-27 14:41:04 -0800413 late_latch_program_.Use();
414
415 // Fall back on internal buffer when none is provided.
416 if (!render_pose_buffer_object)
417 render_pose_buffer_object = metadata_buffer_id_;
418
419 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, RENDER_POSE_BINDING,
420 render_pose_buffer_object);
421 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, POSE_BINDING, pose_buffer_object_);
422 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, OUTPUT_BINDING, output_buffer_id_);
423 glBindBuffer(GL_SHADER_STORAGE_BUFFER, input_buffer_id_);
424 LateLatchInput* adata = (LateLatchInput*)glMapBufferRange(
425 GL_SHADER_STORAGE_BUFFER, 0, sizeof(LateLatchInput),
426 GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);
427 *adata = data;
428 glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
429 glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
430 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, INPUT_BINDING, input_buffer_id_);
431 CHECK_GL();
432
433 glDispatchCompute(1, 1, 1);
434 CHECK_GL();
435
436 if (eds_late_latch_output_) {
437 // Capture the output data:
438 CaptureOutputData(eds_late_latch_output_);
439 }
440#if PRINT_MATRIX
441 // Print the composed matrix to stderr:
442 LateLatchOutput out_data;
443 CaptureOutputData(&out_data);
444 CHECK_GL();
445 PE("LL EDS\n");
446 PM4(out_data.view_proj_matrix[0]);
447 PM4(out_data.view_matrix[0]);
448 PV4(out_data.pose_quaternion);
449 PV4(out_data.pose_translation);
450#endif
451
452 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, RENDER_POSE_BINDING, 0);
453 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, POSE_BINDING, 0);
454 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, OUTPUT_BINDING, 0);
455 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, INPUT_BINDING, 0);
456 glUseProgram(0);
457}
458
459} // namespace dvr
460} // namespace android