blob: 3681e10ad83a643f0f5b166ec5c166d5b1dfcba6 [file] [log] [blame]
Alex Vakulenkoe4eec202017-01-27 14:41:04 -08001#include "include/private/dvr/late_latch.h"
2
3#include <unistd.h>
4
5#include <fstream>
6#include <iostream>
7#include <string>
8
9#include <base/logging.h>
10#include <private/dvr/clock_ns.h>
11#include <private/dvr/debug.h>
12#include <private/dvr/graphics/gpu_profiler.h>
13#include <private/dvr/pose_client_internal.h>
14#include <private/dvr/sensor_constants.h>
15#include <private/dvr/types.h>
16
// Set to 1 to dump the late-latch matrices and poses to stderr and the
// Android log after every dispatch (see the PRINT_MATRIX sections below).
#define PRINT_MATRIX 0

#if PRINT_MATRIX
#ifndef LOG_TAG
#define LOG_TAG "latelatch"
#endif
#include <cstdio>

#include <cutils/log.h>

// Prints a printf-style message, prefixed with the file name and line number,
// to both stderr and the Android log. The do/while(0) wrapper makes the two
// statements behave as a single statement, so PE(...) is safe inside an
// unbraced if/else.
#define PE(str, ...)                                                    \
  do {                                                                  \
    fprintf(stderr, "[%s:%d] " str, __FILE__, __LINE__, ##__VA_ARGS__); \
    ALOGI("[%s:%d] " str, __FILE__, __LINE__, ##__VA_ARGS__);           \
  } while (0)

// Prints the four components of an expression indexable with operator[].
// No trailing semicolon: the call site supplies it.
#define PV4(v) PE(#v "=%f,%f,%f,%f\n", (v)[0], (v)[1], (v)[2], (v)[3])

// Prints a 4x4 matrix expression whose elements are read as m(row, col).
#define PM4(m)                                                             \
  PE(#m ":\n %f,%f,%f,%f\n %f,%f,%f,%f\n %f,%f,%f,%f\n %f,%f,%f,%f\n",     \
     (m)(0, 0), (m)(0, 1), (m)(0, 2), (m)(0, 3), (m)(1, 0), (m)(1, 1),     \
     (m)(1, 2), (m)(1, 3), (m)(2, 0), (m)(2, 1), (m)(2, 2), (m)(2, 3),     \
     (m)(3, 0), (m)(3, 1), (m)(3, 2), (m)(3, 3))
#endif  // PRINT_MATRIX
35
// Two-level stringification: STRINGIFY(x) lets the preprocessor expand x
// first (when x is itself a macro) and STRINGIFY2 then turns the result into
// a string literal. Used by LoadLateLatchShader() to paste constants into the
// GLSL source.
36#define STRINGIFY2(s) #s
37#define STRINGIFY(s) STRINGIFY2(s)
38
39// Compute shader bindings.
40// GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS must be at least 8 for GLES 3.1.
// The same values are pasted into the shader source as #defines by
// LoadLateLatchShader() and passed to glBindBufferBase() by the dispatch
// functions, which keeps both sides on the same slot for each buffer.
// Slot of the PoseBuffer block.
41#define POSE_BINDING 0
// Slot of the DisplaySurfaceMetadata block.
42#define RENDER_POSE_BINDING 1
// Slot of the InputData block.
43#define INPUT_BINDING 2
// Slot of the Output block.
44#define OUTPUT_BINDING 3
45
46using android::pdx::LocalHandle;
47
48namespace {
49
// GLSL compute shader body used for late latching. LoadLateLatchShader()
// prepends #define lines for the four binding points, the k*Count array sizes
// and IS_APP_LATE_LATCH before linking it, so those identifiers are not
// defined inside this string.
//
// Invocation 0 copies the pose selected by uPoseIndex into shared memory and
// into the Output block (and, in app mode, into the surface metadata slot
// uRenderPoseIndex). After the barrier, every invocation handles the view
// matching its gl_LocalInvocationIndex: appLateLatch() writes view and
// view-projection matrices, edsLateLatch() writes a correction matrix
// relative to the pose stored at uRenderPoseIndex.
50static const std::string kShaderLateLatch = R"( // NOLINT
51 struct Pose {
52 vec4 quat;
53 vec3 pos;
54 };
55
56 // Must match DvrPoseAsync C struct.
57 struct DvrPoseAsync {
58 vec4 orientation;
59 vec4 translation;
60 vec4 right_orientation;
61 vec4 right_translation;
62 vec4 angular_velocity;
63 vec4 velocity;
64 vec4 reserved[2];
65 };
66
67 // Must match LateLatchInputData C struct.
68 layout(binding = INPUT_BINDING, std140)
69 buffer InputData {
70 mat4 uEyeFromHeadMat[kSurfaceViewMaxCount];
71 mat4 uProjMat[kSurfaceViewMaxCount];
72 mat4 uPoseOffset[kSurfaceViewMaxCount];
73 mat4 uEdsMat1[kSurfaceViewMaxCount];
74 mat4 uEdsMat2[kSurfaceViewMaxCount];
75 uint uPoseIndex;
76 uint uRenderPoseIndex;
77 } bIn;
78
79 // std140 is to layout the structure in a consistent, standard way so we
80 // can access it from C++.
81 // This structure exactly matches the pose ring buffer in pose_client.h.
82 layout(binding = POSE_BINDING, std140)
83 buffer PoseBuffer {
84 DvrPoseAsync data[kPoseAsyncBufferTotalCount];
85 } bPose;
86
87 // Must stay in sync with DisplaySurfaceMetadata C struct.
88 // GPU thread 0 will exclusively read in a pose and capture it
89 // into this array.
90 layout(binding = RENDER_POSE_BINDING, std140)
91 buffer DisplaySurfaceMetadata {
92 vec4 orientation[kSurfaceBufferMaxCount];
93 vec4 translation[kSurfaceBufferMaxCount];
94 } bSurfaceData;
95
96 // Must stay in sync with DisplaySurfaceMetadata C struct.
97 // Each thread writes to a vertic
98 layout(binding = OUTPUT_BINDING, std140)
99 buffer Output {
100 mat4 viewProjMatrix[kSurfaceViewMaxCount];
101 mat4 viewMatrix[kSurfaceViewMaxCount];
102 vec4 quaternion;
103 vec4 translation;
104 } bOut;
105
106 // Thread 0 will also store the single quat/pos pair in shared variables
107 // for the other threads to use (left and right eye in this array).
108 shared Pose sharedPose[2];
109
110 // Rotate v1 by the given quaternion. This is based on mathfu's
111 // Quaternion::Rotate function. It is the typical implementation of this
112 // operation. Eigen has a similar method (Quaternion::_transformVector) that
113 // supposedly requires fewer operations, but I am skeptical of optimizing
114 // shader code without proper profiling first.
115 vec3 rotate(vec4 quat, vec3 v1) {
116 float ss = 2.0 * quat.w;
117 vec3 v = quat.xyz;
118 return ss * cross(v, v1) + (ss * quat.w - 1.0) * v1 +
119 2.0 * dot(v, v1) * v;
120 }
121
122 // See Eigen Quaternion::conjugate;
123 // Note that this isn't a true multiplicative inverse unless you can guarantee
124 // quat is also normalized, but that typically isn't an issue for our
125 // purposes.
126 vec4 quatInvert(vec4 quat) {
127 return vec4(-quat.xyz, quat.w);
128 }
129
130 // This is based on mathfu's Quaternion::operator*(Quaternion)
131 // Eigen's version is mathematically equivalent, just notationally different.
132 vec4 quatMul(vec4 q1, vec4 q2) {
133 return vec4(q1.w * q2.xyz + q2.w * q1.xyz + cross(q1.xyz, q2.xyz),
134 q1.w * q2.w - dot(q1.xyz, q2.xyz));
135 }
136
137 // Equivalent to pose.h GetObjectFromReferenceMatrix.
138 mat4 getInverseMatrix(Pose pose) {
139 // Invert quaternion and store fields the way Eigen does so we can
140 // keep in sync with Eigen methods easier.
141 vec4 quatInv = quatInvert(pose.quat);
142 vec3 v = quatInv.xyz;
143 float s = quatInv.w;
144 // Convert quaternion to matrix. See Eigen Quaternion::toRotationMatrix()
145 float x2 = v.x * v.x, y2 = v.y * v.y, z2 = v.z * v.z;
146 float sx = s * v.x, sy = s * v.y, sz = s * v.z;
147 float xz = v.x * v.z, yz = v.y * v.z, xy = v.x * v.y;
148 // Inverse translation.
149 vec3 point = -pose.pos;
150
151 return
152 mat4(1.0 - 2.0 * (y2 + z2), 2.0 * (xy + sz), 2.0 * (xz - sy), 0.0,
153 2.0 * (xy - sz), 1.0 - 2.0 * (x2 + z2), 2.0 * (sx + yz), 0.0,
154 2.0 * (sy + xz), 2.0 * (yz - sx), 1.0 - 2.0 * (x2 + y2), 0.0,
155 0.0, 0.0, 0.0, 1.0)*
156 mat4(1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0,
157 point.x, point.y, point.z, 1.0);
158 }
159
160 void appLateLatch() {
161 uint poseIndex = (gl_LocalInvocationIndex & uint(1));
162 mat4 head_from_center = getInverseMatrix(sharedPose[poseIndex]);
163 bOut.viewMatrix[gl_LocalInvocationIndex] =
164 bIn.uEyeFromHeadMat[gl_LocalInvocationIndex] *
165 head_from_center * bIn.uPoseOffset[gl_LocalInvocationIndex];
166 bOut.viewProjMatrix[gl_LocalInvocationIndex] =
167 bIn.uProjMat[gl_LocalInvocationIndex] *
168 bOut.viewMatrix[gl_LocalInvocationIndex];
169 }
170
171 // Extract the app frame's pose.
172 Pose getPoseFromApp() {
173 Pose p;
174 p.quat = bSurfaceData.orientation[bIn.uRenderPoseIndex];
175 p.pos = bSurfaceData.translation[bIn.uRenderPoseIndex].xyz;
176 return p;
177 }
178
179 // See Posef::GetPoseOffset.
180 Pose getPoseOffset(Pose p1, Pose p2) {
181 Pose p;
182 p.quat = quatMul(quatInvert(p2.quat), p1.quat);
183 // TODO(jbates) Consider enabling positional EDS when it is better
184 // tested.
185 // p.pos = p2.pos - p1.pos;
186 p.pos = vec3(0.0);
187 return p;
188 }
189
190 void edsLateLatch() {
191 Pose pose1 = getPoseFromApp();
192 Pose correction;
193 // Ignore the texture pose if the quat is not unit-length.
194 float tex_quat_length = length(pose1.quat);
195 uint poseIndex = (gl_LocalInvocationIndex & uint(1));
196 if (abs(tex_quat_length - 1.0) < 0.001)
197 correction = getPoseOffset(pose1, sharedPose[poseIndex]);
198 else
199 correction = Pose(vec4(0, 0, 0, 1), vec3(0, 0, 0));
200 mat4 eye_old_from_eye_new_matrix = getInverseMatrix(correction);
201 bOut.viewProjMatrix[gl_LocalInvocationIndex] =
202 bIn.uEdsMat1[gl_LocalInvocationIndex] *
203 eye_old_from_eye_new_matrix * bIn.uEdsMat2[gl_LocalInvocationIndex];
204 // Currently unused, except for debugging:
205 bOut.viewMatrix[gl_LocalInvocationIndex] = eye_old_from_eye_new_matrix;
206 }
207
208 // One thread per surface view.
209 layout (local_size_x = kSurfaceViewMaxCount, local_size_y = 1,
210 local_size_z = 1) in;
211
212 void main() {
213 // First, thread 0 late latches pose and stores it into various places.
214 if (gl_LocalInvocationIndex == uint(0)) {
215 sharedPose[0].quat = bPose.data[bIn.uPoseIndex].orientation;
216 sharedPose[0].pos = bPose.data[bIn.uPoseIndex].translation.xyz;
217 sharedPose[1].quat = bPose.data[bIn.uPoseIndex].right_orientation;
218 sharedPose[1].pos = bPose.data[bIn.uPoseIndex].right_translation.xyz;
219 if (IS_APP_LATE_LATCH) {
220 bSurfaceData.orientation[bIn.uRenderPoseIndex] = sharedPose[0].quat;
221 bSurfaceData.translation[bIn.uRenderPoseIndex] = vec4(sharedPose[0].pos, 0.0);
222 // TODO(jbates) implement app late-latch support for separate eye poses.
223 // App late latch currently uses the same pose for both eye views.
224 sharedPose[1] = sharedPose[0];
225 }
226 bOut.quaternion = sharedPose[0].quat;
227 bOut.translation = vec4(sharedPose[0].pos, 0.0);
228 }
229
230 // Memory barrier to make sure all threads can see prior writes.
231 memoryBarrierShared();
232
233 // Execution barrier to block all threads here until all threads have
234 // reached this point -- ensures the late latching is done.
235 barrier();
236
237 if (IS_APP_LATE_LATCH)
238 appLateLatch();
239 else
240 edsLateLatch();
241 }
242)";
243
244} // anonymous namespace
245
246namespace android {
247namespace dvr {
248
249LateLatch::LateLatch(bool is_app_late_latch)
250 : LateLatch(is_app_late_latch, LocalHandle()) {}
251
252LateLatch::LateLatch(bool is_app_late_latch,
253 LocalHandle&& surface_metadata_fd)
254 : is_app_late_latch_(is_app_late_latch),
255 app_late_latch_output_(NULL),
256 eds_late_latch_output_(NULL) {
257 CHECK_GL();
258 glGenBuffers(1, &input_buffer_id_);
259 glBindBuffer(GL_SHADER_STORAGE_BUFFER, input_buffer_id_);
260 glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(LateLatchInput), nullptr,
261 GL_DYNAMIC_DRAW);
262 glGenBuffers(1, &output_buffer_id_);
263 glBindBuffer(GL_SHADER_STORAGE_BUFFER, output_buffer_id_);
264 glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(LateLatchOutput), nullptr,
265 GL_DYNAMIC_COPY);
266 CHECK_GL();
267
268 LocalHandle pose_buffer_fd;
269 pose_client_ = dvrPoseCreate();
270 if (!pose_client_) {
271 LOG(ERROR) << "LateLatch Error: failed to create pose client";
272 } else {
273 int ret = privateDvrPoseGetRingBufferFd(pose_client_, &pose_buffer_fd);
274 if (ret < 0) {
275 LOG(ERROR) << "LateLatch Error: failed to get pose ring buffer";
276 }
277 }
278
279 glGenBuffers(1, &pose_buffer_object_);
280 glGenBuffers(1, &metadata_buffer_id_);
281 if (!glBindSharedBufferQCOM) {
282 LOG(ERROR) << "Error: Missing gralloc buffer extension, no pose data";
283 } else {
284 if (pose_buffer_fd) {
285 glBindBuffer(GL_SHADER_STORAGE_BUFFER, pose_buffer_object_);
286 glBindSharedBufferQCOM(GL_SHADER_STORAGE_BUFFER,
287 kPoseAsyncBufferTotalCount * sizeof(DvrPoseAsync),
288 pose_buffer_fd.Release());
289 }
290 CHECK_GL();
291 }
292
293 glBindBuffer(GL_SHADER_STORAGE_BUFFER, metadata_buffer_id_);
294 if (surface_metadata_fd && glBindSharedBufferQCOM) {
295 glBindSharedBufferQCOM(GL_SHADER_STORAGE_BUFFER,
296 sizeof(DisplaySurfaceMetadata),
297 surface_metadata_fd.Release());
298 } else {
299 // Fall back on internal metadata buffer when none provided, for example
300 // when distortion is done in the application process.
301 glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(DisplaySurfaceMetadata),
302 nullptr, GL_DYNAMIC_COPY);
303 }
304 CHECK_GL();
305 glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
306
307 CHECK_GL();
308 LoadLateLatchShader();
309}
310
311LateLatch::~LateLatch() {
312 glDeleteBuffers(1, &metadata_buffer_id_);
313 glDeleteBuffers(1, &input_buffer_id_);
314 glDeleteBuffers(1, &output_buffer_id_);
315 glDeleteBuffers(1, &pose_buffer_object_);
316 dvrPoseDestroy(pose_client_);
317}
318
319void LateLatch::LoadLateLatchShader() {
320 std::string str;
321 str += "\n#define POSE_BINDING " STRINGIFY(POSE_BINDING);
322 str += "\n#define RENDER_POSE_BINDING " STRINGIFY(RENDER_POSE_BINDING);
323 str += "\n#define INPUT_BINDING " STRINGIFY(INPUT_BINDING);
324 str += "\n#define OUTPUT_BINDING " STRINGIFY(OUTPUT_BINDING);
325 str += "\n#define kPoseAsyncBufferTotalCount " STRINGIFY(
326 kPoseAsyncBufferTotalCount);
327 str += "\n#define kSurfaceBufferMaxCount " STRINGIFY(kSurfaceBufferMaxCount);
328 str += "\n#define kSurfaceBufferMaxCount " STRINGIFY(kSurfaceBufferMaxCount);
329 str += "\n#define kSurfaceViewMaxCount " STRINGIFY(kSurfaceViewMaxCount);
330 str += "\n#define IS_APP_LATE_LATCH ";
331 str += is_app_late_latch_ ? "true" : "false";
332 str += "\n";
333 str += kShaderLateLatch;
334 late_latch_program_.Link(str);
335 CHECK_GL();
336}
337
338void LateLatch::CaptureOutputData(LateLatchOutput* data) const {
339 glBindBuffer(GL_SHADER_STORAGE_BUFFER, output_buffer_id_);
340 LateLatchOutput* out_data = static_cast<LateLatchOutput*>(glMapBufferRange(
341 GL_SHADER_STORAGE_BUFFER, 0, sizeof(LateLatchOutput), GL_MAP_READ_BIT));
342 *data = *out_data;
343 glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
344 glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
345 CHECK_GL();
346}
347
348void LateLatch::AddLateLatch(const LateLatchInput& data) const {
349 CHECK(is_app_late_latch_);
350 CHECK_GL();
351 late_latch_program_.Use();
352
353 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, RENDER_POSE_BINDING,
354 metadata_buffer_id_);
355 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, POSE_BINDING, pose_buffer_object_);
356 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, OUTPUT_BINDING, output_buffer_id_);
357 glBindBuffer(GL_SHADER_STORAGE_BUFFER, input_buffer_id_);
358 LateLatchInput* adata = (LateLatchInput*)glMapBufferRange(
359 GL_SHADER_STORAGE_BUFFER, 0, sizeof(LateLatchInput),
360 GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);
361 if (adata)
362 *adata = data;
363 else
364 LOG(ERROR) << "Error: LateLatchInput gl mapping is null";
365 glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
366 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, INPUT_BINDING, input_buffer_id_);
367 glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
368 CHECK_GL();
369
370 // The output buffer is going to be written but it may be read by
371 // earlier shaders, so we need a shader storage memory barrier.
372 glMemoryBarrier(GL_SHADER_STORAGE_BUFFER);
373
374 glDispatchCompute(1, 1, 1);
375 CHECK_GL();
376
377 // The transform feedback buffer is going to be read as a uniform by the app,
378 // so we need a uniform memory barrier.
379 glMemoryBarrier(GL_UNIFORM_BARRIER_BIT);
380
381 if (app_late_latch_output_) {
382 // Capture the output data:
383 CaptureOutputData(app_late_latch_output_);
384 }
385#if PRINT_MATRIX
386 // Print the composed matrix to stderr:
387 LateLatchOutput out_data;
388 CaptureOutputData(&out_data);
389 CHECK_GL();
390 PE("LL APP slot:%d\n", data.render_pose_index);
391 PM4(data.proj_mat[0]);
392 PM4(out_data.view_proj_matrix[0]);
393 PM4(out_data.view_proj_matrix[1]);
394 PM4(out_data.view_proj_matrix[2]);
395 PM4(out_data.view_proj_matrix[3]);
396 PM4(out_data.view_matrix[0]);
397 PM4(out_data.view_matrix[1]);
398 PM4(out_data.view_matrix[2]);
399 PM4(out_data.view_matrix[3]);
400 PV4(out_data.pose_quaternion);
401 PV4(out_data.pose_translation);
402#endif
403
404 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, RENDER_POSE_BINDING, 0);
405 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, POSE_BINDING, 0);
406 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, OUTPUT_BINDING, 0);
407 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, INPUT_BINDING, 0);
408 glUseProgram(0);
409}
410
411void LateLatch::AddEdsLateLatch(const LateLatchInput& data,
412 GLuint render_pose_buffer_object) const {
413 CHECK(!is_app_late_latch_);
414 late_latch_program_.Use();
415
416 // Fall back on internal buffer when none is provided.
417 if (!render_pose_buffer_object)
418 render_pose_buffer_object = metadata_buffer_id_;
419
420 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, RENDER_POSE_BINDING,
421 render_pose_buffer_object);
422 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, POSE_BINDING, pose_buffer_object_);
423 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, OUTPUT_BINDING, output_buffer_id_);
424 glBindBuffer(GL_SHADER_STORAGE_BUFFER, input_buffer_id_);
425 LateLatchInput* adata = (LateLatchInput*)glMapBufferRange(
426 GL_SHADER_STORAGE_BUFFER, 0, sizeof(LateLatchInput),
427 GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT);
428 *adata = data;
429 glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
430 glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
431 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, INPUT_BINDING, input_buffer_id_);
432 CHECK_GL();
433
434 glDispatchCompute(1, 1, 1);
435 CHECK_GL();
436
437 if (eds_late_latch_output_) {
438 // Capture the output data:
439 CaptureOutputData(eds_late_latch_output_);
440 }
441#if PRINT_MATRIX
442 // Print the composed matrix to stderr:
443 LateLatchOutput out_data;
444 CaptureOutputData(&out_data);
445 CHECK_GL();
446 PE("LL EDS\n");
447 PM4(out_data.view_proj_matrix[0]);
448 PM4(out_data.view_matrix[0]);
449 PV4(out_data.pose_quaternion);
450 PV4(out_data.pose_translation);
451#endif
452
453 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, RENDER_POSE_BINDING, 0);
454 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, POSE_BINDING, 0);
455 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, OUTPUT_BINDING, 0);
456 glBindBufferBase(GL_SHADER_STORAGE_BUFFER, INPUT_BINDING, 0);
457 glUseProgram(0);
458}
459
460} // namespace dvr
461} // namespace android