Spatial Audio: Roll pitch yaw logging.
Add roll, pitch, yaw angle logging.
Test: atest libheadtracking-test
Test: check dumpsys, use head tracker
Bug: 269620212
Bug: 269683154
Merged-In: Iaa0249b8498a0b5d4e72e777d62036096e643f37
Change-Id: Iaa0249b8498a0b5d4e72e777d62036096e643f37
diff --git a/media/libheadtracking/Android.bp b/media/libheadtracking/Android.bp
index 5bffb42..9636949 100644
--- a/media/libheadtracking/Android.bp
+++ b/media/libheadtracking/Android.bp
@@ -88,6 +88,7 @@
],
shared_libs: [
"libaudioutils",
+ "libbase", // StringAppendF
"libheadtracking",
],
}
diff --git a/media/libheadtracking/QuaternionUtil-test.cpp b/media/libheadtracking/QuaternionUtil-test.cpp
index ebb4223..cfeca00 100644
--- a/media/libheadtracking/QuaternionUtil-test.cpp
+++ b/media/libheadtracking/QuaternionUtil-test.cpp
@@ -51,6 +51,92 @@
EXPECT_EQ(vec, quaternionToRotationVector(rotationVectorToQuaternion(vec)));
}
+// Float precision necessitates this tolerance (1e-4f fails)
+constexpr float NEAR = 1e-3f;
+
+TEST(QuaternionUtil, quaternionToAngles_basic) {
+ float pitch, roll, yaw;
+
+ // A rotation about a single axis (X, Y, or Z) must be reported only on the
+ // matching angle (pitch, roll, or yaw); 11 angles from -M_PI / 2 to M_PI / 2 are used.
+ for (int step = -5; step <= 5; ++step) {
+ const float angle = M_PI * step * 0.1f;
+
+ quaternionToAngles(rotationVectorToQuaternion({angle, 0.f, 0.f}), &pitch, &roll, &yaw);
+ EXPECT_NEAR(angle, pitch, NEAR);
+ EXPECT_NEAR(0.f, roll, NEAR);
+ EXPECT_NEAR(0.f, yaw, NEAR);
+
+ quaternionToAngles(rotationVectorToQuaternion({0.f, angle, 0.f}), &pitch, &roll, &yaw);
+ EXPECT_NEAR(0.f, pitch, NEAR);
+ EXPECT_NEAR(angle, roll, NEAR);
+ EXPECT_NEAR(0.f, yaw, NEAR);
+
+ quaternionToAngles(rotationVectorToQuaternion({0.f, 0.f, angle}), &pitch, &roll, &yaw);
+ EXPECT_NEAR(0.f, pitch, NEAR);
+ EXPECT_NEAR(0.f, roll, NEAR);
+ EXPECT_NEAR(angle, yaw, NEAR);
+ }
+
+ // The DEBUG variant returns a debug string, which must not be empty.
+ const std::string s = quaternionToAngles<true /* DEBUG */>(
+ rotationVectorToQuaternion({M_PI, 0.f, 0.f}), &pitch, &roll, &yaw);
+ ASSERT_FALSE(s.empty());
+}
+
+TEST(QuaternionUtil, quaternionToAngles_zaxis) {
+ float pitch, roll, yaw;
+
+ for (int rot_step = -10; rot_step <= 10; ++rot_step) {
+ const float rot_angle = M_PI * rot_step * 0.1f;
+ // Pitch (with zero roll) must be unaffected by an additional world Z rotation.
+
+ // We don't test the boundaries of pitch +-M_PI/2 as roll can become
+ // degenerate and atan2(0, 0) may report 0, PI, or -PI.
+ for (int step = -4; step <= 4; ++step) {
+ const float angle = M_PI * step * 0.1f;
+ auto q = rotationVectorToQuaternion({angle, 0.f, 0.f});
+ auto world_z = rotationVectorToQuaternion({0.f, 0.f, rot_angle});
+
+ // Sequential active rotations (on world frame) compose as R_2 * R_1.
+ quaternionToAngles(world_z * q, &pitch, &roll, &yaw);
+
+ EXPECT_NEAR(angle, pitch, NEAR);
+ EXPECT_NEAR(0.f, roll, NEAR);
+ }
+
+ // Roll (with zero pitch) must be unaffected by an additional world Z rotation.
+ for (int step = -5; step <= 5; ++step) {
+ const float angle = M_PI * step * 0.1f;
+ auto q = rotationVectorToQuaternion({0.f, angle, 0.f});
+ auto world_z = rotationVectorToQuaternion({0.f, 0.f, rot_angle});
+
+ // Sequential active rotations (on world frame) compose as R_2 * R_1.
+ quaternionToAngles(world_z * q, &pitch, &roll, &yaw);
+
+ EXPECT_NEAR(0.f, pitch, NEAR);
+ EXPECT_NEAR(angle, roll, NEAR);
+
+ // Convert extrinsic (world-based) active rotations to a sequence of
+ // intrinsic rotations (each rotation based off of previous rotation
+ // frame).
+ //
+ // R_1 * R_intrinsic = R_extrinsic * R_1
+ // implies
+ // R_intrinsic = (R_1)^-1 R_extrinsic R_1
+ //
+ auto world_z_intrinsic = rotationVectorToQuaternion(
+ q.inverse() * Vector3f(0.f, 0.f, rot_angle));
+
+ // Sequential intrinsic rotations compose as R_1 * R_2.
+ quaternionToAngles(q * world_z_intrinsic, &pitch, &roll, &yaw);
+
+ EXPECT_NEAR(0.f, pitch, NEAR);
+ EXPECT_NEAR(angle, roll, NEAR);
+ }
+ }
+}
+
} // namespace
} // namespace media
} // namespace android
diff --git a/media/libheadtracking/include/media/QuaternionUtil.h b/media/libheadtracking/include/media/QuaternionUtil.h
index f7a2ca9..a711d17 100644
--- a/media/libheadtracking/include/media/QuaternionUtil.h
+++ b/media/libheadtracking/include/media/QuaternionUtil.h
@@ -15,7 +15,9 @@
*/
#pragma once
+#include <android-base/stringprintf.h>
#include <Eigen/Geometry>
+#include <media/Pose.h>
namespace android {
namespace media {
@@ -52,5 +54,244 @@
*/
Eigen::Quaternionf rotateZ(float angle);
+/**
+ * Compute separate roll, pitch, and yaw angles from a quaternion
+ *
+ * The roll, pitch, and yaw follow standard 3DOF virtual reality definitions
+ * with angles increasing counter-clockwise by the right hand rule.
+ *
+ * https://en.wikipedia.org/wiki/Six_degrees_of_freedom
+ *
+ * The roll, pitch, and yaw angles are calculated separately from the device frame
+ * rotation from the world frame. This is not to be confused with the
+ * intrinsic Euler xyz roll, pitch, yaw 'nautical' angles.
+ *
+ * The input quaternion is the active rotation that transforms the
+ * World/Stage frame to the Head/Screen frame.
+ *
+ * The input quaternion may come from two principal sensors: DEVICE and HEADSET
+ * and are interpreted as below.
+ *
+ * DEVICE SENSOR
+ *
+ * Android sensor stack assumes device coordinates along the x/y axis.
+ *
+ * https://developer.android.com/reference/android/hardware/SensorEvent#sensor.type_rotation_vector:
+ *
+ * Looking down from the clouds. Android Device coordinate system (not used)
+ *        DEVICE --> X (Y goes through top speaker towards the observer)
+ *           | Z
+ *           V
+ *         USER
+ *
+ * Internally within this library, we transform the device sensor coordinate
+ * system by rotating the coordinate system around the X axis by -M_PI/2.
+ * This aligns the device coordinate system to match that of the
+ * Head Tracking sensor (see below), should the user be facing the device in
+ * natural (phone == portrait, tablet == ?) orientation.
+ *
+ * Looking down from the clouds. Spatializer device frame.
+ *           Y
+ *           ^
+ *           |
+ *        DEVICE --> X (Z goes through top of the DEVICE towards the observer)
+ *
+ *         USER
+ *
+ * The reference world frame is the device in vertical
+ * natural (phone == portrait) orientation with the top pointing straight
+ * up from the ground and the front-to-back direction facing north.
+ * The world frame is presumed locally fixed by magnetic and gravitational reference.
+ *
+ * HEADSET SENSOR
+ * https://developer.android.com/reference/android/hardware/SensorEvent#sensor.type_head_tracker:
+ *
+ * Looking down from the clouds. Headset frame.
+ *           Y
+ *           ^
+ *           |
+ *         USER ---> X
+ *         (Z goes through the top of the USER head towards the observer)
+ *
+ * The Z axis goes from the neck to the top of the head, the X axis goes
+ * from the left ear to the right ear, the Y axis goes from the back of the
+ * head through the nose.
+ *
+ * Typically for a headset sensor, the X and Y axes have some arbitrary fixed
+ * reference.
+ *
+ * ROLL
+ * Roll is the counter-clockwise L/R motion around the Y axis (hence ZX plane).
+ * The right hand convention means the plane is ZX not XZ.
+ * This can be considered the azimuth angle in spherical coordinates
+ * with Pitch being the elevation angle.
+ *
+ * Roll has a range of -M_PI to M_PI radians.
+ *
+ * Rolling a device changes between portrait and landscape
+ * modes, and for L/R speakers will limit the amount of crosstalk cancellation.
+ * Roll increases as the device (if vertical like a coin) rolls from left to right.
+ *
+ * By this definition, Roll is less accurate when the device is flat
+ * on a table rather than standing on edge.
+ * When perfectly flat on the table, roll may report as 0, M_PI, or -M_PI
+ * due to the ambiguity / degeneracy of atan2(0, 0) in this case (the device Y axis aligns
+ * with the world Z axis), but exactly flat rarely occurs.
+ *
+ * Roll for a headset is the angle the head is inclined to the right side
+ * (like sleeping).
+ *
+ * PITCH
+ * Pitch is the Surface normal Y deviation (along the Z axis away from the earth).
+ * This can be considered the elevation angle in spherical coordinates using
+ * Roll as the azimuth angle.
+ *
+ * Pitch for a device determines whether the device is "upright" or lying
+ * flat on the table (i.e. surface normal). Pitch is 0 when upright, decreases
+ * as the device top moves away from the user to -M_PI/2 when lying down face up.
+ * Pitch increases from 0 to M_PI/2 when the device tilts towards the user, and is
+ * M_PI/2 radians when face down.
+ *
+ * Pitch for a headset is the user tilting the head/chin up or down,
+ * like nodding.
+ *
+ * Pitch has a range of -M_PI/2 to M_PI/2 radians.
+ *
+ * YAW
+ * Yaw is the rotational component along the earth's XY tangential plane,
+ * where the Z axis points radially away from the earth.
+ *
+ * Yaw has a range of -M_PI to M_PI radians. If used for azimuth angle in
+ * spherical coordinates, the elevation angle may be derived from the Z axis.
+ *
+ * A positive increase means the phone is rotating from right to left
+ * when considered flat on the table.
+ * (headset: the user is rotating their head to look left).
+ * If left speaker or right earbud is pointing straight up or down,
+ * this value is imprecise and Pitch or Roll is a more useful measure.
+ *
+ * Yaw for a device is like spinning a vertical device along the axis of
+ * gravity, like spinning a coin. Yaw increases as the coin / device
+ * spins from right to left, rotating around the Z axis.
+ *
+ * Yaw for a headset is the user turning the head to look left or right
+ * like shaking the head for no. Yaw is the primary angle for a binaural
+ * head tracking device.
+ *
+ * @param q input active rotation Eigen quaternion.
+ * @param pitch output set to pitch if not nullptr
+ * @param roll output set to roll if not nullptr
+ * @param yaw output set to yaw if not nullptr
+ * @return (DEBUG==true) a debug string with intermediate transformation matrix
+ * interpreted as the unit basis vectors.
+ */
+
+// DEBUG returns a debug string ("ux: x y z uy: x y z uz: x y z") for analysis.
+// We save unneeded rotation matrix computation by keeping the DEBUG option constexpr.
+template <bool DEBUG = false>
+auto quaternionToAngles(const Eigen::Quaternionf& q, float *pitch, float *roll, float *yaw) {
+ /*
+ * The quaternion here is the active rotation that transforms from the world frame
+ * to the device frame: the observer remains in the world frame,
+ * and the device (frame) moves.
+ *
+ * We use this to map device coordinates to world coordinates.
+ *
+ * Device: We transform the device right speaker (X == 1), top speaker (Z == 1),
+ * and surface inwards normal (Y == 1) positions to the world frame.
+ *
+ * Headset: We transform the headset right bud (X == 1), top (Z == 1) and
+ * nose normal (Y == 1) positions to the world frame.
+ *
+ * This is the same as the world frame coordinates of the
+ * unit device vector in the X dimension (ux),
+ * unit device vector in the Y dimension (uy),
+ * unit device vector in the Z dimension (uz).
+ *
+ * Rather than doing the rotation on unit vectors individually,
+ * one can simply use the columns of the rotation matrix of
+ * the world-to-body quaternion, so the computation is exceptionally fast.
+ *
+ * Furthermore, Eigen inlines the "toRotationMatrix" method
+ * and we rely on unused expression removal for efficiency
+ * and any elements not used should not be computed.
+ *
+ * Side note: For applying a rotation to several points,
+ * it is more computationally efficient to extract and
+ * use the rotation matrix form than the quaternion.
+ * So use of the rotation matrix is good for many reasons.
+ */
+ const auto rotation = q.toRotationMatrix();
+
+ /*
+ * World location of unit vector right speaker assuming the phone is situated
+ * in natural (phone == portrait) mode.
+ * (headset: right bud).
+ *
+ * auto ux = q * Eigen::Vector3f{1.f, 0.f, 0.f};
+ * = rotation.col(0);
+ */
+ [[maybe_unused]] const auto ux_0 = rotation.coeff(0, 0);
+ [[maybe_unused]] const auto ux_1 = rotation.coeff(1, 0);
+ [[maybe_unused]] const auto ux_2 = rotation.coeff(2, 0);
+
+ [[maybe_unused]] std::string coordinates;
+ if constexpr (DEBUG) {
+ base::StringAppendF(&coordinates, "ux: %f %f %f", ux_0, ux_1, ux_2);
+ }
+
+ /*
+ * World location of screen-inwards normal assuming the phone is situated
+ * in natural (phone == portrait) mode.
+ * (headset: user nose).
+ *
+ * auto uy = q * Eigen::Vector3f{0.f, 1.f, 0.f};
+ * = rotation.col(1);
+ */
+ [[maybe_unused]] const auto uy_0 = rotation.coeff(0, 1);
+ [[maybe_unused]] const auto uy_1 = rotation.coeff(1, 1);
+ [[maybe_unused]] const auto uy_2 = rotation.coeff(2, 1);
+ if constexpr (DEBUG) {
+ base::StringAppendF(&coordinates, " uy: %f %f %f", uy_0, uy_1, uy_2);
+ }
+
+ /*
+ * World location of unit vector top speaker.
+ * (headset: top of head).
+ * auto uz = q * Eigen::Vector3f{0.f, 0.f, 1.f};
+ * = rotation.col(2);
+ */
+ [[maybe_unused]] const auto uz_0 = rotation.coeff(0, 2);
+ [[maybe_unused]] const auto uz_1 = rotation.coeff(1, 2);
+ [[maybe_unused]] const auto uz_2 = rotation.coeff(2, 2);
+ if constexpr (DEBUG) {
+ base::StringAppendF(&coordinates, " uz: %f %f %f", uz_0, uz_1, uz_2);
+ }
+
+ // pitch computed from nose world Z coordinate;
+ // hence independent of rotation around world Z.
+ if (pitch != nullptr) {
+ *pitch = asin(std::clamp(uy_2, -1.f, 1.f));
+ }
+
+ // roll computed from head/right world Z coordinate;
+ // hence independent of rotation around world Z.
+ if (roll != nullptr) {
+ // atan2 takes care of implicit scale normalization of Z, X.
+ *roll = -atan2(ux_2, uz_2);
+ }
+
+ // yaw computed from right ear angle projected onto world XY plane
+ // where world Z == 0. This is the rotation around world Z.
+ if (yaw != nullptr) {
+ // atan2 takes care of implicit scale normalization of X, Y.
+ *yaw = atan2(ux_1, ux_0);
+ }
+
+ if constexpr (DEBUG) {
+ return coordinates;
+ }
+}
+
} // namespace media
} // namespace android
diff --git a/services/audiopolicy/service/Spatializer.cpp b/services/audiopolicy/service/Spatializer.cpp
index cf9543c..2f65f39 100644
--- a/services/audiopolicy/service/Spatializer.cpp
+++ b/services/audiopolicy/service/Spatializer.cpp
@@ -33,6 +33,7 @@
#include <media/stagefright/foundation/AHandler.h>
#include <media/stagefright/foundation/AMessage.h>
#include <media/MediaMetricsItem.h>
+#include <media/QuaternionUtil.h>
#include <media/ShmemCompat.h>
#include <mediautils/SchedulingPolicyService.h>
#include <mediautils/ServiceUtilities.h>
@@ -75,13 +76,21 @@
return maxMask;
}
-std::vector<float> recordFromRotationVector(const std::vector<float>& rotationVector) {
+static std::vector<float> recordFromTranslationRotationVector(
+ const std::vector<float>& trVector) {
+ auto headToStageOpt = Pose3f::fromVector(trVector);
+ if (!headToStageOpt) return {};
+
+ const auto stageToHead = headToStageOpt.value().inverse();
+ const auto stageToHeadTranslation = stageToHead.translation();
constexpr float RAD_TO_DEGREE = 180.f / M_PI;
std::vector<float> record{
- rotationVector[0], rotationVector[1], rotationVector[2],
- rotationVector[3] * RAD_TO_DEGREE,
- rotationVector[4] * RAD_TO_DEGREE,
- rotationVector[5] * RAD_TO_DEGREE};
+ stageToHeadTranslation[0], stageToHeadTranslation[1], stageToHeadTranslation[2],
+ 0.f, 0.f, 0.f};
+ media::quaternionToAngles(stageToHead.rotation(), &record[3], &record[4], &record[5]);
+ record[3] *= RAD_TO_DEGREE;
+ record[4] *= RAD_TO_DEGREE;
+ record[5] *= RAD_TO_DEGREE;
return record;
}
@@ -747,8 +756,9 @@
callback = mHeadTrackingCallback;
if (mEngine != nullptr) {
setEffectParameter_l(SPATIALIZER_PARAM_HEAD_TO_STAGE, headToStage);
- mPoseRecorder.record(headToStage);
- mPoseDurableRecorder.record(headToStage);
+ const auto record = recordFromTranslationRotationVector(headToStage);
+ mPoseRecorder.record(record);
+ mPoseDurableRecorder.record(record);
}
}
@@ -1024,8 +1034,7 @@
}
std::string Spatializer::toString(unsigned level) const {
- std::string prefixSpace;
- prefixSpace.append(level, ' ');
+ std::string prefixSpace(level, ' ');
std::string ss = prefixSpace + "Spatializer:\n";
bool needUnlock = false;
@@ -1081,14 +1090,15 @@
// PostController dump.
if (mPoseController != nullptr) {
- ss += mPoseController->toString(level + 1);
- ss.append(prefixSpace +
- "Sensor data format - [rx, ry, rz, vx, vy, vz] (units-degree, "
- "r-transform, v-angular velocity, x-pitch, y-roll, z-yaw):\n");
- ss.append(prefixSpace + " PerMinuteHistory:\n");
- ss += mPoseDurableRecorder.toString(level + 1);
- ss.append(prefixSpace + " PerSecondHistory:\n");
- ss += mPoseRecorder.toString(level + 1);
+ ss.append(mPoseController->toString(level + 1))
+ .append(prefixSpace)
+ .append("Pose (active stage-to-head) [tx, ty, tz, pitch, roll, yaw]:\n")
+ .append(prefixSpace)
+ .append(" PerMinuteHistory:\n")
+ .append(mPoseDurableRecorder.toString(level + 2))
+ .append(prefixSpace)
+ .append(" PerSecondHistory:\n")
+ .append(mPoseRecorder.toString(level + 2));
} else {
ss.append(prefixSpace).append("SpatializerPoseController not exist\n");
}