Merge "MPEG4Extractor: support MPEG-H 3D Audio (2/2)" am: bb67898668
Original change: https://android-review.googlesource.com/c/platform/frameworks/av/+/1671306
Change-Id: I5493791fee7e783bc2bdfe9c3e0beb39165b46be
diff --git a/media/extractors/mp4/MPEG4Extractor.cpp b/media/extractors/mp4/MPEG4Extractor.cpp
index 1bc8c63..f725a8c 100644
--- a/media/extractors/mp4/MPEG4Extractor.cpp
+++ b/media/extractors/mp4/MPEG4Extractor.cpp
@@ -62,6 +62,16 @@
#define ALAC_SPECIFIC_INFO_SIZE (36)
+// TODO : Remove the defines once mainline media is built against NDK >= 31.
+// The mp4 extractor is part of mainline and builds against NDK 29 as of
+// writing. These keys are available only from NDK 31:
+#define AMEDIAFORMAT_KEY_MPEGH_PROFILE_LEVEL_INDICATION \
+ "mpegh-profile-level-indication"
+#define AMEDIAFORMAT_KEY_MPEGH_REFERENCE_CHANNEL_LAYOUT \
+ "mpegh-reference-channel-layout"
+#define AMEDIAFORMAT_KEY_MPEGH_COMPATIBLE_SETS \
+ "mpegh-compatible-sets"
+
namespace android {
enum {
@@ -139,6 +149,7 @@
bool mIsHEVC;
bool mIsDolbyVision;
bool mIsAC4;
+ bool mIsMpegH = false;
bool mIsPcm;
size_t mNALLengthSize;
@@ -377,6 +388,10 @@
case FOURCC(".mp3"):
case 0x6D730055: // "ms U" mp3 audio
return MEDIA_MIMETYPE_AUDIO_MPEG;
+ case FOURCC("mha1"):
+ return MEDIA_MIMETYPE_AUDIO_MPEGH_MHA1;
+ case FOURCC("mhm1"):
+ return MEDIA_MIMETYPE_AUDIO_MPEGH_MHM1;
default:
ALOGW("Unknown fourcc: %c%c%c%c",
(fourcc >> 24) & 0xff,
@@ -1760,6 +1775,8 @@
case FOURCC("fLaC"):
case FOURCC(".mp3"):
case 0x6D730055: // "ms U" mp3 audio
+ case FOURCC("mha1"):
+ case FOURCC("mhm1"):
{
if (mIsQT && depth >= 1 && mPath[depth - 1] == FOURCC("wave")) {
@@ -1959,7 +1976,94 @@
}
break;
}
+ case FOURCC("mhaC"):
+ {
+ // See ISO_IEC_23008-3;2019 MHADecoderConfigurationRecord
+ constexpr uint32_t mhac_header_size = 4 /* size */ + 4 /* boxtype 'mhaC' */
+ + 1 /* configurationVersion */ + 1 /* mpegh3daProfileLevelIndication */
+ + 1 /* referenceChannelLayout */ + 2 /* mpegh3daConfigLength */;
+ uint8_t mhac_header[mhac_header_size];
+ off64_t data_offset = *offset;
+ if (chunk_size < sizeof(mhac_header)) {
+ return ERROR_MALFORMED;
+ }
+
+ if (mDataSource->readAt(data_offset, mhac_header, sizeof(mhac_header))
+ < (ssize_t)sizeof(mhac_header)) {
+ return ERROR_IO;
+ }
+
+ //get mpegh3daProfileLevelIndication
+ const uint32_t mpegh3daProfileLevelIndication = mhac_header[9];
+ AMediaFormat_setInt32(mLastTrack->meta,
+ AMEDIAFORMAT_KEY_MPEGH_PROFILE_LEVEL_INDICATION,
+ mpegh3daProfileLevelIndication);
+
+ //get referenceChannelLayout
+ const uint32_t referenceChannelLayout = mhac_header[10];
+ AMediaFormat_setInt32(mLastTrack->meta,
+ AMEDIAFORMAT_KEY_MPEGH_REFERENCE_CHANNEL_LAYOUT,
+ referenceChannelLayout);
+
+ // get mpegh3daConfigLength
+ const uint32_t mhac_config_size = U16_AT(&mhac_header[11]);
+ if (chunk_size != sizeof(mhac_header) + mhac_config_size) {
+ return ERROR_MALFORMED;
+ }
+
+ data_offset += sizeof(mhac_header);
+ uint8_t mhac_config[mhac_config_size];
+ if (mDataSource->readAt(data_offset, mhac_config, sizeof(mhac_config))
+ < (ssize_t)sizeof(mhac_config)) {
+ return ERROR_IO;
+ }
+
+ AMediaFormat_setBuffer(mLastTrack->meta,
+ AMEDIAFORMAT_KEY_CSD_0, mhac_config, sizeof(mhac_config));
+ data_offset += sizeof(mhac_config);
+ *offset = data_offset;
+ break;
+ }
+ case FOURCC("mhaP"):
+ {
+ // FDAmd_2 of ISO_IEC_23008-3;2019 MHAProfileAndLevelCompatibilitySetBox
+ constexpr uint32_t mhap_header_size = 4 /* size */ + 4 /* boxtype 'mhaP' */
+ + 1 /* numCompatibleSets */;
+
+ uint8_t mhap_header[mhap_header_size];
+ off64_t data_offset = *offset;
+
+ if (chunk_size < (ssize_t)mhap_header_size) {
+ return ERROR_MALFORMED;
+ }
+
+ if (mDataSource->readAt(data_offset, mhap_header, sizeof(mhap_header))
+ < (ssize_t)sizeof(mhap_header)) {
+ return ERROR_IO;
+ }
+
+ // mhap_compatible_sets_size = numCompatibleSets * sizeof(uint8_t)
+ const uint32_t mhap_compatible_sets_size = mhap_header[8];
+ if (chunk_size != sizeof(mhap_header) + mhap_compatible_sets_size) {
+ return ERROR_MALFORMED;
+ }
+
+ data_offset += sizeof(mhap_header);
+ uint8_t mhap_compatible_sets[mhap_compatible_sets_size];
+ if (mDataSource->readAt(
+ data_offset, mhap_compatible_sets, sizeof(mhap_compatible_sets))
+ < (ssize_t)sizeof(mhap_compatible_sets)) {
+ return ERROR_IO;
+ }
+
+ AMediaFormat_setBuffer(mLastTrack->meta,
+ AMEDIAFORMAT_KEY_MPEGH_COMPATIBLE_SETS,
+ mhap_compatible_sets, sizeof(mhap_compatible_sets));
+ data_offset += sizeof(mhap_compatible_sets);
+ *offset = data_offset;
+ break;
+ }
case FOURCC("mp4v"):
case FOURCC("encv"):
case FOURCC("s263"):
@@ -4939,6 +5043,8 @@
bool success = AMediaFormat_getString(mFormat, AMEDIAFORMAT_KEY_MIME, &mime);
CHECK(success);
+ mIsMpegH = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_MPEGH_MHA1) ||
+ !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_MPEGH_MHM1);
mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) ||
!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC);
@@ -6008,10 +6114,11 @@
}
uint32_t syncSampleIndex = sampleIndex;
- // assume every non-USAC audio sample is a sync sample. This works around
+ // assume every non-USAC/non-MPEGH audio sample is a sync sample.
+ // This works around
// seek issues with files that were incorrectly written with an
// empty or single-sample stss block for the audio track
- if (err == OK && (!mIsAudio || mIsUsac)) {
+ if (err == OK && (!mIsAudio || mIsUsac || mIsMpegH)) {
err = mSampleTable->findSyncSampleNear(
sampleIndex, &syncSampleIndex, findFlags);
}
diff --git a/media/libstagefright/Utils.cpp b/media/libstagefright/Utils.cpp
index 1c4f5ac..bd9a694 100644
--- a/media/libstagefright/Utils.cpp
+++ b/media/libstagefright/Utils.cpp
@@ -47,6 +47,16 @@
#include <media/AudioParameter.h>
#include <system/audio.h>
+// TODO : Remove the defines once mainline media is built against NDK >= 31.
+// The mp4 extractor is part of mainline and builds against NDK 29 as of
+// writing. These keys are available only from NDK 31:
+#define AMEDIAFORMAT_KEY_MPEGH_PROFILE_LEVEL_INDICATION \
+ "mpegh-profile-level-indication"
+#define AMEDIAFORMAT_KEY_MPEGH_REFERENCE_CHANNEL_LAYOUT \
+ "mpegh-reference-channel-layout"
+#define AMEDIAFORMAT_KEY_MPEGH_COMPATIBLE_SETS \
+ "mpegh-compatible-sets"
+
namespace android {
static status_t copyNALUToABuffer(sp<ABuffer> *buffer, const uint8_t *ptr, size_t length) {
@@ -1078,6 +1088,25 @@
msg->setInt32("is-adts", isADTS);
}
+ int32_t mpeghProfileLevelIndication;
+ if (meta->findInt32(kKeyMpeghProfileLevelIndication, &mpeghProfileLevelIndication)) {
+ msg->setInt32(AMEDIAFORMAT_KEY_MPEGH_PROFILE_LEVEL_INDICATION,
+ mpeghProfileLevelIndication);
+ }
+ int32_t mpeghReferenceChannelLayout;
+ if (meta->findInt32(kKeyMpeghReferenceChannelLayout, &mpeghReferenceChannelLayout)) {
+ msg->setInt32(AMEDIAFORMAT_KEY_MPEGH_REFERENCE_CHANNEL_LAYOUT,
+ mpeghReferenceChannelLayout);
+ }
+ if (meta->findData(kKeyMpeghCompatibleSets, &type, &data, &size)) {
+ sp<ABuffer> buffer = new (std::nothrow) ABuffer(size);
+ if (buffer.get() == NULL || buffer->base() == NULL) {
+ return NO_MEMORY;
+ }
+ msg->setBuffer(AMEDIAFORMAT_KEY_MPEGH_COMPATIBLE_SETS, buffer);
+ memcpy(buffer->data(), data, size);
+ }
+
int32_t aacProfile = -1;
if (meta->findInt32(kKeyAACAOT, &aacProfile)) {
msg->setInt32("aac-profile", aacProfile);
@@ -1837,6 +1866,23 @@
meta->setInt32(kKeyIsADTS, isADTS);
}
+ int32_t mpeghProfileLevelIndication = -1;
+ if (msg->findInt32(AMEDIAFORMAT_KEY_MPEGH_PROFILE_LEVEL_INDICATION,
+ &mpeghProfileLevelIndication)) {
+ meta->setInt32(kKeyMpeghProfileLevelIndication, mpeghProfileLevelIndication);
+ }
+ int32_t mpeghReferenceChannelLayout = -1;
+ if (msg->findInt32(AMEDIAFORMAT_KEY_MPEGH_REFERENCE_CHANNEL_LAYOUT,
+ &mpeghReferenceChannelLayout)) {
+ meta->setInt32(kKeyMpeghReferenceChannelLayout, mpeghReferenceChannelLayout);
+ }
+ sp<ABuffer> mpeghCompatibleSets;
+ if (msg->findBuffer(AMEDIAFORMAT_KEY_MPEGH_COMPATIBLE_SETS,
+ &mpeghCompatibleSets)) {
+ meta->setData(kKeyMpeghCompatibleSets, kTypeHCOS,
+ mpeghCompatibleSets->data(), mpeghCompatibleSets->size());
+ }
+
int32_t aacProfile = -1;
if (msg->findInt32("aac-profile", &aacProfile)) {
meta->setInt32(kKeyAACAOT, aacProfile);
diff --git a/media/libstagefright/include/media/stagefright/MetaDataBase.h b/media/libstagefright/include/media/stagefright/MetaDataBase.h
index 6f21a80..aae5ef9 100644
--- a/media/libstagefright/include/media/stagefright/MetaDataBase.h
+++ b/media/libstagefright/include/media/stagefright/MetaDataBase.h
@@ -152,6 +152,10 @@
kKeyIsADTS = 'adts', // bool (int32_t)
kKeyAACAOT = 'aaot', // int32_t
+ kKeyMpeghProfileLevelIndication = 'hpli', // int32_t
+ kKeyMpeghReferenceChannelLayout = 'hrcl', // int32_t
+ kKeyMpeghCompatibleSets = 'hcos', // raw data
+
// If a MediaBuffer's data represents (at least partially) encrypted
// data, the following fields aid in decryption.
// The data can be thought of as pairs of plain and encrypted data
@@ -265,6 +269,7 @@
kTypeAV1C = 'av1c',
kTypeDVCC = 'dvcc',
kTypeD263 = 'd263',
+ kTypeHCOS = 'hcos',
};
enum {