Merge "MPEG4Extractor: support MPEG-H 3D Audio (2/2)" am: bb67898668

Original change: https://android-review.googlesource.com/c/platform/frameworks/av/+/1671306

Change-Id: I5493791fee7e783bc2bdfe9c3e0beb39165b46be
diff --git a/media/extractors/mp4/MPEG4Extractor.cpp b/media/extractors/mp4/MPEG4Extractor.cpp
index 1bc8c63..f725a8c 100644
--- a/media/extractors/mp4/MPEG4Extractor.cpp
+++ b/media/extractors/mp4/MPEG4Extractor.cpp
@@ -62,6 +62,16 @@
 
 #define ALAC_SPECIFIC_INFO_SIZE (36)
 
+// TODO : Remove the defines once mainline media is built against NDK >= 31.
+// The mp4 extractor is part of mainline and builds against NDK 29 as of
+// writing. These keys are available only from NDK 31:
+#define AMEDIAFORMAT_KEY_MPEGH_PROFILE_LEVEL_INDICATION \
+  "mpegh-profile-level-indication"
+#define AMEDIAFORMAT_KEY_MPEGH_REFERENCE_CHANNEL_LAYOUT \
+  "mpegh-reference-channel-layout"
+#define AMEDIAFORMAT_KEY_MPEGH_COMPATIBLE_SETS \
+  "mpegh-compatible-sets"
+
 namespace android {
 
 enum {
@@ -139,6 +149,7 @@
     bool mIsHEVC;
     bool mIsDolbyVision;
     bool mIsAC4;
+    bool mIsMpegH = false;
     bool mIsPcm;
     size_t mNALLengthSize;
 
@@ -377,6 +388,10 @@
         case FOURCC(".mp3"):
         case 0x6D730055: // "ms U" mp3 audio
             return MEDIA_MIMETYPE_AUDIO_MPEG;
+        case FOURCC("mha1"):
+            return MEDIA_MIMETYPE_AUDIO_MPEGH_MHA1;
+        case FOURCC("mhm1"):
+            return MEDIA_MIMETYPE_AUDIO_MPEGH_MHM1;
         default:
             ALOGW("Unknown fourcc: %c%c%c%c",
                    (fourcc >> 24) & 0xff,
@@ -1760,6 +1775,8 @@
         case FOURCC("fLaC"):
         case FOURCC(".mp3"):
         case 0x6D730055: // "ms U" mp3 audio
+        case FOURCC("mha1"):
+        case FOURCC("mhm1"):
         {
             if (mIsQT && depth >= 1 && mPath[depth - 1] == FOURCC("wave")) {
 
@@ -1959,7 +1976,94 @@
             }
             break;
         }
+        case FOURCC("mhaC"):
+        {
+            // See ISO_IEC_23008-3;2019 MHADecoderConfigurationRecord
+            constexpr uint32_t mhac_header_size = 4 /* size */ + 4 /* boxtype 'mhaC' */
+                    + 1 /* configurationVersion */ + 1 /* mpegh3daProfileLevelIndication */
+                    + 1 /* referenceChannelLayout */ + 2 /* mpegh3daConfigLength */;
+            uint8_t mhac_header[mhac_header_size];
+            off64_t data_offset = *offset;
 
+            if (chunk_size < sizeof(mhac_header)) {
+                return ERROR_MALFORMED;
+            }
+
+            if (mDataSource->readAt(data_offset, mhac_header, sizeof(mhac_header))
+                    < (ssize_t)sizeof(mhac_header)) {
+                return ERROR_IO;
+            }
+
+            //get mpegh3daProfileLevelIndication
+            const uint32_t mpegh3daProfileLevelIndication = mhac_header[9];
+            AMediaFormat_setInt32(mLastTrack->meta,
+                    AMEDIAFORMAT_KEY_MPEGH_PROFILE_LEVEL_INDICATION,
+                    mpegh3daProfileLevelIndication);
+
+             //get referenceChannelLayout
+            const uint32_t referenceChannelLayout = mhac_header[10];
+            AMediaFormat_setInt32(mLastTrack->meta,
+                    AMEDIAFORMAT_KEY_MPEGH_REFERENCE_CHANNEL_LAYOUT,
+                    referenceChannelLayout);
+
+            // get mpegh3daConfigLength
+            const uint32_t mhac_config_size = U16_AT(&mhac_header[11]);
+            if (chunk_size != sizeof(mhac_header) + mhac_config_size) {
+                return ERROR_MALFORMED;
+            }
+
+            data_offset += sizeof(mhac_header);
+            uint8_t mhac_config[mhac_config_size];
+            if (mDataSource->readAt(data_offset, mhac_config, sizeof(mhac_config))
+                    < (ssize_t)sizeof(mhac_config)) {
+                return ERROR_IO;
+            }
+
+            AMediaFormat_setBuffer(mLastTrack->meta,
+                    AMEDIAFORMAT_KEY_CSD_0, mhac_config, sizeof(mhac_config));
+            data_offset += sizeof(mhac_config);
+            *offset = data_offset;
+            break;
+        }
+        case FOURCC("mhaP"):
+        {
+            // FDAmd_2 of ISO_IEC_23008-3;2019 MHAProfileAndLevelCompatibilitySetBox
+            constexpr uint32_t mhap_header_size = 4 /* size */ + 4 /* boxtype 'mhaP' */
+                    + 1 /* numCompatibleSets */;
+
+            uint8_t mhap_header[mhap_header_size];
+            off64_t data_offset = *offset;
+
+            if (chunk_size < (ssize_t)mhap_header_size) {
+                return ERROR_MALFORMED;
+            }
+
+            if (mDataSource->readAt(data_offset, mhap_header, sizeof(mhap_header))
+                    < (ssize_t)sizeof(mhap_header)) {
+                return ERROR_IO;
+            }
+
+            // mhap_compatible_sets_size = numCompatibleSets * sizeof(uint8_t)
+            const uint32_t mhap_compatible_sets_size = mhap_header[8];
+            if (chunk_size != sizeof(mhap_header) + mhap_compatible_sets_size) {
+                return ERROR_MALFORMED;
+            }
+
+            data_offset += sizeof(mhap_header);
+            uint8_t mhap_compatible_sets[mhap_compatible_sets_size];
+            if (mDataSource->readAt(
+                    data_offset, mhap_compatible_sets, sizeof(mhap_compatible_sets))
+                            < (ssize_t)sizeof(mhap_compatible_sets)) {
+                return ERROR_IO;
+            }
+
+            AMediaFormat_setBuffer(mLastTrack->meta,
+                    AMEDIAFORMAT_KEY_MPEGH_COMPATIBLE_SETS,
+                    mhap_compatible_sets, sizeof(mhap_compatible_sets));
+            data_offset += sizeof(mhap_compatible_sets);
+            *offset = data_offset;
+            break;
+        }
         case FOURCC("mp4v"):
         case FOURCC("encv"):
         case FOURCC("s263"):
@@ -4939,6 +5043,8 @@
     bool success = AMediaFormat_getString(mFormat, AMEDIAFORMAT_KEY_MIME, &mime);
     CHECK(success);
 
+    mIsMpegH = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_MPEGH_MHA1) ||
+               !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_MPEGH_MHM1);
     mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
     mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) ||
               !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC);
@@ -6008,10 +6114,11 @@
             }
 
             uint32_t syncSampleIndex = sampleIndex;
-            // assume every non-USAC audio sample is a sync sample. This works around
+            // assume every non-USAC/non-MPEGH audio sample is a sync sample.
+            // This works around
             // seek issues with files that were incorrectly written with an
             // empty or single-sample stss block for the audio track
-            if (err == OK && (!mIsAudio || mIsUsac)) {
+            if (err == OK && (!mIsAudio || mIsUsac || mIsMpegH)) {
                 err = mSampleTable->findSyncSampleNear(
                         sampleIndex, &syncSampleIndex, findFlags);
             }
diff --git a/media/libstagefright/Utils.cpp b/media/libstagefright/Utils.cpp
index 1c4f5ac..bd9a694 100644
--- a/media/libstagefright/Utils.cpp
+++ b/media/libstagefright/Utils.cpp
@@ -47,6 +47,16 @@
 #include <media/AudioParameter.h>
 #include <system/audio.h>
 
+// TODO : Remove the defines once mainline media is built against NDK >= 31.
+// The mp4 extractor is part of mainline and builds against NDK 29 as of
+// writing. These keys are available only from NDK 31:
+#define AMEDIAFORMAT_KEY_MPEGH_PROFILE_LEVEL_INDICATION \
+  "mpegh-profile-level-indication"
+#define AMEDIAFORMAT_KEY_MPEGH_REFERENCE_CHANNEL_LAYOUT \
+  "mpegh-reference-channel-layout"
+#define AMEDIAFORMAT_KEY_MPEGH_COMPATIBLE_SETS \
+  "mpegh-compatible-sets"
+
 namespace android {
 
 static status_t copyNALUToABuffer(sp<ABuffer> *buffer, const uint8_t *ptr, size_t length) {
@@ -1078,6 +1088,25 @@
             msg->setInt32("is-adts", isADTS);
         }
 
+        int32_t mpeghProfileLevelIndication;
+        if (meta->findInt32(kKeyMpeghProfileLevelIndication, &mpeghProfileLevelIndication)) {
+            msg->setInt32(AMEDIAFORMAT_KEY_MPEGH_PROFILE_LEVEL_INDICATION,
+                    mpeghProfileLevelIndication);
+        }
+        int32_t mpeghReferenceChannelLayout;
+        if (meta->findInt32(kKeyMpeghReferenceChannelLayout, &mpeghReferenceChannelLayout)) {
+            msg->setInt32(AMEDIAFORMAT_KEY_MPEGH_REFERENCE_CHANNEL_LAYOUT,
+                    mpeghReferenceChannelLayout);
+        }
+        if (meta->findData(kKeyMpeghCompatibleSets, &type, &data, &size)) {
+            sp<ABuffer> buffer = new (std::nothrow) ABuffer(size);
+            if (buffer.get() == NULL || buffer->base() == NULL) {
+                return NO_MEMORY;
+            }
+            msg->setBuffer(AMEDIAFORMAT_KEY_MPEGH_COMPATIBLE_SETS, buffer);
+            memcpy(buffer->data(), data, size);
+        }
+
         int32_t aacProfile = -1;
         if (meta->findInt32(kKeyAACAOT, &aacProfile)) {
             msg->setInt32("aac-profile", aacProfile);
@@ -1837,6 +1866,23 @@
             meta->setInt32(kKeyIsADTS, isADTS);
         }
 
+        int32_t mpeghProfileLevelIndication = -1;
+        if (msg->findInt32(AMEDIAFORMAT_KEY_MPEGH_PROFILE_LEVEL_INDICATION,
+                &mpeghProfileLevelIndication)) {
+            meta->setInt32(kKeyMpeghProfileLevelIndication, mpeghProfileLevelIndication);
+        }
+        int32_t mpeghReferenceChannelLayout = -1;
+        if (msg->findInt32(AMEDIAFORMAT_KEY_MPEGH_REFERENCE_CHANNEL_LAYOUT,
+                &mpeghReferenceChannelLayout)) {
+            meta->setInt32(kKeyMpeghReferenceChannelLayout, mpeghReferenceChannelLayout);
+        }
+        sp<ABuffer> mpeghCompatibleSets;
+        if (msg->findBuffer(AMEDIAFORMAT_KEY_MPEGH_COMPATIBLE_SETS,
+                &mpeghCompatibleSets)) {
+            meta->setData(kKeyMpeghCompatibleSets, kTypeHCOS,
+                    mpeghCompatibleSets->data(), mpeghCompatibleSets->size());
+        }
+
         int32_t aacProfile = -1;
         if (msg->findInt32("aac-profile", &aacProfile)) {
             meta->setInt32(kKeyAACAOT, aacProfile);
diff --git a/media/libstagefright/include/media/stagefright/MetaDataBase.h b/media/libstagefright/include/media/stagefright/MetaDataBase.h
index 6f21a80..aae5ef9 100644
--- a/media/libstagefright/include/media/stagefright/MetaDataBase.h
+++ b/media/libstagefright/include/media/stagefright/MetaDataBase.h
@@ -152,6 +152,10 @@
     kKeyIsADTS            = 'adts',  // bool (int32_t)
     kKeyAACAOT            = 'aaot',  // int32_t
 
+    kKeyMpeghProfileLevelIndication = 'hpli', // int32_t
+    kKeyMpeghReferenceChannelLayout = 'hrcl', // int32_t
+    kKeyMpeghCompatibleSets         = 'hcos', // raw data
+
     // If a MediaBuffer's data represents (at least partially) encrypted
     // data, the following fields aid in decryption.
     // The data can be thought of as pairs of plain and encrypted data
@@ -265,6 +269,7 @@
     kTypeAV1C        = 'av1c',
     kTypeDVCC        = 'dvcc',
     kTypeD263        = 'd263',
+    kTypeHCOS        = 'hcos',
 };
 
 enum {