Adding dolby-vision boxes to MPEG4Extractor

Bug: 129367182
Test: build, cts

Change-Id: I5bff3f6c626730017d6febef384893ab194b5927
diff --git a/media/extractors/mp4/MPEG4Extractor.cpp b/media/extractors/mp4/MPEG4Extractor.cpp
index b91d16f..841ee0e 100755
--- a/media/extractors/mp4/MPEG4Extractor.cpp
+++ b/media/extractors/mp4/MPEG4Extractor.cpp
@@ -133,6 +133,7 @@
 
     bool mIsAVC;
     bool mIsHEVC;
+    bool mIsDolbyVision;
     bool mIsAC4;
     bool mIsPcm;
     size_t mNALLengthSize;
@@ -337,6 +338,13 @@
         case FOURCC("hvc1"):
         case FOURCC("hev1"):
             return MEDIA_MIMETYPE_VIDEO_HEVC;
+
+        case FOURCC("dvav"):
+        case FOURCC("dva1"):
+        case FOURCC("dvhe"):
+        case FOURCC("dvh1"):
+            return MEDIA_MIMETYPE_VIDEO_DOLBY_VISION;
+
         case FOURCC("ac-4"):
             return MEDIA_MIMETYPE_AUDIO_AC4;
         case FOURCC("Opus"):
@@ -1062,6 +1070,59 @@
                     mLastTrack->mTx3gBuffer = NULL;
                 }
 
+                const char *mime;
+                AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime);
+
+                if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
+                    void *data;
+                    size_t size;
+
+                    if (AMediaFormat_getBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_2, &data, &size)) {
+                        const uint8_t *ptr = (const uint8_t *)data;
+                        const uint8_t profile = ptr[2] >> 1;
+                        const uint8_t bl_compatibility_id = (ptr[4]) >> 4;
+
+                        if (4 == profile || 7 == profile ||
+                                (profile >= 8 && profile < 10 && bl_compatibility_id)) {
+                            // we need a backward compatible track
+                            ALOGV("Adding new backward compatible track");
+                            Track *track_b = new Track;
+
+                            track_b->timescale = mLastTrack->timescale;
+                            track_b->sampleTable = mLastTrack->sampleTable;
+                            track_b->includes_expensive_metadata = mLastTrack->includes_expensive_metadata;
+                            track_b->skipTrack = mLastTrack->skipTrack;
+                            track_b->has_elst = mLastTrack->has_elst;
+                            track_b->elst_media_time = mLastTrack->elst_media_time;
+                            track_b->elst_segment_duration = mLastTrack->elst_segment_duration;
+                            track_b->elstShiftStartTicks = mLastTrack->elstShiftStartTicks;
+                            track_b->subsample_encryption = mLastTrack->subsample_encryption;
+
+                            track_b->mTx3gBuffer = mLastTrack->mTx3gBuffer;
+                            track_b->mTx3gSize = mLastTrack->mTx3gSize;
+                            track_b->mTx3gFilled = mLastTrack->mTx3gFilled;
+
+                            track_b->meta = AMediaFormat_new();
+                            AMediaFormat_copy(track_b->meta, mLastTrack->meta);
+
+                            mLastTrack->next = track_b;
+                            track_b->next = NULL;
+
+                            auto id = track_b->meta->mFormat->findEntryByName(AMEDIAFORMAT_KEY_CSD_2);
+                            track_b->meta->mFormat->removeEntryAt(id);
+
+                            if (4 == profile || 7 == profile || 8 == profile ) {
+                                AMediaFormat_setString(track_b->meta,
+                                        AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_HEVC);
+                            } else if (9 == profile) {
+                                AMediaFormat_setString(track_b->meta,
+                                        AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_AVC);
+                            } // Should never get to else part
+
+                            mLastTrack = track_b;
+                        }
+                    }
+                }
             } else if (chunk_type == FOURCC("moov")) {
                 mInitCheck = OK;
 
@@ -1830,6 +1891,10 @@
         case FOURCC("avc1"):
         case FOURCC("hvc1"):
         case FOURCC("hev1"):
+        case FOURCC("dvav"):
+        case FOURCC("dva1"):
+        case FOURCC("dvhe"):
+        case FOURCC("dvh1"):
         case FOURCC("av01"):
         {
             uint8_t buffer[78];
@@ -1984,7 +2049,8 @@
                     // for audio, use 128KB
                     max_size = 1024 * 128;
                 } else if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)
-                        || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
+                        || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)
+                        || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
                     // AVC & HEVC requires compression ratio of at least 2, and uses
                     // macroblocks
                     max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
@@ -2315,6 +2381,30 @@
             *offset += chunk_size;
             break;
         }
+        case FOURCC("dvcC"):
+        case FOURCC("dvvC"): {
+            auto buffer = heapbuffer<uint8_t>(chunk_data_size);
+
+            if (buffer.get() == NULL) {
+                ALOGE("b/28471206");
+                return NO_MEMORY;
+            }
+
+            if (mDataSource->readAt(data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
+                return ERROR_IO;
+            }
+
+            if (mLastTrack == NULL)
+                return ERROR_MALFORMED;
+
+            AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_2,
+                                   buffer.get(), chunk_data_size);
+            AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME,
+                                   MEDIA_MIMETYPE_VIDEO_DOLBY_VISION);
+
+            *offset += chunk_size;
+            break;
+        }
         case FOURCC("d263"):
         {
             *offset += chunk_size;
@@ -4127,7 +4217,20 @@
         if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
             itemTable = mItemTable;
         }
-    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)) {
+    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
+        void *data;
+        size_t size;
+        if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_2, &data, &size)) {
+            return NULL;
+        }
+
+        const uint8_t *ptr = (const uint8_t *)data;
+
+        if (size != 24 || ptr[0] != 1 || ptr[1] != 0) {
+            // dv_version_major == 1, dv_version_minor == 0;
+            return NULL;
+        }
+   } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)) {
         void *data;
         size_t size;
         if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
@@ -4172,6 +4275,10 @@
         if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size)) {
             return ERROR_MALFORMED;
         }
+    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
+        if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_2, &data, &size)) {
+            return ERROR_MALFORMED;
+        }
     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)) {
         if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
             return ERROR_MALFORMED;
@@ -4659,6 +4766,7 @@
       mCurrentSampleInfoOffsets(NULL),
       mIsAVC(false),
       mIsHEVC(false),
+      mIsDolbyVision(false),
       mIsAC4(false),
       mIsPcm(false),
       mNALLengthSize(0),
@@ -4698,6 +4806,7 @@
     mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) ||
               !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC);
     mIsAC4 = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AC4);
+    mIsDolbyVision = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION);
 
     if (mIsAVC) {
         void *data;
@@ -4722,7 +4831,42 @@
         CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
 
         mNALLengthSize = 1 + (ptr[14 + 7] & 3);
-    }
+   } else if (mIsDolbyVision) {
+       ALOGV("%s DolbyVision stream detected", __FUNCTION__);
+       void *data;
+       size_t size;
+       CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_2, &data, &size));
+
+       const uint8_t *ptr = (const uint8_t *)data;
+
+       CHECK(size == 24);
+       CHECK_EQ((unsigned)ptr[0], 1u);  // dv_major_version == 1
+       CHECK_EQ((unsigned)ptr[1], 0u);  // dv_minor_version == 0
+
+       const uint8_t profile = ptr[2] >> 1;
+
+       // profile == (0,1,9) --> AVC; profile = (2,3,4,5,6,7,8) --> HEVC;
+       if (profile > 1 &&  profile < 9) {
+
+           CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size));
+
+           const uint8_t *ptr = (const uint8_t *)data;
+
+           CHECK(size >= 22);
+           CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
+
+           mNALLengthSize = 1 + (ptr[14 + 7] & 3);
+         } else {
+
+           CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size));
+           const uint8_t *ptr = (const uint8_t *)data;
+
+           CHECK(size >= 7);
+           CHECK_EQ((unsigned)ptr[0], 1u);  // configurationVersion == 1
+           // The number of bytes used to encode the length of a NAL unit.
+           mNALLengthSize = 1 + (ptr[4] & 3);
+       }
+   }
 
     mIsPcm = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_RAW);
     mIsAudio = !strncasecmp(mime, "audio/", 6);
@@ -5789,7 +5933,7 @@
         }
     }
 
-    if (!mIsAVC && !mIsHEVC && !mIsAC4) {
+    if (!mIsAVC && !mIsHEVC && !mIsDolbyVision && !mIsAC4) {
         if (newBuffer) {
             if (mIsPcm) {
                 // The twos' PCM block reader assumes that all samples has the same size.
@@ -6179,7 +6323,7 @@
         AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_IV, iv, ivlength);
     }
 
-    if (!mIsAVC && !mIsHEVC) {
+    if (!mIsAVC && !mIsHEVC && !mIsDolbyVision) {
         if (newBuffer) {
             if (!isInRange((size_t)0u, mBuffer->size(), size)) {
                 mBuffer->release();
diff --git a/media/libstagefright/MPEG4Writer.cpp b/media/libstagefright/MPEG4Writer.cpp
index f130c9b..bf4e7de 100644
--- a/media/libstagefright/MPEG4Writer.cpp
+++ b/media/libstagefright/MPEG4Writer.cpp
@@ -2213,8 +2213,10 @@
     } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) ||
                !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
         mMeta->findData(kKeyHVCC, &type, &data, &size);
-    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
-            || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
+    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
+        mMeta->findData(kKeyDVCC, &type, &data, &size);
+    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4) ||
+               !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
         if (mMeta->findData(kKeyESDS, &type, &data, &size)) {
             ESDS esds(data, size);
             if (esds.getCodecSpecificInfo(&data, &size) == OK &&
diff --git a/media/libstagefright/Utils.cpp b/media/libstagefright/Utils.cpp
index bda6053..76bbdf5 100644
--- a/media/libstagefright/Utils.cpp
+++ b/media/libstagefright/Utils.cpp
@@ -227,6 +227,66 @@
     }
 }
 
+static void parseDolbyVisionProfileLevelFromDvcc(const uint8_t *ptr, size_t size, sp<AMessage> &format) {
+    // dv_major_version == 1, dv_minor_version == 0
+    if (size < 4 || ptr[0] != 1 || ptr[1] != 0) {
+        return;
+    }
+
+    const uint8_t profile = ptr[2] >> 1;
+    const uint8_t level = ((ptr[2] & 0x1) << 5) | ((ptr[3] >> 3) & 0x1f);
+    const uint8_t rpu_present_flag = (ptr[3] >> 2) & 0x01;
+    const uint8_t el_present_flag = (ptr[3] >> 1) & 0x01;
+    const uint8_t bl_present_flag = (ptr[3] & 0x01);
+    const int32_t bl_compatibility_id = (int32_t)(ptr[4] >> 4);
+
+    ALOGV("profile-level-compatibility value in dv(c|v)c box %d-%d-%d",
+          profile, level, bl_compatibility_id);
+
+    // All Dolby Profiles will have profile and level info in MediaFormat
+    // Profile 8 and 9 will have bl_compatibility_id too.
+    const static ALookup<uint8_t, OMX_VIDEO_DOLBYVISIONPROFILETYPE> profiles{
+        {1, OMX_VIDEO_DolbyVisionProfileDvavPen},
+        {3, OMX_VIDEO_DolbyVisionProfileDvheDen},
+        {4, OMX_VIDEO_DolbyVisionProfileDvheDtr},
+        {5, OMX_VIDEO_DolbyVisionProfileDvheStn},
+        {6, OMX_VIDEO_DolbyVisionProfileDvheDth},
+        {7, OMX_VIDEO_DolbyVisionProfileDvheDtb},
+        {8, OMX_VIDEO_DolbyVisionProfileDvheSt},
+        {9, OMX_VIDEO_DolbyVisionProfileDvavSe},
+    };
+
+    const static ALookup<uint8_t, OMX_VIDEO_DOLBYVISIONLEVELTYPE> levels{
+        {0, OMX_VIDEO_DolbyVisionLevelUnknown},
+        {1, OMX_VIDEO_DolbyVisionLevelHd24},
+        {2, OMX_VIDEO_DolbyVisionLevelHd30},
+        {3, OMX_VIDEO_DolbyVisionLevelFhd24},
+        {4, OMX_VIDEO_DolbyVisionLevelFhd30},
+        {5, OMX_VIDEO_DolbyVisionLevelFhd60},
+        {6, OMX_VIDEO_DolbyVisionLevelUhd24},
+        {7, OMX_VIDEO_DolbyVisionLevelUhd30},
+        {8, OMX_VIDEO_DolbyVisionLevelUhd48},
+        {9, OMX_VIDEO_DolbyVisionLevelUhd60},
+    };
+    // set rpuAssoc
+    if (rpu_present_flag && el_present_flag && !bl_present_flag) {
+        format->setInt32("rpuAssoc", 1);
+    }
+    // set profile & level if they are recognized
+    OMX_VIDEO_DOLBYVISIONPROFILETYPE codecProfile;
+    OMX_VIDEO_DOLBYVISIONLEVELTYPE codecLevel;
+    if (profiles.map(profile, &codecProfile)) {
+        format->setInt32("profile", codecProfile);
+        if (codecProfile == OMX_VIDEO_DolbyVisionProfileDvheSt ||
+            codecProfile == OMX_VIDEO_DolbyVisionProfileDvavSe) {
+            format->setInt32("bl_compatibility_id", bl_compatibility_id);
+        }
+        if (levels.map(level, &codecLevel)) {
+            format->setInt32("level", codecLevel);
+        }
+    }
+}
+
 static void parseH263ProfileLevelFromD263(const uint8_t *ptr, size_t size, sp<AMessage> &format) {
     if (size < 7) {
         return;
@@ -1411,6 +1471,12 @@
         msg->setBuffer("csd-0", buffer);
     }
 
+    if (meta->findData(kKeyDVCC, &type, &data, &size)) {
+        const uint8_t *ptr = (const uint8_t *)data;
+        ALOGV("DV: calling parseDolbyVisionProfileLevelFromDvcc with data size %zu", size);
+        parseDolbyVisionProfileLevelFromDvcc(ptr, size, msg);
+    }
+
     *format = msg;
 
     return OK;
@@ -1839,6 +1905,30 @@
             meta->setData(kKeyHVCC, kTypeHVCC, hvcc.data(), outsize);
         } else if (mime == MEDIA_MIMETYPE_VIDEO_AV1) {
             meta->setData(kKeyAV1C, 0, csd0->data(), csd0->size());
+        } else if (mime == MEDIA_MIMETYPE_VIDEO_DOLBY_VISION) {
+            if (msg->findBuffer("csd-2", &csd2)) {
+                meta->setData(kKeyDVCC, kTypeDVCC, csd2->data(), csd2->size());
+
+                size_t dvcc_size = 1024;
+                uint8_t dvcc[dvcc_size];
+                memcpy(dvcc, csd2->data(), dvcc_size);
+                const uint8_t profile = dvcc[2] >> 1;
+
+                if (profile > 1 && profile < 9) {
+                    std::vector<uint8_t> hvcc(csd0size + 1024);
+                    size_t outsize = reassembleHVCC(csd0, hvcc.data(), hvcc.size(), 4);
+                    meta->setData(kKeyHVCC, kTypeHVCC, hvcc.data(), outsize);
+                } else {
+                    sp<ABuffer> csd1;
+                    if (msg->findBuffer("csd-1", &csd1)) {
+                        std::vector<char> avcc(csd0size + csd1->size() + 1024);
+                        size_t outsize = reassembleAVCC(csd0, csd1, avcc.data());
+                        meta->setData(kKeyAVCC, kTypeAVCC, avcc.data(), outsize);
+                    }
+                }
+            } else {
+                ALOGW("We need csd-2!!. %s", msg->debugString().c_str());
+            }
         } else if (mime == MEDIA_MIMETYPE_VIDEO_VP9) {
             meta->setData(kKeyVp9CodecPrivate, 0, csd0->data(), csd0->size());
         } else if (mime == MEDIA_MIMETYPE_AUDIO_OPUS) {
@@ -1885,8 +1975,18 @@
         meta->setData(kKeyStreamHeader, 'mdat', csd0->data(), csd0->size());
     } else if (msg->findBuffer("d263", &csd0)) {
         meta->setData(kKeyD263, kTypeD263, csd0->data(), csd0->size());
-    }
+    } else if (mime == MEDIA_MIMETYPE_VIDEO_DOLBY_VISION && msg->findBuffer("csd-2", &csd2)) {
+        meta->setData(kKeyDVCC, kTypeDVCC, csd2->data(), csd2->size());
 
+        // Remove CSD-2 from the data here to avoid duplicate data in meta
+        meta->remove(kKeyOpaqueCSD2);
+
+        if (msg->findBuffer("csd-avc", &csd0)) {
+            meta->setData(kKeyAVCC, kTypeAVCC, csd0->data(), csd0->size());
+        } else if (msg->findBuffer("csd-hevc", &csd0)) {
+            meta->setData(kKeyHVCC, kTypeHVCC, csd0->data(), csd0->size());
+        }
+    }
     // XXX TODO add whatever other keys there are
 
 #if 0
diff --git a/media/libstagefright/include/media/stagefright/MetaDataBase.h b/media/libstagefright/include/media/stagefright/MetaDataBase.h
index 659bd5b..e17093a 100644
--- a/media/libstagefright/include/media/stagefright/MetaDataBase.h
+++ b/media/libstagefright/include/media/stagefright/MetaDataBase.h
@@ -59,6 +59,7 @@
     kKeyAACProfile        = 'aacp',  // int32_t
     kKeyAVCC              = 'avcc',  // raw data
     kKeyHVCC              = 'hvcc',  // raw data
+    kKeyDVCC              = 'dvcc',  // raw data
     kKeyAV1C              = 'av1c',  // raw data
     kKeyThumbnailHVCC     = 'thvc',  // raw data
     kKeyD263              = 'd263',  // raw data
@@ -245,6 +246,7 @@
     kTypeAVCC        = 'avcc',
     kTypeHVCC        = 'hvcc',
     kTypeAV1C        = 'av1c',
+    kTypeDVCC        = 'dvcc',
     kTypeD263        = 'd263',
 };