Adding dolby-vision boxes to MPEG4Extractor
Bug: 129367182
Test: build, cts
Change-Id: I5bff3f6c626730017d6febef384893ab194b5927
diff --git a/media/extractors/mp4/MPEG4Extractor.cpp b/media/extractors/mp4/MPEG4Extractor.cpp
index b91d16f..841ee0e 100755
--- a/media/extractors/mp4/MPEG4Extractor.cpp
+++ b/media/extractors/mp4/MPEG4Extractor.cpp
@@ -133,6 +133,7 @@
bool mIsAVC;
bool mIsHEVC;
+ bool mIsDolbyVision;
bool mIsAC4;
bool mIsPcm;
size_t mNALLengthSize;
@@ -337,6 +338,13 @@
case FOURCC("hvc1"):
case FOURCC("hev1"):
return MEDIA_MIMETYPE_VIDEO_HEVC;
+
+ case FOURCC("dvav"):
+ case FOURCC("dva1"):
+ case FOURCC("dvhe"):
+ case FOURCC("dvh1"):
+ return MEDIA_MIMETYPE_VIDEO_DOLBY_VISION;
+
case FOURCC("ac-4"):
return MEDIA_MIMETYPE_AUDIO_AC4;
case FOURCC("Opus"):
@@ -1062,6 +1070,59 @@
mLastTrack->mTx3gBuffer = NULL;
}
+ // When the track just parsed is Dolby Vision and its profile/compatibility id
+ // match the test below, append a second track that shares the sample table
+ // but is advertised as plain HEVC or AVC.
+ const char *mime = NULL;
+
+ // Only compare the MIME type if the lookup actually produced a string;
+ // otherwise strcasecmp() would be handed an uninitialized pointer.
+ if (AMediaFormat_getString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME, &mime)
+         && mime != NULL
+         && !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
+     void *data;
+     size_t size;
+
+     // Bytes 2 and 4 of the configuration record are read below, so a record
+     // shorter than five bytes must not be parsed.
+     if (AMediaFormat_getBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_2, &data, &size)
+             && size >= 5) {
+         const uint8_t *ptr = (const uint8_t *)data;
+         const uint8_t profile = ptr[2] >> 1;
+         const uint8_t bl_compatibility_id = (ptr[4]) >> 4;
+
+         // Profiles 4 and 7 always get a companion track; profiles 8 and 9 only
+         // when bl_compatibility_id is non-zero.
+         if (4 == profile || 7 == profile ||
+                 (profile >= 8 && profile < 10 && bl_compatibility_id)) {
+             // we need a backward compatible track
+             ALOGV("Adding new backward compatible track");
+             Track *track_b = new Track;
+
+             track_b->timescale = mLastTrack->timescale;
+             track_b->sampleTable = mLastTrack->sampleTable;
+             track_b->includes_expensive_metadata = mLastTrack->includes_expensive_metadata;
+             track_b->skipTrack = mLastTrack->skipTrack;
+             track_b->has_elst = mLastTrack->has_elst;
+             track_b->elst_media_time = mLastTrack->elst_media_time;
+             track_b->elst_segment_duration = mLastTrack->elst_segment_duration;
+             track_b->elstShiftStartTicks = mLastTrack->elstShiftStartTicks;
+             track_b->subsample_encryption = mLastTrack->subsample_encryption;
+
+             // NOTE(review): the tx3g pointer is copied, not duplicated - confirm
+             // that both tracks cannot end up freeing the same buffer.
+             track_b->mTx3gBuffer = mLastTrack->mTx3gBuffer;
+             track_b->mTx3gSize = mLastTrack->mTx3gSize;
+             track_b->mTx3gFilled = mLastTrack->mTx3gFilled;
+
+             track_b->meta = AMediaFormat_new();
+             AMediaFormat_copy(track_b->meta, mLastTrack->meta);
+
+             mLastTrack->next = track_b;
+             track_b->next = NULL;
+
+             // The companion track is not Dolby Vision, so drop the copied csd-2.
+             auto id = track_b->meta->mFormat->findEntryByName(AMEDIAFORMAT_KEY_CSD_2);
+             track_b->meta->mFormat->removeEntryAt(id);
+
+             if (4 == profile || 7 == profile || 8 == profile ) {
+                 AMediaFormat_setString(track_b->meta,
+                         AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_HEVC);
+             } else if (9 == profile) {
+                 AMediaFormat_setString(track_b->meta,
+                         AMEDIAFORMAT_KEY_MIME, MEDIA_MIMETYPE_VIDEO_AVC);
+             } // Should never get to else part
+
+             mLastTrack = track_b;
+         }
+     }
+ }
} else if (chunk_type == FOURCC("moov")) {
mInitCheck = OK;
@@ -1830,6 +1891,10 @@
case FOURCC("avc1"):
case FOURCC("hvc1"):
case FOURCC("hev1"):
+ case FOURCC("dvav"):
+ case FOURCC("dva1"):
+ case FOURCC("dvhe"):
+ case FOURCC("dvh1"):
case FOURCC("av01"):
{
uint8_t buffer[78];
@@ -1984,7 +2049,8 @@
// for audio, use 128KB
max_size = 1024 * 128;
} else if (!strcmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)
- || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
+ || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)
+ || !strcmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
// AVC & HEVC requires compression ratio of at least 2, and uses
// macroblocks
max_size = ((width + 15) / 16) * ((height + 15) / 16) * 192;
@@ -2315,6 +2381,30 @@
*offset += chunk_size;
break;
}
+ case FOURCC("dvcC"):
+ case FOURCC("dvvC"): {
+ auto buffer = heapbuffer<uint8_t>(chunk_data_size);
+
+ if (buffer.get() == NULL) {
+ ALOGE("b/28471206");
+ return NO_MEMORY;
+ }
+
+ if (mDataSource->readAt(data_offset, buffer.get(), chunk_data_size) < chunk_data_size) {
+ return ERROR_IO;
+ }
+
+ if (mLastTrack == NULL)
+ return ERROR_MALFORMED;
+
+ AMediaFormat_setBuffer(mLastTrack->meta, AMEDIAFORMAT_KEY_CSD_2,
+ buffer.get(), chunk_data_size);
+ AMediaFormat_setString(mLastTrack->meta, AMEDIAFORMAT_KEY_MIME,
+ MEDIA_MIMETYPE_VIDEO_DOLBY_VISION);
+
+ *offset += chunk_size;
+ break;
+ }
case FOURCC("d263"):
{
*offset += chunk_size;
@@ -4127,7 +4217,20 @@
if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
itemTable = mItemTable;
}
- } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)) {
+ } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
+ void *data;
+ size_t size;
+ if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_2, &data, &size)) {
+ return NULL;
+ }
+
+ const uint8_t *ptr = (const uint8_t *)data;
+
+ if (size != 24 || ptr[0] != 1 || ptr[1] != 0) {
+ // dv_version_major == 1, dv_version_minor == 0;
+ return NULL;
+ }
+ } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)) {
void *data;
size_t size;
if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
@@ -4172,6 +4275,10 @@
if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size)) {
return ERROR_MALFORMED;
}
+ } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
+ if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_2, &data, &size)) {
+ return ERROR_MALFORMED;
+ }
} else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AV1)) {
if (!AMediaFormat_getBuffer(track->meta, AMEDIAFORMAT_KEY_CSD_0, &data, &size)) {
return ERROR_MALFORMED;
@@ -4659,6 +4766,7 @@
mCurrentSampleInfoOffsets(NULL),
mIsAVC(false),
mIsHEVC(false),
+ mIsDolbyVision(false),
mIsAC4(false),
mIsPcm(false),
mNALLengthSize(0),
@@ -4698,6 +4806,7 @@
mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) ||
!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC);
mIsAC4 = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AC4);
+ mIsDolbyVision = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION);
if (mIsAVC) {
void *data;
@@ -4722,7 +4831,42 @@
CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
mNALLengthSize = 1 + (ptr[14 + 7] & 3);
- }
+ } else if (mIsDolbyVision) {
+ ALOGV("%s DolbyVision stream detected", __FUNCTION__);
+ void *data;
+ size_t size;
+ CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_2, &data, &size));
+
+ const uint8_t *ptr = (const uint8_t *)data;
+
+ CHECK(size == 24);
+ CHECK_EQ((unsigned)ptr[0], 1u); // dv_major_version == 1
+ CHECK_EQ((unsigned)ptr[1], 0u); // dv_minor_version == 0
+
+ const uint8_t profile = ptr[2] >> 1;
+
+ // profile == (0,1,9) --> AVC; profile = (2,3,4,5,6,7,8) --> HEVC;
+ if (profile > 1 && profile < 9) {
+
+ CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_HEVC, &data, &size));
+
+ const uint8_t *ptr = (const uint8_t *)data;
+
+ CHECK(size >= 22);
+ CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
+
+ mNALLengthSize = 1 + (ptr[14 + 7] & 3);
+ } else {
+
+ CHECK(AMediaFormat_getBuffer(format, AMEDIAFORMAT_KEY_CSD_AVC, &data, &size));
+ const uint8_t *ptr = (const uint8_t *)data;
+
+ CHECK(size >= 7);
+ CHECK_EQ((unsigned)ptr[0], 1u); // configurationVersion == 1
+ // The number of bytes used to encode the length of a NAL unit.
+ mNALLengthSize = 1 + (ptr[4] & 3);
+ }
+ }
mIsPcm = !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_RAW);
mIsAudio = !strncasecmp(mime, "audio/", 6);
@@ -5789,7 +5933,7 @@
}
}
- if (!mIsAVC && !mIsHEVC && !mIsAC4) {
+ if (!mIsAVC && !mIsHEVC && !mIsDolbyVision && !mIsAC4) {
if (newBuffer) {
if (mIsPcm) {
// The twos' PCM block reader assumes that all samples has the same size.
@@ -6179,7 +6323,7 @@
AMediaFormat_setBuffer(bufmeta, AMEDIAFORMAT_KEY_CRYPTO_IV, iv, ivlength);
}
- if (!mIsAVC && !mIsHEVC) {
+ if (!mIsAVC && !mIsHEVC && !mIsDolbyVision) {
if (newBuffer) {
if (!isInRange((size_t)0u, mBuffer->size(), size)) {
mBuffer->release();
diff --git a/media/libstagefright/MPEG4Writer.cpp b/media/libstagefright/MPEG4Writer.cpp
index f130c9b..bf4e7de 100644
--- a/media/libstagefright/MPEG4Writer.cpp
+++ b/media/libstagefright/MPEG4Writer.cpp
@@ -2213,8 +2213,10 @@
} else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) ||
!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
mMeta->findData(kKeyHVCC, &type, &data, &size);
- } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
- || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
+ } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_DOLBY_VISION)) {
+ mMeta->findData(kKeyDVCC, &type, &data, &size);
+ } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4) ||
+ !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
if (mMeta->findData(kKeyESDS, &type, &data, &size)) {
ESDS esds(data, size);
if (esds.getCodecSpecificInfo(&data, &size) == OK &&
diff --git a/media/libstagefright/Utils.cpp b/media/libstagefright/Utils.cpp
index bda6053..76bbdf5 100644
--- a/media/libstagefright/Utils.cpp
+++ b/media/libstagefright/Utils.cpp
@@ -227,6 +227,66 @@
}
}
+/**
+ * Parses a Dolby Vision configuration record (the data stored under kKeyDVCC)
+ * and publishes the recognized fields in |format|.
+ *
+ * Keys written:
+ *   "rpuAssoc"            - 1 when the RPU and EL flags are set and the BL flag is not
+ *   "profile"             - when the raw profile maps to a known OMX profile
+ *   "bl_compatibility_id" - only for profiles 8 (DvheSt) and 9 (DvavSe)
+ *   "level"               - when the profile is known and the raw level maps too
+ *
+ * |format| is left untouched when the record is shorter than the five bytes
+ * read below or when its version is not 1.0.
+ */
+static void parseDolbyVisionProfileLevelFromDvcc(const uint8_t *ptr, size_t size, sp<AMessage> &format) {
+    // dv_major_version == 1, dv_minor_version == 0
+    // Bytes 0..4 are read, so anything shorter than 5 bytes is rejected;
+    // accepting a 4-byte record would read ptr[4] out of bounds.
+    if (size < 5 || ptr[0] != 1 || ptr[1] != 0) {
+        return;
+    }
+
+    // Byte 2: 7-bit profile followed by the top bit of the level.
+    // Byte 3: low five level bits, then the rpu/el/bl present flags.
+    // Byte 4: bl_compatibility_id in the high nibble.
+    const uint8_t profile = ptr[2] >> 1;
+    const uint8_t level = ((ptr[2] & 0x1) << 5) | ((ptr[3] >> 3) & 0x1f);
+    const uint8_t rpu_present_flag = (ptr[3] >> 2) & 0x01;
+    const uint8_t el_present_flag = (ptr[3] >> 1) & 0x01;
+    const uint8_t bl_present_flag = (ptr[3] & 0x01);
+    const int32_t bl_compatibility_id = (int32_t)(ptr[4] >> 4);
+
+    ALOGV("profile-level-compatibility value in dv(c|v)c box %d-%d-%d",
+            profile, level, bl_compatibility_id);
+
+    // All Dolby Profiles will have profile and level info in MediaFormat
+    // Profile 8 and 9 will have bl_compatibility_id too.
+    const static ALookup<uint8_t, OMX_VIDEO_DOLBYVISIONPROFILETYPE> profiles{
+        {1, OMX_VIDEO_DolbyVisionProfileDvavPen},
+        {3, OMX_VIDEO_DolbyVisionProfileDvheDen},
+        {4, OMX_VIDEO_DolbyVisionProfileDvheDtr},
+        {5, OMX_VIDEO_DolbyVisionProfileDvheStn},
+        {6, OMX_VIDEO_DolbyVisionProfileDvheDth},
+        {7, OMX_VIDEO_DolbyVisionProfileDvheDtb},
+        {8, OMX_VIDEO_DolbyVisionProfileDvheSt},
+        {9, OMX_VIDEO_DolbyVisionProfileDvavSe},
+    };
+
+    const static ALookup<uint8_t, OMX_VIDEO_DOLBYVISIONLEVELTYPE> levels{
+        {0, OMX_VIDEO_DolbyVisionLevelUnknown},
+        {1, OMX_VIDEO_DolbyVisionLevelHd24},
+        {2, OMX_VIDEO_DolbyVisionLevelHd30},
+        {3, OMX_VIDEO_DolbyVisionLevelFhd24},
+        {4, OMX_VIDEO_DolbyVisionLevelFhd30},
+        {5, OMX_VIDEO_DolbyVisionLevelFhd60},
+        {6, OMX_VIDEO_DolbyVisionLevelUhd24},
+        {7, OMX_VIDEO_DolbyVisionLevelUhd30},
+        {8, OMX_VIDEO_DolbyVisionLevelUhd48},
+        {9, OMX_VIDEO_DolbyVisionLevelUhd60},
+    };
+    // set rpuAssoc
+    if (rpu_present_flag && el_present_flag && !bl_present_flag) {
+        format->setInt32("rpuAssoc", 1);
+    }
+    // set profile & level if they are recognized
+    OMX_VIDEO_DOLBYVISIONPROFILETYPE codecProfile;
+    OMX_VIDEO_DOLBYVISIONLEVELTYPE codecLevel;
+    if (profiles.map(profile, &codecProfile)) {
+        format->setInt32("profile", codecProfile);
+        if (codecProfile == OMX_VIDEO_DolbyVisionProfileDvheSt ||
+            codecProfile == OMX_VIDEO_DolbyVisionProfileDvavSe) {
+            format->setInt32("bl_compatibility_id", bl_compatibility_id);
+        }
+        // The level is only reported alongside a recognized profile.
+        if (levels.map(level, &codecLevel)) {
+            format->setInt32("level", codecLevel);
+        }
+    }
+}
+
static void parseH263ProfileLevelFromD263(const uint8_t *ptr, size_t size, sp<AMessage> &format) {
if (size < 7) {
return;
@@ -1411,6 +1471,12 @@
msg->setBuffer("csd-0", buffer);
}
+ if (meta->findData(kKeyDVCC, &type, &data, &size)) {
+ const uint8_t *ptr = (const uint8_t *)data;
+ ALOGV("DV: calling parseDolbyVisionProfileLevelFromDvcc with data size %zu", size);
+ parseDolbyVisionProfileLevelFromDvcc(ptr, size, msg);
+ }
+
*format = msg;
return OK;
@@ -1839,6 +1905,30 @@
meta->setData(kKeyHVCC, kTypeHVCC, hvcc.data(), outsize);
} else if (mime == MEDIA_MIMETYPE_VIDEO_AV1) {
meta->setData(kKeyAV1C, 0, csd0->data(), csd0->size());
+ } else if (mime == MEDIA_MIMETYPE_VIDEO_DOLBY_VISION) {
+     if (msg->findBuffer("csd-2", &csd2)) {
+         // csd-2 is stored verbatim as the Dolby Vision configuration data.
+         meta->setData(kKeyDVCC, kTypeDVCC, csd2->data(), csd2->size());
+
+         // The profile is the top seven bits of byte 2. Read it directly from
+         // csd-2 after checking the size: copying a fixed 1024 bytes into a
+         // scratch array reads past the end of any shorter buffer.
+         if (csd2->size() < 3) {
+             ALOGW("csd-2 is too short (%zu bytes) to carry a Dolby Vision profile",
+                     csd2->size());
+         } else {
+             const uint8_t profile = csd2->data()[2] >> 1;
+
+             // Profiles 2..8 get csd-0 repackaged as an HEVC configuration
+             // record; every other profile gets csd-0 + csd-1 repackaged as AVC.
+             if (profile > 1 && profile < 9) {
+                 std::vector<uint8_t> hvcc(csd0size + 1024);
+                 size_t outsize = reassembleHVCC(csd0, hvcc.data(), hvcc.size(), 4);
+                 meta->setData(kKeyHVCC, kTypeHVCC, hvcc.data(), outsize);
+             } else {
+                 sp<ABuffer> csd1;
+                 if (msg->findBuffer("csd-1", &csd1)) {
+                     std::vector<char> avcc(csd0size + csd1->size() + 1024);
+                     size_t outsize = reassembleAVCC(csd0, csd1, avcc.data());
+                     meta->setData(kKeyAVCC, kTypeAVCC, avcc.data(), outsize);
+                 }
+             }
+         }
+     } else {
+         ALOGW("We need csd-2!!. %s", msg->debugString().c_str());
+     }
} else if (mime == MEDIA_MIMETYPE_VIDEO_VP9) {
meta->setData(kKeyVp9CodecPrivate, 0, csd0->data(), csd0->size());
} else if (mime == MEDIA_MIMETYPE_AUDIO_OPUS) {
@@ -1885,8 +1975,18 @@
meta->setData(kKeyStreamHeader, 'mdat', csd0->data(), csd0->size());
} else if (msg->findBuffer("d263", &csd0)) {
meta->setData(kKeyD263, kTypeD263, csd0->data(), csd0->size());
- }
+ } else if (mime == MEDIA_MIMETYPE_VIDEO_DOLBY_VISION && msg->findBuffer("csd-2", &csd2)) {
+ meta->setData(kKeyDVCC, kTypeDVCC, csd2->data(), csd2->size());
+ // Remove CSD-2 from the data here to avoid duplicate data in meta
+ meta->remove(kKeyOpaqueCSD2);
+
+ if (msg->findBuffer("csd-avc", &csd0)) {
+ meta->setData(kKeyAVCC, kTypeAVCC, csd0->data(), csd0->size());
+ } else if (msg->findBuffer("csd-hevc", &csd0)) {
+ meta->setData(kKeyHVCC, kTypeHVCC, csd0->data(), csd0->size());
+ }
+ }
// XXX TODO add whatever other keys there are
#if 0
diff --git a/media/libstagefright/include/media/stagefright/MetaDataBase.h b/media/libstagefright/include/media/stagefright/MetaDataBase.h
index 659bd5b..e17093a 100644
--- a/media/libstagefright/include/media/stagefright/MetaDataBase.h
+++ b/media/libstagefright/include/media/stagefright/MetaDataBase.h
@@ -59,6 +59,7 @@
kKeyAACProfile = 'aacp', // int32_t
kKeyAVCC = 'avcc', // raw data
kKeyHVCC = 'hvcc', // raw data
+ kKeyDVCC = 'dvcc', // raw data
kKeyAV1C = 'av1c', // raw data
kKeyThumbnailHVCC = 'thvc', // raw data
kKeyD263 = 'd263', // raw data
@@ -245,6 +246,7 @@
kTypeAVCC = 'avcc',
kTypeHVCC = 'hvcc',
kTypeAV1C = 'av1c',
+ kTypeDVCC = 'dvcc',
kTypeD263 = 'd263',
};