stagefright: Enhance MPEG4 writer/extractor to store/retrieve layer info

Store temporal layer-count in MP4 meta-keys.
Enhance MPEG4Extractor to parse layer-count and make it available in
file-meta.
For AVC, parse SVC-extension NALs (if available), to extract
the layer-id and make it avialble in buffer-meta.

Bug: 27596987
Change-Id: I84a8914b470a0aadec0e02692296b1915a6c776e
(cherry picked from commit 09c8bb3d3b01d213fb845cd9c24c5e18443d7626)
diff --git a/include/media/stagefright/MPEG4Writer.h b/include/media/stagefright/MPEG4Writer.h
index a6901a8..cbd642c 100644
--- a/include/media/stagefright/MPEG4Writer.h
+++ b/include/media/stagefright/MPEG4Writer.h
@@ -65,6 +65,7 @@
 
     status_t setGeoData(int latitudex10000, int longitudex10000);
     status_t setCaptureRate(float captureFps);
+    status_t setTemporalLayerCount(uint32_t layerCount);
     virtual void setStartTimeOffsetMs(int ms) { mStartTimeOffsetMs = ms; }
     virtual int32_t getStartTimeOffsetMs() const { return mStartTimeOffsetMs; }
 
diff --git a/include/media/stagefright/MetaData.h b/include/media/stagefright/MetaData.h
index be7e5c1..6ba7b32 100644
--- a/include/media/stagefright/MetaData.h
+++ b/include/media/stagefright/MetaData.h
@@ -204,6 +204,8 @@
                                    // transfer Function, value defined by ColorAspects.Transfer.
     kKeyColorMatrix      = 'cMtx', // int32_t,
                                    // color Matrix, value defined by ColorAspects.MatrixCoeffs.
+    kKeyTemporalLayerId  = 'iLyr', // int32_t, temporal layer-id. 0-based (0 => base layer)
+    kKeyTemporalLayerCount = 'cLyr', // int32_t, number of temporal layers encoded
 };
 
 enum {
diff --git a/media/libstagefright/MPEG4Extractor.cpp b/media/libstagefright/MPEG4Extractor.cpp
index 6a67fcf..58448010 100644
--- a/media/libstagefright/MPEG4Extractor.cpp
+++ b/media/libstagefright/MPEG4Extractor.cpp
@@ -44,6 +44,7 @@
 
 #include <byteswap.h>
 #include "include/ID3.h"
+#include "include/avc_utils.h"
 
 #ifndef UINT32_MAX
 #define UINT32_MAX       (4294967295U)
@@ -2471,6 +2472,15 @@
         if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.capture.fps")) {
             mFileMetaData->setFloat(kKeyCaptureFramerate, *(float *)&val);
         }
+    } else if (dataType == 67 && dataSize >= 4) {
+        // BE signed int32
+        uint32_t val;
+        if (!mDataSource->getUInt32(offset, &val)) {
+            return ERROR_MALFORMED;
+        }
+        if (!strcasecmp(mMetaKeyMap[index].c_str(), "com.android.video.temporal_layers_count")) {
+            mFileMetaData->setInt32(kKeyTemporalLayerCount, val);
+        }
     } else {
         // add more keys if needed
         ALOGV("ignoring key: type %d, size %d", dataType, dataSize);
@@ -4464,6 +4474,12 @@
                     kKeyTargetTime, targetSampleTimeUs);
         }
 
+        if (mIsAVC) {
+            uint32_t layerId = FindAVCLayerId(
+                    (const uint8_t *)mBuffer->data(), mBuffer->range_length());
+            mBuffer->meta_data()->setInt32(kKeyTemporalLayerId, layerId);
+        }
+
         if (isSyncSample) {
             mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
         }
@@ -4627,6 +4643,12 @@
                         kKeyTargetTime, targetSampleTimeUs);
             }
 
+            if (mIsAVC) {
+                uint32_t layerId = FindAVCLayerId(
+                        (const uint8_t *)mBuffer->data(), mBuffer->range_length());
+                mBuffer->meta_data()->setInt32(kKeyTemporalLayerId, layerId);
+            }
+
             if (isSyncSample) {
                 mBuffer->meta_data()->setInt32(kKeyIsSyncFrame, 1);
             }
diff --git a/media/libstagefright/MPEG4Writer.cpp b/media/libstagefright/MPEG4Writer.cpp
index 24fb987..0180932 100644
--- a/media/libstagefright/MPEG4Writer.cpp
+++ b/media/libstagefright/MPEG4Writer.cpp
@@ -70,6 +70,7 @@
 static const char kMetaKey_Build[]      = "com.android.build";
 #endif
 static const char kMetaKey_CaptureFps[] = "com.android.capture.fps";
+static const char kMetaKey_TemporalLayerCount[] = "com.android.video.temporal_layers_count";
 
 static const uint8_t kMandatoryHevcNalUnitTypes[3] = {
     kHevcNalUnitTypeVps,
@@ -1381,6 +1382,19 @@
     return OK;
 }
 
+status_t MPEG4Writer::setTemporalLayerCount(uint32_t layerCount) {
+    if (layerCount > 9) {
+        return BAD_VALUE;
+    }
+
+    if (layerCount > 0) {
+        mMetaKeys->setInt32(kMetaKey_TemporalLayerCount, layerCount);
+        mMoovExtraSize += sizeof(kMetaKey_TemporalLayerCount) + 4 + 32;
+    }
+
+    return OK;
+}
+
 void MPEG4Writer::write(const void *data, size_t size) {
     write(data, 1, size);
 }
diff --git a/media/libstagefright/avc_utils.cpp b/media/libstagefright/avc_utils.cpp
index ccf3440..0396dc6 100644
--- a/media/libstagefright/avc_utils.cpp
+++ b/media/libstagefright/avc_utils.cpp
@@ -491,6 +491,28 @@
     return true;
 }
 
+uint32_t FindAVCLayerId(const uint8_t *data, size_t size) {
+    CHECK(data != NULL);
+
+    const unsigned kSvcNalType = 0xE;
+    const unsigned kSvcNalSearchRange = 32;
+    // SVC NAL
+    // |---0 1110|1--- ----|---- ----|iii- ---|
+    //       ^                        ^
+    //   NAL-type = 0xE               layer-Id
+    //
+    // layer_id 0 is for base layer, while 1, 2, ... are enhancement layers.
+    // Layer n uses reference frames from layer 0, 1, ..., n-1.
+
+    uint32_t layerId = 0;
+    sp<ABuffer> svcNAL = FindNAL(
+            data, size > kSvcNalSearchRange ? kSvcNalSearchRange : size, kSvcNalType);
+    if (svcNAL != NULL && svcNAL->size() >= 4) {
+        layerId = (*(svcNAL->data() + 3) >> 5) & 0x7;
+    }
+    return layerId;
+}
+
 sp<MetaData> MakeAACCodecSpecificData(
         unsigned profile, unsigned sampling_freq_index,
         unsigned channel_configuration) {
diff --git a/media/libstagefright/include/avc_utils.h b/media/libstagefright/include/avc_utils.h
index 7465b35..235ee63 100644
--- a/media/libstagefright/include/avc_utils.h
+++ b/media/libstagefright/include/avc_utils.h
@@ -85,6 +85,7 @@
 
 bool IsIDR(const sp<ABuffer> &accessUnit);
 bool IsAVCReferenceFrame(const sp<ABuffer> &accessUnit);
+uint32_t FindAVCLayerId(const uint8_t *data, size_t size);
 
 const char *AVCProfileToString(uint8_t profile);