Merge "stagefright: prefer B frames for high frame-rate recording" into nyc-mr1-dev
diff --git a/media/libmediaplayerservice/nuplayer/NuPlayerDecoder.cpp b/media/libmediaplayerservice/nuplayer/NuPlayerDecoder.cpp
index fa19410..cf38efc 100644
--- a/media/libmediaplayerservice/nuplayer/NuPlayerDecoder.cpp
+++ b/media/libmediaplayerservice/nuplayer/NuPlayerDecoder.cpp
@@ -19,6 +19,8 @@
 #include <utils/Log.h>
 #include <inttypes.h>
 
+#include <algorithm>
+
 #include "NuPlayerCCDecoder.h"
 #include "NuPlayerDecoder.h"
 #include "NuPlayerRenderer.h"
@@ -41,7 +43,7 @@
 
 namespace android {
 
-static float kDisplayRefreshingRate = 60.f;
+static float kDisplayRefreshingRate = 60.f; // TODO: get this from the display
 
 // The default total video frame rate of a stream when that info is not available from
 // the source.
@@ -77,7 +79,7 @@
       mTimeChangePending(false),
       mFrameRateTotal(kDefaultVideoFrameRateTotal),
       mPlaybackSpeed(1.0f),
-      mNumVideoTemporalLayerTotal(1),
+      mNumVideoTemporalLayerTotal(1), // decode all layers
       mNumVideoTemporalLayerAllowed(1),
       mCurrentMaxVideoTemporalLayerId(0),
       mResumePending(false),
@@ -351,14 +353,14 @@
 
     int32_t numVideoTemporalLayerTotal;
     if (params->findInt32("temporal-layer-count", &numVideoTemporalLayerTotal)
-            && numVideoTemporalLayerTotal > 0
+            && numVideoTemporalLayerTotal >= 0
             && numVideoTemporalLayerTotal <= kMaxNumVideoTemporalLayers
             && mNumVideoTemporalLayerTotal != numVideoTemporalLayerTotal) {
         needAdjustLayers = true;
-        mNumVideoTemporalLayerTotal = numVideoTemporalLayerTotal;
+        mNumVideoTemporalLayerTotal = std::max(numVideoTemporalLayerTotal, 1);
     }
 
-    if (needAdjustLayers) {
+    if (needAdjustLayers && mNumVideoTemporalLayerTotal > 1) {
         // TODO: For now, layer fps is calculated for some specific architectures.
         // But it really should be extracted from the stream.
         mVideoTemporalLayerAggregateFps[0] =
@@ -378,25 +380,21 @@
     }
 
     if (needAdjustLayers) {
-        int32_t layerId;
-        for (layerId = 0; layerId < mNumVideoTemporalLayerTotal; ++layerId) {
-            if (mVideoTemporalLayerAggregateFps[layerId] * mPlaybackSpeed
-                    > kDisplayRefreshingRate) {
-                --layerId;
-                break;
+        float decodeFrameRate = mFrameRateTotal;
+        // enable temporal layering optimization only if we know the layering depth
+        if (mNumVideoTemporalLayerTotal > 1) {
+            int32_t layerId;
+            for (layerId = 0; layerId < mNumVideoTemporalLayerTotal - 1; ++layerId) {
+                if (mVideoTemporalLayerAggregateFps[layerId] * mPlaybackSpeed
+                        >= kDisplayRefreshingRate * 0.9) {
+                    break;
+                }
             }
+            mNumVideoTemporalLayerAllowed = layerId + 1;
+            decodeFrameRate = mVideoTemporalLayerAggregateFps[layerId];
         }
-        if (layerId < 0) {
-            layerId = 0;
-        } else if (layerId >= mNumVideoTemporalLayerTotal) {
-            layerId = mNumVideoTemporalLayerTotal - 1;
-        }
-        mNumVideoTemporalLayerAllowed = layerId + 1;
-        if (mCurrentMaxVideoTemporalLayerId > layerId) {
-            mCurrentMaxVideoTemporalLayerId = layerId;
-        }
-        ALOGV("onSetParameters: allowed layers=%d, current max layerId=%d",
-                mNumVideoTemporalLayerAllowed, mCurrentMaxVideoTemporalLayerId);
+        ALOGV("onSetParameters: allowed layers=%d, decodeFps=%g",
+                mNumVideoTemporalLayerAllowed, decodeFrameRate);
 
         if (mCodec == NULL) {
             ALOGW("onSetParameters called before codec is created.");
@@ -404,8 +402,7 @@
         }
 
         sp<AMessage> codecParams = new AMessage();
-        codecParams->setFloat("operating-rate",
-                mVideoTemporalLayerAggregateFps[layerId] * mPlaybackSpeed);
+        codecParams->setFloat("operating-rate", decodeFrameRate * mPlaybackSpeed);
         mCodec->setParameters(codecParams);
     }
 }
@@ -818,11 +815,12 @@
         dropAccessUnit = false;
         if (!mIsAudio && !mIsSecure) {
             int32_t layerId = 0;
+            bool haveLayerId = accessUnit->meta()->findInt32("temporal-layer-id", &layerId);
             if (mRenderer->getVideoLateByUs() > 100000ll
                     && mIsVideoAVC
                     && !IsAVCReferenceFrame(accessUnit)) {
                 dropAccessUnit = true;
-            } else if (accessUnit->meta()->findInt32("temporal-layer-id", &layerId)) {
+            } else if (haveLayerId && mNumVideoTemporalLayerTotal > 1) {
                 // Add only one layer each time.
                 if (layerId > mCurrentMaxVideoTemporalLayerId + 1
                         || layerId >= mNumVideoTemporalLayerAllowed) {
@@ -832,9 +830,14 @@
                             mCurrentMaxVideoTemporalLayerId);
                 } else if (layerId > mCurrentMaxVideoTemporalLayerId) {
                     mCurrentMaxVideoTemporalLayerId = layerId;
+                } else if (layerId == 0 && mNumVideoTemporalLayerTotal > 1 && IsIDR(accessUnit)) {
+                    mCurrentMaxVideoTemporalLayerId = mNumVideoTemporalLayerTotal - 1;
                 }
             }
             if (dropAccessUnit) {
+                if (layerId <= mCurrentMaxVideoTemporalLayerId && layerId > 0) {
+                    mCurrentMaxVideoTemporalLayerId = layerId - 1;
+                }
                 ++mNumInputFramesDropped;
             }
         }
diff --git a/media/libstagefright/ACodec.cpp b/media/libstagefright/ACodec.cpp
index 4fa4bf6..9bb4b39 100644
--- a/media/libstagefright/ACodec.cpp
+++ b/media/libstagefright/ACodec.cpp
@@ -3968,14 +3968,31 @@
     return err;
 }
 
-static OMX_U32 setPFramesSpacing(int32_t iFramesInterval, int32_t frameRate) {
-    if (iFramesInterval < 0) {
-        return 0xFFFFFFFF;
-    } else if (iFramesInterval == 0) {
+static OMX_U32 setPFramesSpacing(
+        int32_t iFramesInterval /* seconds */, int32_t frameRate, uint32_t BFramesSpacing = 0) {
+    // Returns PFramesSpacing: the number of P frames between consecutive I frames.
+    // BFramesSpacing (input, defaults to 0) is the number of B frames between I/P frames.
+    //
+    // keyFrameInterval = ((PFramesSpacing + 1) * BFramesSpacing) + PFramesSpacing + 1
+    //                                     ^^^                            ^^^        ^^^
+    //                              number of B frames                number of P    I frame
+    //
+    //                  = (PFramesSpacing + 1) * (BFramesSpacing + 1)
+    //
+    // E.g. key-frame interval of 8 frames: (1 + 1) * (3 + 1) = 8
+    //      I   P   I  : I-interval: 8, nPFrames 1, nBFrames 3
+    //       BBB BBB
+
+    if (iFramesInterval < 0) { // negative interval: just 1 key frame
+        return 0xFFFFFFFE; // don't use maxint as key-frame-interval calculation will add 1
+    } else if (iFramesInterval == 0) { // zero interval: just key frames
         return 0;
     }
-    OMX_U32 ret = frameRate * iFramesInterval;
-    return ret;
+
+    // frames per key-frame period; NOTE(review): the int32_t product is computed before the cast
+    uint32_t keyFrameInterval = uint32_t(frameRate * iFramesInterval);
+    OMX_U32 ret = keyFrameInterval / (BFramesSpacing + 1); // round down: interval is an upper limit
+    return ret > 0 ? ret - 1 : 0; // ret is (PFramesSpacing + 1); drop the I frame, floor at 0
 }
 
 static OMX_VIDEO_CONTROLRATETYPE getBitrateMode(const sp<AMessage> &msg) {
@@ -4023,11 +4040,11 @@
     mpeg4type.nAllowedPictureTypes =
         OMX_VIDEO_PictureTypeI | OMX_VIDEO_PictureTypeP;
 
-    mpeg4type.nPFrames = setPFramesSpacing(iFrameInterval, frameRate);
+    mpeg4type.nBFrames = 0;
+    mpeg4type.nPFrames = setPFramesSpacing(iFrameInterval, frameRate, mpeg4type.nBFrames);
     if (mpeg4type.nPFrames == 0) {
         mpeg4type.nAllowedPictureTypes = OMX_VIDEO_PictureTypeI;
     }
-    mpeg4type.nBFrames = 0;
     mpeg4type.nIDCVLCThreshold = 0;
     mpeg4type.bACPred = OMX_TRUE;
     mpeg4type.nMaxPacketSize = 256;
@@ -4100,11 +4117,11 @@
     h263type.nAllowedPictureTypes =
         OMX_VIDEO_PictureTypeI | OMX_VIDEO_PictureTypeP;
 
-    h263type.nPFrames = setPFramesSpacing(iFrameInterval, frameRate);
+    h263type.nBFrames = 0;
+    h263type.nPFrames = setPFramesSpacing(iFrameInterval, frameRate, h263type.nBFrames);
     if (h263type.nPFrames == 0) {
         h263type.nAllowedPictureTypes = OMX_VIDEO_PictureTypeI;
     }
-    h263type.nBFrames = 0;
 
     int32_t profile;
     if (msg->findInt32("profile", &profile)) {
@@ -4255,8 +4272,15 @@
         h264type.eProfile = static_cast<OMX_VIDEO_AVCPROFILETYPE>(profile);
         h264type.eLevel = static_cast<OMX_VIDEO_AVCLEVELTYPE>(level);
     } else {
-        // Use baseline profile for AVC recording if profile is not specified.
+        // Use largest supported profile for AVC recording if profile is not specified.
         h264type.eProfile = OMX_VIDEO_AVCProfileBaseline;
+        for (OMX_VIDEO_AVCPROFILETYPE profile : {
+                OMX_VIDEO_AVCProfileHigh, OMX_VIDEO_AVCProfileMain }) {
+            if (verifySupportForProfileAndLevel(profile, 0) == OK) {
+                h264type.eProfile = profile;
+                break;
+            }
+        }
     }
 
     ALOGI("setupAVCEncoderParameters with [profile: %s] [level: %s]",
@@ -4267,7 +4291,7 @@
         h264type.bUseHadamard = OMX_TRUE;
         h264type.nRefFrames = 1;
         h264type.nBFrames = 0;
-        h264type.nPFrames = setPFramesSpacing(iFrameInterval, frameRate);
+        h264type.nPFrames = setPFramesSpacing(iFrameInterval, frameRate, h264type.nBFrames);
         if (h264type.nPFrames == 0) {
             h264type.nAllowedPictureTypes = OMX_VIDEO_PictureTypeI;
         }
@@ -4285,7 +4309,7 @@
         h264type.bUseHadamard = OMX_TRUE;
         h264type.nRefFrames = 2;
         h264type.nBFrames = 1;
-        h264type.nPFrames = setPFramesSpacing(iFrameInterval, frameRate);
+        h264type.nPFrames = setPFramesSpacing(iFrameInterval, frameRate, h264type.nBFrames);
         h264type.nAllowedPictureTypes =
             OMX_VIDEO_PictureTypeI | OMX_VIDEO_PictureTypeP | OMX_VIDEO_PictureTypeB;
         h264type.nRefIdx10ActiveMinus1 = 0;
@@ -4393,7 +4417,7 @@
         hevcType.eLevel = static_cast<OMX_VIDEO_HEVCLEVELTYPE>(level);
     }
     // TODO: finer control?
-    hevcType.nKeyFrameInterval = setPFramesSpacing(iFrameInterval, frameRate);
+    hevcType.nKeyFrameInterval = setPFramesSpacing(iFrameInterval, frameRate) + 1;
 
     err = mOMX->setParameter(
             mNode, (OMX_INDEXTYPE)OMX_IndexParamVideoHevc, &hevcType, sizeof(hevcType));
@@ -4465,7 +4489,7 @@
 
     if (err == OK) {
         if (iFrameInterval > 0) {
-            vp8type.nKeyFrameInterval = setPFramesSpacing(iFrameInterval, frameRate);
+            vp8type.nKeyFrameInterval = setPFramesSpacing(iFrameInterval, frameRate) + 1;
         }
         vp8type.eTemporalPattern = pattern;
         vp8type.nTemporalLayerCount = tsLayers;
diff --git a/media/libstagefright/MPEG4Writer.cpp b/media/libstagefright/MPEG4Writer.cpp
index 427891d..ec534ef 100644
--- a/media/libstagefright/MPEG4Writer.cpp
+++ b/media/libstagefright/MPEG4Writer.cpp
@@ -17,6 +17,8 @@
 //#define LOG_NDEBUG 0
 #define LOG_TAG "MPEG4Writer"
 
+#include <algorithm>
+
 #include <arpa/inet.h>
 #include <fcntl.h>
 #include <inttypes.h>
@@ -30,6 +32,7 @@
 
 #include <media/stagefright/foundation/ADebug.h>
 #include <media/stagefright/foundation/AMessage.h>
+#include <media/stagefright/foundation/AUtils.h>
 #include <media/stagefright/foundation/ColorUtils.h>
 #include <media/stagefright/MPEG4Writer.h>
 #include <media/stagefright/MediaBuffer.h>
@@ -256,6 +259,7 @@
     int32_t mTrackId;
     int64_t mTrackDurationUs;
     int64_t mMaxChunkDurationUs;
+    int64_t mLastDecodingTimeUs;
 
     int64_t mEstimatedTrackSizeBytes;
     int64_t mMdatSizeBytes;
@@ -1928,6 +1932,7 @@
     mEstimatedTrackSizeBytes = 0;
     mMdatSizeBytes = 0;
     mMaxChunkDurationUs = 0;
+    mLastDecodingTimeUs = -1;
 
     pthread_create(&mThread, &attr, ThreadWrapper, this);
     pthread_attr_destroy(&attr);
@@ -2512,6 +2517,17 @@
             int64_t decodingTimeUs;
             CHECK(meta_data->findInt64(kKeyDecodingTime, &decodingTimeUs));
             decodingTimeUs -= previousPausedDurationUs;
+
+            // ensure non-negative, monotonic decoding time
+            if (mLastDecodingTimeUs < 0) {
+                decodingTimeUs = std::max((int64_t)0, decodingTimeUs);
+            } else {
+                // increase decoding time by at least 1 tick
+                decodingTimeUs = std::max(
+                        mLastDecodingTimeUs + divUp(1000000, mTimeScale), decodingTimeUs);
+            }
+
+            mLastDecodingTimeUs = decodingTimeUs;
             cttsOffsetTimeUs =
                     timestampUs + kMaxCttsOffsetTimeUs - decodingTimeUs;
             if (WARN_UNLESS(cttsOffsetTimeUs >= 0ll, "for %s track", trackName)) {