heif: fixes for image sequences and dual-function files

Adding support for two new sets of APIs on MediaMetadataRetriever:

- getImageAtIndex() and getPrimaryImage()
- getFrameAtIndex() and getFramesAtIndex()

Outline of changes:

- Proper indexing of all displayable still images, so that they
  can be retrieved by getImageAtIndex()

- Exposing still images as "image/x.android.heic" tracks in
  MediaExtractor with necessary descriptive keys (such as "grid-*")

- Support to retrieve video frames by absolute index instead
  of timestamps, as image use cases mostly are interested in
  getting the images and care less about timing.

- Support to retrieve video frames in batches because retrieving
  one frame at a time is inefficient.

- Refactor image / frame decoding code into FrameDecoder, and split
  still image decoding and video sequence decoding into two
  subclasses to facilitate future development.

bug: 63633199

test:
cts-tradefed run cts-dev --module CtsMediaTestCases --compatibility:module-arg CtsMediaTestCases:include-annotation:android.platform.test.annotations.RequiresDevice

Change-Id: I2fe8519fb6907f315a8b513921fc1cc7f436e28d
diff --git a/media/extractors/mkv/MatroskaExtractor.cpp b/media/extractors/mkv/MatroskaExtractor.cpp
index 5a8e79d..e199f03 100644
--- a/media/extractors/mkv/MatroskaExtractor.cpp
+++ b/media/extractors/mkv/MatroskaExtractor.cpp
@@ -703,18 +703,22 @@
 
     int64_t seekTimeUs;
     ReadOptions::SeekMode mode;
-    if (options && options->getSeekTo(&seekTimeUs, &mode)
-            && !mExtractor->isLiveStreaming()) {
-        clearPendingFrames();
+    if (options && options->getSeekTo(&seekTimeUs, &mode)) {
+        if (mode == ReadOptions::SEEK_FRAME_INDEX) {
+            return ERROR_UNSUPPORTED;
+        }
 
-        // The audio we want is located by using the Cues to seek the video
-        // stream to find the target Cluster then iterating to finalize for
-        // audio.
-        int64_t actualFrameTimeUs;
-        mBlockIter.seek(seekTimeUs, mIsAudio, &actualFrameTimeUs);
+        if (!mExtractor->isLiveStreaming()) {
+            clearPendingFrames();
 
-        if (mode == ReadOptions::SEEK_CLOSEST) {
-            targetSampleTimeUs = actualFrameTimeUs;
+            // The audio we want is located by using the Cues to seek the video
+            // stream to find the target Cluster then iterating to finalize for
+            // audio.
+            int64_t actualFrameTimeUs;
+            mBlockIter.seek(seekTimeUs, mIsAudio, &actualFrameTimeUs);
+            if (mode == ReadOptions::SEEK_CLOSEST) {
+                targetSampleTimeUs = actualFrameTimeUs;
+            }
         }
     }
 
diff --git a/media/extractors/mp4/ItemTable.cpp b/media/extractors/mp4/ItemTable.cpp
index ed560e1..9a6cb64 100644
--- a/media/extractors/mp4/ItemTable.cpp
+++ b/media/extractors/mp4/ItemTable.cpp
@@ -40,8 +40,9 @@
     friend struct ItemReference;
     friend struct ItemProperty;
 
-    ImageItem() : ImageItem(0) {}
-    ImageItem(uint32_t _type) : type(_type),
+    ImageItem() : ImageItem(0, 0, false) {}
+    ImageItem(uint32_t _type, uint32_t _id, bool _hidden) :
+            type(_type), itemId(_id), hidden(_hidden),
             rows(0), columns(0), width(0), height(0), rotation(0),
             offset(0), size(0), nextTileIndex(0) {}
 
@@ -61,6 +62,8 @@
     }
 
     uint32_t type;
+    uint32_t itemId;
+    bool hidden;
     int32_t rows;
     int32_t columns;
     int32_t width;
@@ -496,7 +499,25 @@
             ALOGW("dimgRefs if not clean!");
         }
         derivedImage.dimgRefs.appendVector(mRefs);
+
+        for (size_t i = 0; i < mRefs.size(); i++) {
+            itemIndex = itemIdToItemMap.indexOfKey(mRefs[i]);
+
+            // ignore non-image items
+            if (itemIndex < 0) {
+                continue;
+            }
+            ImageItem &sourceImage = itemIdToItemMap.editValueAt(itemIndex);
+
+            // mark the source image of the derivation as hidden
+            sourceImage.hidden = true;
+        }
     } else if (type() == FOURCC('t', 'h', 'm', 'b')) {
+        // mark thumbnail image as hidden, these can be retrieved if the client
+        // request thumbnail explicitly, but won't be exposed as displayables.
+        ImageItem &thumbImage = itemIdToItemMap.editValueAt(itemIndex);
+        thumbImage.hidden = true;
+
         for (size_t i = 0; i < mRefs.size(); i++) {
             itemIndex = itemIdToItemMap.indexOfKey(mRefs[i]);
 
@@ -511,6 +532,10 @@
             }
             masterImage.thumbnails.push_back(mItemId);
         }
+    } else if (type() == FOURCC('a', 'u', 'x', 'l')) {
+        // mark auxiliary image as hidden
+        ImageItem &auxImage = itemIdToItemMap.editValueAt(itemIndex);
+        auxImage.hidden = true;
     } else {
         ALOGW("ignoring unsupported ref type 0x%x", type());
     }
@@ -942,6 +967,7 @@
 struct ItemInfo {
     uint32_t itemId;
     uint32_t itemType;
+    bool hidden;
 };
 
 struct InfeBox : public FullBox {
@@ -1012,6 +1038,9 @@
 
         itemInfo->itemId = item_id;
         itemInfo->itemType = item_type;
+        // According to HEIF spec, (flags & 1) indicates the image is hidden
+        // and not supposed to be displayed.
+        itemInfo->hidden = (flags() & 1);
 
         char itemTypeString[5];
         MakeFourCCString(item_type, itemTypeString);
@@ -1295,7 +1324,7 @@
             return ERROR_MALFORMED;
         }
 
-        ImageItem image(info.itemType);
+        ImageItem image(info.itemType, info.itemId, info.hidden);
 
         ALOGV("adding %s: itemId %d", image.isGrid() ? "grid" : "image", info.itemId);
 
@@ -1327,6 +1356,29 @@
         mItemReferences[i]->apply(mItemIdToItemMap);
     }
 
+    bool foundPrimary = false;
+    for (size_t i = 0; i < mItemIdToItemMap.size(); i++) {
+        // add all non-hidden images, also add the primary even if it's marked
+        // hidden, in case the primary is set to a thumbnail
+        bool isPrimary = (mItemIdToItemMap[i].itemId == mPrimaryItemId);
+        if (!mItemIdToItemMap[i].hidden || isPrimary) {
+            mDisplayables.push_back(i);
+        }
+        foundPrimary |= isPrimary;
+    }
+
+    ALOGV("found %zu displayables", mDisplayables.size());
+
+    // fail if no displayables are found
+    if (mDisplayables.empty()) {
+        return ERROR_MALFORMED;
+    }
+
+    // if the primary item id is invalid, set primary to the first displayable
+    if (!foundPrimary) {
+        mPrimaryItemId = mItemIdToItemMap[mDisplayables[0]].itemId;
+    }
+
     mImageItemsValid = true;
     return OK;
 }
@@ -1348,29 +1400,36 @@
     ALOGV("attach property %d to item id %d)",
             propertyIndex, association.itemId);
 
-    mItemProperties[propertyIndex]->attachTo(
-            mItemIdToItemMap.editValueAt(itemIndex));
+    mItemProperties[propertyIndex]->attachTo(mItemIdToItemMap.editValueAt(itemIndex));
 }
 
-sp<MetaData> ItemTable::getImageMeta() {
+uint32_t ItemTable::countImages() const {
+    return mImageItemsValid ? mDisplayables.size() : 0;
+}
+
+sp<MetaData> ItemTable::getImageMeta(const uint32_t imageIndex) {
     if (!mImageItemsValid) {
         return NULL;
     }
 
-    ssize_t itemIndex = mItemIdToItemMap.indexOfKey(mPrimaryItemId);
-    if (itemIndex < 0) {
-        ALOGE("Primary item id %d not found!", mPrimaryItemId);
+    if (imageIndex >= mDisplayables.size()) {
+        ALOGE("%s: invalid image index %u", __FUNCTION__, imageIndex);
         return NULL;
     }
-
-    ALOGV("primary item index %zu", itemIndex);
+    const uint32_t itemIndex = mDisplayables[imageIndex];
+    ALOGV("image[%u]: item index %u", imageIndex, itemIndex);
 
     const ImageItem *image = &mItemIdToItemMap[itemIndex];
 
     sp<MetaData> meta = new MetaData;
-    meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_VIDEO_HEVC);
+    meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC);
 
-    ALOGV("setting image size %dx%d", image->width, image->height);
+    if (image->itemId == mPrimaryItemId) {
+        meta->setInt32(kKeyIsPrimaryImage, 1);
+    }
+
+    ALOGV("image[%u]: size %dx%d", imageIndex, image->width, image->height);
+
     meta->setInt32(kKeyWidth, image->width);
     meta->setInt32(kKeyHeight, image->height);
     if (image->rotation != 0) {
@@ -1394,8 +1453,8 @@
             meta->setInt32(kKeyThumbnailHeight, thumbnail.height);
             meta->setData(kKeyThumbnailHVCC, kTypeHVCC,
                     thumbnail.hvcc->data(), thumbnail.hvcc->size());
-            ALOGV("thumbnail meta: %dx%d, item index %zd",
-                    thumbnail.width, thumbnail.height, thumbItemIndex);
+            ALOGV("image[%u]: thumbnail: size %dx%d, item index %zd",
+                    imageIndex, thumbnail.width, thumbnail.height, thumbItemIndex);
         } else {
             ALOGW("%s: Referenced thumbnail does not exist!", __FUNCTION__);
         }
@@ -1406,23 +1465,18 @@
         if (tileItemIndex < 0) {
             return NULL;
         }
-        // when there are tiles, (kKeyWidth, kKeyHeight) is the full tiled area,
-        // and (kKeyDisplayWidth, kKeyDisplayHeight) may be smaller than that.
-        meta->setInt32(kKeyDisplayWidth, image->width);
-        meta->setInt32(kKeyDisplayHeight, image->height);
-        int32_t gridRows = image->rows, gridCols = image->columns;
+        meta->setInt32(kKeyGridRows, image->rows);
+        meta->setInt32(kKeyGridCols, image->columns);
 
         // point image to the first tile for grid size and HVCC
         image = &mItemIdToItemMap.editValueAt(tileItemIndex);
-        meta->setInt32(kKeyWidth, image->width * gridCols);
-        meta->setInt32(kKeyHeight, image->height * gridRows);
         meta->setInt32(kKeyGridWidth, image->width);
         meta->setInt32(kKeyGridHeight, image->height);
         meta->setInt32(kKeyMaxInputSize, image->width * image->height * 1.5);
     }
 
     if (image->hvcc == NULL) {
-        ALOGE("%s: hvcc is missing for item index %zd!", __FUNCTION__, itemIndex);
+        ALOGE("%s: hvcc is missing for image[%u]!", __FUNCTION__, imageIndex);
         return NULL;
     }
     meta->setData(kKeyHVCC, kTypeHVCC, image->hvcc->data(), image->hvcc->size());
@@ -1433,48 +1487,46 @@
     return meta;
 }
 
-uint32_t ItemTable::countImages() const {
-    return mImageItemsValid ? mItemIdToItemMap.size() : 0;
-}
-
-status_t ItemTable::findPrimaryImage(uint32_t *itemIndex) {
+status_t ItemTable::findImageItem(const uint32_t imageIndex, uint32_t *itemIndex) {
     if (!mImageItemsValid) {
         return INVALID_OPERATION;
     }
 
-    ssize_t index = mItemIdToItemMap.indexOfKey(mPrimaryItemId);
-    if (index < 0) {
-        return ERROR_MALFORMED;
+    if (imageIndex >= mDisplayables.size()) {
+        ALOGE("%s: invalid image index %d", __FUNCTION__, imageIndex);
+        return BAD_VALUE;
     }
 
-    *itemIndex = index;
+    *itemIndex = mDisplayables[imageIndex];
+
+    ALOGV("image[%u]: item index %u", imageIndex, *itemIndex);
     return OK;
 }
 
-status_t ItemTable::findThumbnail(uint32_t *itemIndex) {
+status_t ItemTable::findThumbnailItem(const uint32_t imageIndex, uint32_t *itemIndex) {
     if (!mImageItemsValid) {
         return INVALID_OPERATION;
     }
 
-    ssize_t primaryItemIndex = mItemIdToItemMap.indexOfKey(mPrimaryItemId);
-    if (primaryItemIndex < 0) {
-        ALOGE("%s: Primary item id %d not found!", __FUNCTION__, mPrimaryItemId);
-        return ERROR_MALFORMED;
+    if (imageIndex >= mDisplayables.size()) {
+        ALOGE("%s: invalid image index %d", __FUNCTION__, imageIndex);
+        return BAD_VALUE;
     }
 
-    const ImageItem &primaryImage = mItemIdToItemMap[primaryItemIndex];
-    if (primaryImage.thumbnails.empty()) {
-        ALOGW("%s: Using primary in place of thumbnail.", __FUNCTION__);
-        *itemIndex = primaryItemIndex;
+    uint32_t masterItemIndex = mDisplayables[imageIndex];
+
+    const ImageItem &masterImage = mItemIdToItemMap[masterItemIndex];
+    if (masterImage.thumbnails.empty()) {
+        *itemIndex = masterItemIndex;
         return OK;
     }
 
-    ssize_t thumbItemIndex = mItemIdToItemMap.indexOfKey(
-            primaryImage.thumbnails[0]);
+    ssize_t thumbItemIndex = mItemIdToItemMap.indexOfKey(masterImage.thumbnails[0]);
     if (thumbItemIndex < 0) {
-        ALOGE("%s: Thumbnail item id %d not found!",
-                __FUNCTION__, primaryImage.thumbnails[0]);
-        return ERROR_MALFORMED;
+        ALOGW("%s: Thumbnail item id %d not found, use master instead",
+                __FUNCTION__, masterImage.thumbnails[0]);
+        *itemIndex = masterItemIndex;
+        return OK;
     }
 
     *itemIndex = thumbItemIndex;
diff --git a/media/extractors/mp4/ItemTable.h b/media/extractors/mp4/ItemTable.h
index 6591271..3d2e2ae 100644
--- a/media/extractors/mp4/ItemTable.h
+++ b/media/extractors/mp4/ItemTable.h
@@ -49,12 +49,12 @@
     status_t parse(uint32_t type, off64_t offset, size_t size);
 
     bool isValid() { return mImageItemsValid; }
-    sp<MetaData> getImageMeta();
     uint32_t countImages() const;
-    status_t findPrimaryImage(uint32_t *imageIndex);
-    status_t findThumbnail(uint32_t *thumbnailIndex);
+    sp<MetaData> getImageMeta(const uint32_t imageIndex);
+    status_t findImageItem(const uint32_t imageIndex, uint32_t *itemIndex);
+    status_t findThumbnailItem(const uint32_t imageIndex, uint32_t *itemIndex);
     status_t getImageOffsetAndSize(
-            uint32_t *imageIndex, off64_t *offset, size_t *size);
+            uint32_t *itemIndex, off64_t *offset, size_t *size);
 
 protected:
     ~ItemTable();
@@ -78,6 +78,7 @@
     bool mImageItemsValid;
     uint32_t mCurrentItemIndex;
     KeyedVector<uint32_t, ImageItem> mItemIdToItemMap;
+    Vector<uint32_t> mDisplayables;
 
     status_t parseIlocBox(off64_t offset, size_t size);
     status_t parseIinfBox(off64_t offset, size_t size);
diff --git a/media/extractors/mp4/MPEG4Extractor.cpp b/media/extractors/mp4/MPEG4Extractor.cpp
index ede7e84..6671956 100644
--- a/media/extractors/mp4/MPEG4Extractor.cpp
+++ b/media/extractors/mp4/MPEG4Extractor.cpp
@@ -138,7 +138,7 @@
 
     uint8_t *mSrcBuffer;
 
-    bool mIsHEIF;
+    bool mIsHeif;
     sp<ItemTable> mItemTable;
 
     size_t parseNALSize(const uint8_t *data) const;
@@ -338,7 +338,7 @@
     return false;
 }
 
-MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source)
+MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source, const char *mime)
     : mMoofOffset(0),
       mMoofFound(false),
       mMdatFound(false),
@@ -346,12 +346,15 @@
       mInitCheck(NO_INIT),
       mHeaderTimescale(0),
       mIsQT(false),
-      mIsHEIF(false),
+      mIsHeif(false),
+      mIsHeifSequence(false),
+      mPreferHeif(mime != NULL && !strcasecmp(mime, MEDIA_MIMETYPE_CONTAINER_HEIF)),
       mFirstTrack(NULL),
       mLastTrack(NULL),
       mFileMetaData(new MetaData),
       mFirstSINF(NULL),
       mIsDrm(false) {
+    ALOGV("mime=%s, mPreferHeif=%d", mime, mPreferHeif);
 }
 
 MPEG4Extractor::~MPEG4Extractor() {
@@ -560,8 +563,9 @@
     status_t err;
     bool sawMoovOrSidx = false;
 
-    while (!((sawMoovOrSidx && (mMdatFound || mMoofFound)) ||
-            (mIsHEIF && (mItemTable != NULL) && mItemTable->isValid()))) {
+    while (!((!mIsHeif && sawMoovOrSidx && (mMdatFound || mMoofFound)) ||
+             (mIsHeif && (mPreferHeif || !mIsHeifSequence)
+                     && (mItemTable != NULL) && mItemTable->isValid()))) {
         off64_t orig_offset = offset;
         err = parseChunk(&offset, 0);
 
@@ -578,12 +582,47 @@
         }
     }
 
+    if (mIsHeif) {
+        uint32_t imageCount = mItemTable->countImages();
+        if (imageCount == 0) {
+            ALOGE("found no image in heif!");
+        } else {
+            for (uint32_t imageIndex = 0; imageIndex < imageCount; imageIndex++) {
+                sp<MetaData> meta = mItemTable->getImageMeta(imageIndex);
+                if (meta == NULL) {
+                    ALOGE("heif image %u has no meta!", imageIndex);
+                    continue;
+                }
+
+                ALOGV("adding HEIF image track %u", imageIndex);
+                Track *track = new Track;
+                track->next = NULL;
+                if (mLastTrack != NULL) {
+                    mLastTrack->next = track;
+                } else {
+                    mFirstTrack = track;
+                }
+                mLastTrack = track;
+
+                track->meta = meta;
+                track->meta->setInt32(kKeyTrackID, imageIndex);
+                track->includes_expensive_metadata = false;
+                track->skipTrack = false;
+                track->timescale = 0;
+            }
+        }
+    }
+
     if (mInitCheck == OK) {
         if (findTrackByMimePrefix("video/") != NULL) {
             mFileMetaData->setCString(
                     kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4);
         } else if (findTrackByMimePrefix("audio/") != NULL) {
             mFileMetaData->setCString(kKeyMIMEType, "audio/mp4");
+        } else if (findTrackByMimePrefix(
+                MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC) != NULL) {
+            mFileMetaData->setCString(
+                    kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_HEIF);
         } else {
             mFileMetaData->setCString(kKeyMIMEType, "application/octet-stream");
         }
@@ -614,28 +653,6 @@
         free(buf);
     }
 
-    if (mIsHEIF) {
-        sp<MetaData> meta = mItemTable->getImageMeta();
-        if (meta == NULL) {
-            return ERROR_MALFORMED;
-        }
-
-        Track *track = mLastTrack;
-        if (track != NULL) {
-            ALOGW("track is set before metadata is fully processed");
-        } else {
-            track = new Track;
-            track->next = NULL;
-            mFirstTrack = mLastTrack = track;
-        }
-
-        track->meta = meta;
-        track->meta->setInt32(kKeyTrackID, 0);
-        track->includes_expensive_metadata = false;
-        track->skipTrack = false;
-        track->timescale = 0;
-    }
-
     return mInitCheck;
 }
 
@@ -1037,6 +1054,7 @@
                 }
                 isTrack = true;
 
+                ALOGV("adding new track");
                 Track *track = new Track;
                 track->next = NULL;
                 if (mLastTrack) {
@@ -1084,6 +1102,7 @@
                 }
 
                 if (mLastTrack->skipTrack) {
+                    ALOGV("skipping this track...");
                     Track *cur = mFirstTrack;
 
                     if (cur == mLastTrack) {
@@ -1260,6 +1279,25 @@
             break;
         }
 
+        case FOURCC('t', 'r', 'e', 'f'):
+        {
+            *offset += chunk_size;
+
+            if (mLastTrack == NULL) {
+                return ERROR_MALFORMED;
+            }
+
+            // Skip thumbnail track for now since we don't have an
+            // API to retrieve it yet.
+            // The thumbnail track can't be accessed by negative index or time,
+            // because each timed sample has its own corresponding thumbnail
+            // in the thumbnail track. We'll need a dedicated API to retrieve
+            // thumbnail at time instead.
+            mLastTrack->skipTrack = true;
+
+            break;
+        }
+
         case FOURCC('p', 's', 's', 'h'):
         {
             *offset += chunk_size;
@@ -1758,6 +1796,8 @@
                             mLastTrack->meta->setInt32(kKeyFrameRate, frameRate);
                         }
                     }
+                    ALOGV("setting frame count %zu", nSamples);
+                    mLastTrack->meta->setInt32(kKeyFrameCount, nSamples);
                 }
             }
 
@@ -2089,7 +2129,7 @@
         case FOURCC('i', 'r', 'e', 'f'):
         case FOURCC('i', 'p', 'r', 'o'):
         {
-            if (mIsHEIF) {
+            if (mIsHeif) {
                 if (mItemTable == NULL) {
                     mItemTable = new ItemTable(mDataSource);
                 }
@@ -2469,10 +2509,17 @@
 
             if (brandSet.count(FOURCC('q', 't', ' ', ' ')) > 0) {
                 mIsQT = true;
-            } else if (brandSet.count(FOURCC('m', 'i', 'f', '1')) > 0
-                    && brandSet.count(FOURCC('h', 'e', 'i', 'c')) > 0) {
-                mIsHEIF = true;
-                ALOGV("identified HEIF image");
+            } else {
+                if (brandSet.count(FOURCC('m', 'i', 'f', '1')) > 0
+                 && brandSet.count(FOURCC('h', 'e', 'i', 'c')) > 0) {
+                    mIsHeif = true;
+                    ALOGV("identified HEIF image");
+                }
+                if (brandSet.count(FOURCC('m', 's', 'f', '1')) > 0
+                 && brandSet.count(FOURCC('h', 'e', 'v', 'c')) > 0) {
+                    mIsHeifSequence = true;
+                    ALOGV("identified HEIF image sequence");
+                }
             }
 
             *offset = stop_offset;
@@ -3391,6 +3438,7 @@
         return NULL;
     }
 
+    sp<ItemTable> itemTable;
     if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
         uint32_t type;
         const void *data;
@@ -3404,7 +3452,8 @@
         if (size < 7 || ptr[0] != 1) {  // configurationVersion == 1
             return NULL;
         }
-    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)) {
+    } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC)
+            || !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
         uint32_t type;
         const void *data;
         size_t size;
@@ -3417,11 +3466,14 @@
         if (size < 22 || ptr[0] != 1) {  // configurationVersion == 1
             return NULL;
         }
+        if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
+            itemTable = mItemTable;
+        }
     }
 
     sp<MPEG4Source> source =  new MPEG4Source(this,
             track->meta, mDataSource, track->timescale, track->sampleTable,
-            mSidxEntries, trex, mMoofOffset, mItemTable);
+            mSidxEntries, trex, mMoofOffset, itemTable);
     if (source->init() != OK) {
         return NULL;
     }
@@ -3849,7 +3901,7 @@
       mBuffer(NULL),
       mWantsNALFragments(false),
       mSrcBuffer(NULL),
-      mIsHEIF(itemTable != NULL),
+      mIsHeif(itemTable != NULL),
       mItemTable(itemTable) {
 
     memset(&mTrackFragmentHeaderInfo, 0, sizeof(mTrackFragmentHeaderInfo));
@@ -3871,7 +3923,8 @@
     CHECK(success);
 
     mIsAVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC);
-    mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC);
+    mIsHEVC = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) ||
+              !strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC);
 
     if (mIsAVC) {
         uint32_t type;
@@ -4625,15 +4678,19 @@
     int64_t seekTimeUs;
     ReadOptions::SeekMode mode;
     if (options && options->getSeekTo(&seekTimeUs, &mode)) {
-        if (mIsHEIF) {
+        if (mIsHeif) {
             CHECK(mSampleTable == NULL);
             CHECK(mItemTable != NULL);
+            int32_t imageIndex;
+            if (!mFormat->findInt32(kKeyTrackID, &imageIndex)) {
+                return ERROR_MALFORMED;
+            }
 
             status_t err;
             if (seekTimeUs >= 0) {
-                err = mItemTable->findPrimaryImage(&mCurrentSampleIndex);
+                err = mItemTable->findImageItem(imageIndex, &mCurrentSampleIndex);
             } else {
-                err = mItemTable->findThumbnail(&mCurrentSampleIndex);
+                err = mItemTable->findThumbnailItem(imageIndex, &mCurrentSampleIndex);
             }
             if (err != OK) {
                 return err;
@@ -4651,6 +4708,9 @@
                 case ReadOptions::SEEK_CLOSEST:
                     findFlags = SampleTable::kFlagClosest;
                     break;
+                case ReadOptions::SEEK_FRAME_INDEX:
+                    findFlags = SampleTable::kFlagFrameIndex;
+                    break;
                 default:
                     CHECK(!"Should not be here.");
                     break;
@@ -4661,7 +4721,8 @@
                     seekTimeUs, 1000000, mTimescale,
                     &sampleIndex, findFlags);
 
-            if (mode == ReadOptions::SEEK_CLOSEST) {
+            if (mode == ReadOptions::SEEK_CLOSEST
+                    || mode == ReadOptions::SEEK_FRAME_INDEX) {
                 // We found the closest sample already, now we want the sync
                 // sample preceding it (or the sample itself of course), even
                 // if the subsequent sync sample is closer.
@@ -4693,7 +4754,8 @@
                 return err;
             }
 
-            if (mode == ReadOptions::SEEK_CLOSEST) {
+            if (mode == ReadOptions::SEEK_CLOSEST
+                || mode == ReadOptions::SEEK_FRAME_INDEX) {
                 targetSampleTimeUs = (sampleTime * 1000000ll) / mTimescale;
             }
 
@@ -4729,7 +4791,7 @@
         newBuffer = true;
 
         status_t err;
-        if (!mIsHEIF) {
+        if (!mIsHeif) {
             err = mSampleTable->getMetaDataForSample(
                     mCurrentSampleIndex, &offset, &size, &cts, &isSyncSample, &stts);
         } else {
@@ -5316,7 +5378,8 @@
         || !memcmp(header, "ftypisom", 8) || !memcmp(header, "ftypM4V ", 8)
         || !memcmp(header, "ftypM4A ", 8) || !memcmp(header, "ftypf4v ", 8)
         || !memcmp(header, "ftypkddi", 8) || !memcmp(header, "ftypM4VP", 8)
-        || !memcmp(header, "ftypmif1", 8) || !memcmp(header, "ftypheic", 8)) {
+        || !memcmp(header, "ftypmif1", 8) || !memcmp(header, "ftypheic", 8)
+        || !memcmp(header, "ftypmsf1", 8) || !memcmp(header, "ftyphevc", 8)) {
         *mimeType = MEDIA_MIMETYPE_CONTAINER_MPEG4;
         *confidence = 0.4;
 
@@ -5347,6 +5410,8 @@
         FOURCC('3', 'g', '2', 'b'),
         FOURCC('m', 'i', 'f', '1'),  // HEIF image
         FOURCC('h', 'e', 'i', 'c'),  // HEIF image
+        FOURCC('m', 's', 'f', '1'),  // HEIF image sequence
+        FOURCC('h', 'e', 'v', 'c'),  // HEIF image sequence
     };
 
     for (size_t i = 0;
diff --git a/media/extractors/mp4/MPEG4Extractor.h b/media/extractors/mp4/MPEG4Extractor.h
index c634796..d4f17e3 100644
--- a/media/extractors/mp4/MPEG4Extractor.h
+++ b/media/extractors/mp4/MPEG4Extractor.h
@@ -52,7 +52,7 @@
 class MPEG4Extractor : public MediaExtractor {
 public:
     // Extractor assumes ownership of "source".
-    explicit MPEG4Extractor(const sp<DataSource> &source);
+    explicit MPEG4Extractor(const sp<DataSource> &source, const char *mime = NULL);
 
     virtual size_t countTracks();
     virtual sp<MediaSource> getTrack(size_t index);
@@ -103,7 +103,9 @@
     status_t mInitCheck;
     uint32_t mHeaderTimescale;
     bool mIsQT;
-    bool mIsHEIF;
+    bool mIsHeif;
+    bool mIsHeifSequence;
+    bool mPreferHeif;
 
     Track *mFirstTrack, *mLastTrack;
 
diff --git a/media/extractors/mp4/SampleTable.cpp b/media/extractors/mp4/SampleTable.cpp
index fe25e95..378d63a 100644
--- a/media/extractors/mp4/SampleTable.cpp
+++ b/media/extractors/mp4/SampleTable.cpp
@@ -724,6 +724,14 @@
         return ERROR_OUT_OF_RANGE;
     }
 
+    if (flags == kFlagFrameIndex) {
+        if (req_time >= mNumSampleSizes) {
+            return ERROR_OUT_OF_RANGE;
+        }
+        *sample_index = mSampleTimeEntries[req_time].mSampleIndex;
+        return OK;
+    }
+
     uint32_t left = 0;
     uint32_t right_plus_one = mNumSampleSizes;
     while (left < right_plus_one) {
diff --git a/media/extractors/mp4/SampleTable.h b/media/extractors/mp4/SampleTable.h
index eb1a674..466e26b 100644
--- a/media/extractors/mp4/SampleTable.h
+++ b/media/extractors/mp4/SampleTable.h
@@ -72,7 +72,8 @@
     enum {
         kFlagBefore,
         kFlagAfter,
-        kFlagClosest
+        kFlagClosest,
+        kFlagFrameIndex,
     };
     status_t findSampleAtTime(
             uint64_t req_time, uint64_t scale_num, uint64_t scale_den,
diff --git a/media/extractors/mpeg2/MPEG2TSExtractor.cpp b/media/extractors/mpeg2/MPEG2TSExtractor.cpp
index abe2054..4f61e16 100644
--- a/media/extractors/mpeg2/MPEG2TSExtractor.cpp
+++ b/media/extractors/mpeg2/MPEG2TSExtractor.cpp
@@ -512,6 +512,8 @@
                 --index;
             }
             break;
+        default:
+            return ERROR_UNSUPPORTED;
     }
     if (!shouldSeekBeyond || mOffset <= mSeekSyncPoints->valueAt(index)) {
         int64_t actualSeekTimeUs = mSeekSyncPoints->keyAt(index);
diff --git a/media/libheif/HeifDecoderImpl.cpp b/media/libheif/HeifDecoderImpl.cpp
index 303f667..a63a2df 100644
--- a/media/libheif/HeifDecoderImpl.cpp
+++ b/media/libheif/HeifDecoderImpl.cpp
@@ -25,8 +25,8 @@
 #include <drm/drm_framework_common.h>
 #include <media/IDataSource.h>
 #include <media/mediametadataretriever.h>
-#include <media/stagefright/foundation/ADebug.h>
 #include <media/MediaSource.h>
+#include <media/stagefright/foundation/ADebug.h>
 #include <private/media/VideoFrame.h>
 #include <utils/Log.h>
 #include <utils/RefBase.h>
@@ -270,7 +270,9 @@
     // it's not, default to HAL_PIXEL_FORMAT_RGB_565.
     mOutputColor(HAL_PIXEL_FORMAT_RGB_565),
     mCurScanline(0),
-    mFrameDecoded(false) {
+    mFrameDecoded(false),
+    mHasImage(false),
+    mHasVideo(false) {
 }
 
 HeifDecoderImpl::~HeifDecoderImpl() {
@@ -278,6 +280,8 @@
 
 bool HeifDecoderImpl::init(HeifStream* stream, HeifFrameInfo* frameInfo) {
     mFrameDecoded = false;
+    mFrameMemory.clear();
+
     sp<HeifDataSource> dataSource = new HeifDataSource(stream);
     if (!dataSource->init()) {
         return false;
@@ -285,7 +289,7 @@
     mDataSource = dataSource;
 
     mRetriever = new MediaMetadataRetriever();
-    status_t err = mRetriever->setDataSource(mDataSource, "video/mp4");
+    status_t err = mRetriever->setDataSource(mDataSource, "image/heif");
     if (err != OK) {
         ALOGE("failed to set data source!");
 
@@ -295,15 +299,21 @@
     }
     ALOGV("successfully set data source.");
 
+    const char* hasImage = mRetriever->extractMetadata(METADATA_KEY_HAS_IMAGE);
     const char* hasVideo = mRetriever->extractMetadata(METADATA_KEY_HAS_VIDEO);
-    if (!hasVideo || strcasecmp(hasVideo, "yes")) {
-        ALOGE("no video: %s", hasVideo ? hasVideo : "null");
-        return false;
+
+    mHasImage = hasImage && !strcasecmp(hasImage, "yes");
+    mHasVideo = hasVideo && !strcasecmp(hasVideo, "yes");
+    if (mHasImage) {
+        // image index < 0 to retrieve primary image
+        mFrameMemory = mRetriever->getImageAtIndex(
+                -1, mOutputColor, true /*metaOnly*/);
+    } else if (mHasVideo) {
+        mFrameMemory = mRetriever->getFrameAtTime(0,
+                MediaSource::ReadOptions::SEEK_PREVIOUS_SYNC,
+                mOutputColor, true /*metaOnly*/);
     }
 
-    mFrameMemory = mRetriever->getFrameAtTime(0,
-            MediaSource::ReadOptions::SEEK_PREVIOUS_SYNC,
-            mOutputColor, true /*metaOnly*/);
     if (mFrameMemory == nullptr || mFrameMemory->pointer() == nullptr) {
         ALOGE("getFrameAtTime: videoFrame is a nullptr");
         return false;
@@ -368,8 +378,14 @@
         return true;
     }
 
-    mFrameMemory = mRetriever->getFrameAtTime(0,
-            MediaSource::ReadOptions::SEEK_PREVIOUS_SYNC, mOutputColor);
+    if (mHasImage) {
+        // image index < 0 to retrieve primary image
+        mFrameMemory = mRetriever->getImageAtIndex(-1, mOutputColor);
+    } else if (mHasVideo) {
+        mFrameMemory = mRetriever->getFrameAtTime(0,
+                MediaSource::ReadOptions::SEEK_PREVIOUS_SYNC, mOutputColor);
+    }
+
     if (mFrameMemory == nullptr || mFrameMemory->pointer() == nullptr) {
         ALOGE("getFrameAtTime: videoFrame is a nullptr");
         return false;
diff --git a/media/libheif/HeifDecoderImpl.h b/media/libheif/HeifDecoderImpl.h
index c2e4ff3..406c2c1 100644
--- a/media/libheif/HeifDecoderImpl.h
+++ b/media/libheif/HeifDecoderImpl.h
@@ -55,6 +55,8 @@
     android_pixel_format_t mOutputColor;
     size_t mCurScanline;
     bool mFrameDecoded;
+    bool mHasImage;
+    bool mHasVideo;
 };
 
 } // namespace android
diff --git a/media/libmedia/IMediaMetadataRetriever.cpp b/media/libmedia/IMediaMetadataRetriever.cpp
index 5ea2e8b..f725c97 100644
--- a/media/libmedia/IMediaMetadataRetriever.cpp
+++ b/media/libmedia/IMediaMetadataRetriever.cpp
@@ -68,6 +68,8 @@
     SET_DATA_SOURCE_FD,
     SET_DATA_SOURCE_CALLBACK,
     GET_FRAME_AT_TIME,
+    GET_IMAGE_AT_INDEX,
+    GET_FRAME_AT_INDEX,
     EXTRACT_ALBUM_ART,
     EXTRACT_METADATA,
 };
@@ -164,6 +166,55 @@
         return interface_cast<IMemory>(reply.readStrongBinder());
     }
 
+    sp<IMemory> getImageAtIndex(int index, int colorFormat, bool metaOnly)
+    {
+        // Proxy side: marshal the arguments in the order onTransact reads them.
+        ALOGV("getImageAtIndex: index %d, colorFormat(%d) metaOnly(%d)",
+                index, colorFormat, metaOnly);
+        Parcel request, response;
+        request.writeInterfaceToken(IMediaMetadataRetriever::getInterfaceDescriptor());
+        request.writeInt32(index);
+        request.writeInt32(colorFormat);
+        request.writeInt32(metaOnly);
+#ifndef DISABLE_GROUP_SCHEDULE_HACK
+        sendSchedPolicy(request);
+#endif
+        remote()->transact(GET_IMAGE_AT_INDEX, request, &response);
+        // The reply starts with a status word; a binder is read only on success.
+        const status_t status = response.readInt32();
+        if (status != NO_ERROR) return NULL;
+        return interface_cast<IMemory>(response.readStrongBinder());
+    }
+
+    status_t getFrameAtIndex(std::vector<sp<IMemory> > *frames,
+            int frameIndex, int numFrames, int colorFormat, bool metaOnly)
+    {
+        // Proxy side of the batched frame request; results are appended to |frames|.
+        ALOGV("getFrameAtIndex: frameIndex(%d), numFrames(%d), colorFormat(%d) metaOnly(%d)",
+                frameIndex, numFrames, colorFormat, metaOnly);
+        Parcel request, response;
+        request.writeInterfaceToken(IMediaMetadataRetriever::getInterfaceDescriptor());
+        request.writeInt32(frameIndex);
+        request.writeInt32(numFrames);
+        request.writeInt32(colorFormat);
+        request.writeInt32(metaOnly);
+#ifndef DISABLE_GROUP_SCHEDULE_HACK
+        sendSchedPolicy(request);
+#endif
+        remote()->transact(GET_FRAME_AT_INDEX, request, &response);
+        const status_t status = response.readInt32();
+        if (status != NO_ERROR) {
+            return status;
+        }
+        // Read at most numFrames binders, even if the reply advertises more.
+        const int available = response.readInt32();
+        const int toRead = (available < numFrames) ? available : numFrames;
+        for (int remaining = toRead; remaining > 0; --remaining) {
+            frames->push_back(interface_cast<IMemory>(response.readStrongBinder()));
+        }
+        return OK;
+    }
+
     sp<IMemory> extractAlbumArt()
     {
         Parcel data, reply;
@@ -300,6 +351,54 @@
 #endif
             return NO_ERROR;
         } break;
+        case GET_IMAGE_AT_INDEX: {
+            CHECK_INTERFACE(IMediaMetadataRetriever, data, reply);
+            int index = data.readInt32();
+            int colorFormat = data.readInt32();
+            bool metaOnly = (data.readInt32() != 0);
+            ALOGV("getImageAtIndex: index(%d), colorFormat(%d), metaOnly(%d)",
+                    index, colorFormat, metaOnly);
+#ifndef DISABLE_GROUP_SCHEDULE_HACK
+            setSchedPolicy(data);
+#endif
+            sp<IMemory> bitmap = getImageAtIndex(index, colorFormat, metaOnly);
+            if (bitmap != 0) {  // Don't send NULL across the binder interface
+                reply->writeInt32(NO_ERROR);
+                reply->writeStrongBinder(IInterface::asBinder(bitmap));
+            } else {
+                reply->writeInt32(UNKNOWN_ERROR);
+            }
+#ifndef DISABLE_GROUP_SCHEDULE_HACK
+            restoreSchedPolicy();
+#endif
+            return NO_ERROR;
+        } break;
+        case GET_FRAME_AT_INDEX: {
+            CHECK_INTERFACE(IMediaMetadataRetriever, data, reply);
+            int frameIndex = data.readInt32();
+            int numFrames = data.readInt32();
+            int colorFormat = data.readInt32();
+            bool metaOnly = (data.readInt32() != 0);
+            ALOGV("getFrameAtIndex: frameIndex(%d), numFrames(%d), colorFormat(%d), metaOnly(%d)",
+                    frameIndex, numFrames, colorFormat, metaOnly);
+#ifndef DISABLE_GROUP_SCHEDULE_HACK
+            setSchedPolicy(data);
+#endif
+            std::vector<sp<IMemory> > frames;
+            status_t err = getFrameAtIndex(
+                    &frames, frameIndex, numFrames, colorFormat, metaOnly);
+            reply->writeInt32(err);
+            if (OK == err) {
+                reply->writeInt32(frames.size());
+                for (size_t i = 0; i < frames.size(); i++) {
+                    reply->writeStrongBinder(IInterface::asBinder(frames[i]));
+                }
+            }
+#ifndef DISABLE_GROUP_SCHEDULE_HACK
+            restoreSchedPolicy();
+#endif
+            return NO_ERROR;
+        } break;
         case EXTRACT_ALBUM_ART: {
             CHECK_INTERFACE(IMediaMetadataRetriever, data, reply);
 #ifndef DISABLE_GROUP_SCHEDULE_HACK
diff --git a/media/libmedia/include/media/IMediaMetadataRetriever.h b/media/libmedia/include/media/IMediaMetadataRetriever.h
index ea95161..5491535 100644
--- a/media/libmedia/include/media/IMediaMetadataRetriever.h
+++ b/media/libmedia/include/media/IMediaMetadataRetriever.h
@@ -44,6 +44,11 @@
             const sp<IDataSource>& dataSource, const char *mime) = 0;
     virtual sp<IMemory>     getFrameAtTime(
             int64_t timeUs, int option, int colorFormat, bool metaOnly) = 0;
+    virtual sp<IMemory>     getImageAtIndex(
+            int index, int colorFormat, bool metaOnly) = 0;
+    virtual status_t        getFrameAtIndex(
+            std::vector<sp<IMemory> > *frames,
+            int frameIndex, int numFrames, int colorFormat, bool metaOnly) = 0;
     virtual sp<IMemory>     extractAlbumArt() = 0;
     virtual const char*     extractMetadata(int keyCode) = 0;
 };
diff --git a/media/libmedia/include/media/IMediaSource.h b/media/libmedia/include/media/IMediaSource.h
index 1e36ab7..7aea90c 100644
--- a/media/libmedia/include/media/IMediaSource.h
+++ b/media/libmedia/include/media/IMediaSource.h
@@ -55,7 +55,6 @@
     // Returns the format of the data output by this media source.
     virtual sp<MetaData> getFormat() = 0;
 
-
     // Returns a new buffer of data. Call blocks until a
     // buffer is available, an error is encountered or the end of the stream
     // is reached.
diff --git a/media/libmedia/include/media/MediaMetadataRetrieverInterface.h b/media/libmedia/include/media/MediaMetadataRetrieverInterface.h
index 257002d..fc9e53c 100644
--- a/media/libmedia/include/media/MediaMetadataRetrieverInterface.h
+++ b/media/libmedia/include/media/MediaMetadataRetrieverInterface.h
@@ -22,6 +22,7 @@
 #include <media/mediametadataretriever.h>
 #include <media/mediascanner.h>
 #include <private/media/VideoFrame.h>
+#include <media/stagefright/MediaErrors.h>
 
 namespace android {
 
@@ -41,9 +42,14 @@
             const KeyedVector<String8, String8> *headers = NULL) = 0;
 
     virtual status_t    setDataSource(int fd, int64_t offset, int64_t length) = 0;
-    virtual status_t setDataSource(const sp<DataSource>& source, const char *mime) = 0;
+    virtual status_t    setDataSource(const sp<DataSource>& source, const char *mime) = 0;
     virtual VideoFrame* getFrameAtTime(
             int64_t timeUs, int option, int colorFormat, bool metaOnly) = 0;
+    virtual VideoFrame* getImageAtIndex(
+            int index, int colorFormat, bool metaOnly) = 0;
+    // Pure virtual, consistent with the other retrieval methods declared here.
+    virtual status_t    getFrameAtIndex(std::vector<VideoFrame*>* frames,
+            int frameIndex, int numFrames, int colorFormat, bool metaOnly) = 0;
     virtual MediaAlbumArt* extractAlbumArt() = 0;
     virtual const char* extractMetadata(int keyCode) = 0;
 };
@@ -58,6 +64,13 @@
     virtual VideoFrame* getFrameAtTime(
             int64_t /*timeUs*/, int /*option*/, int /*colorFormat*/, bool /*metaOnly*/)
     { return NULL; }
+    virtual VideoFrame* getImageAtIndex(
+            int /*index*/, int /*colorFormat*/, bool /*metaOnly*/)
+    { return NULL; }
+    virtual status_t getFrameAtIndex(
+            std::vector<VideoFrame*>* /*frames*/,
+            int /*frameIndex*/, int /*numFrames*/, int /*colorFormat*/, bool /*metaOnly*/)
+    { return ERROR_UNSUPPORTED; }
     virtual MediaAlbumArt* extractAlbumArt() { return NULL; }
     virtual const char* extractMetadata(int /*keyCode*/) { return NULL; }
 };
diff --git a/media/libmedia/include/media/mediametadataretriever.h b/media/libmedia/include/media/mediametadataretriever.h
index 65c266b..3511253 100644
--- a/media/libmedia/include/media/mediametadataretriever.h
+++ b/media/libmedia/include/media/mediametadataretriever.h
@@ -59,6 +59,13 @@
     METADATA_KEY_LOCATION        = 23,
     METADATA_KEY_VIDEO_ROTATION  = 24,
     METADATA_KEY_CAPTURE_FRAMERATE = 25,
+    METADATA_KEY_HAS_IMAGE       = 26,
+    METADATA_KEY_IMAGE_COUNT     = 27,
+    METADATA_KEY_IMAGE_PRIMARY   = 28,
+    METADATA_KEY_IMAGE_WIDTH     = 29,
+    METADATA_KEY_IMAGE_HEIGHT    = 30,
+    METADATA_KEY_IMAGE_ROTATION  = 31,
+    METADATA_KEY_VIDEO_FRAME_COUNT  = 32,
 
     // Add more here...
 };
@@ -80,6 +87,11 @@
             const sp<IDataSource>& dataSource, const char *mime = NULL);
     sp<IMemory> getFrameAtTime(int64_t timeUs, int option,
             int colorFormat = HAL_PIXEL_FORMAT_RGB_565, bool metaOnly = false);
+    sp<IMemory> getImageAtIndex(int index,
+            int colorFormat = HAL_PIXEL_FORMAT_RGB_565, bool metaOnly = false);
+    status_t getFrameAtIndex(
+            std::vector<sp<IMemory> > *frames, int frameIndex, int numFrames = 1,
+            int colorFormat = HAL_PIXEL_FORMAT_RGB_565, bool metaOnly = false);
     sp<IMemory> extractAlbumArt();
     const char* extractMetadata(int keyCode);
 
diff --git a/media/libmedia/mediametadataretriever.cpp b/media/libmedia/mediametadataretriever.cpp
index 7d27d57..6a4204b 100644
--- a/media/libmedia/mediametadataretriever.cpp
+++ b/media/libmedia/mediametadataretriever.cpp
@@ -154,6 +154,32 @@
     return mRetriever->getFrameAtTime(timeUs, option, colorFormat, metaOnly);
 }
 
+sp<IMemory> MediaMetadataRetriever::getImageAtIndex(
+        int index, int colorFormat, bool metaOnly) {
+    ALOGV("getImageAtIndex: index(%d) colorFormat(%d) metaOnly(%d)",
+            index, colorFormat, metaOnly);
+    Mutex::Autolock autoLock(mLock);  // held across the forwarded call
+    if (mRetriever != 0) {
+        return mRetriever->getImageAtIndex(index, colorFormat, metaOnly);
+    }
+    ALOGE("retriever is not initialized");
+    return NULL;
+}
+
+status_t MediaMetadataRetriever::getFrameAtIndex(
+        std::vector<sp<IMemory> > *frames,
+        int frameIndex, int numFrames, int colorFormat, bool metaOnly) {
+    ALOGV("getFrameAtIndex: frameIndex(%d), numFrames(%d), colorFormat(%d) metaOnly(%d)",
+            frameIndex, numFrames, colorFormat, metaOnly);
+    Mutex::Autolock autoLock(mLock);  // held across the forwarded call
+    if (mRetriever != 0) {
+        return mRetriever->getFrameAtIndex(
+                frames, frameIndex, numFrames, colorFormat, metaOnly);
+    }
+    ALOGE("retriever is not initialized");
+    return INVALID_OPERATION;
+}
+
 const char* MediaMetadataRetriever::extractMetadata(int keyCode)
 {
     ALOGV("extractMetadata(%d)", keyCode);
diff --git a/media/libmediaextractor/include/media/MediaSource.h b/media/libmediaextractor/include/media/MediaSource.h
index 749a4df..504653b 100644
--- a/media/libmediaextractor/include/media/MediaSource.h
+++ b/media/libmediaextractor/include/media/MediaSource.h
@@ -60,6 +60,7 @@
             SEEK_NEXT_SYNC,
             SEEK_CLOSEST_SYNC,
             SEEK_CLOSEST,
+            SEEK_FRAME_INDEX,
         };
 
         ReadOptions();
diff --git a/media/libmediaplayerservice/MetadataRetrieverClient.cpp b/media/libmediaplayerservice/MetadataRetrieverClient.cpp
index 3aab9b0..16ed530 100644
--- a/media/libmediaplayerservice/MetadataRetrieverClient.cpp
+++ b/media/libmediaplayerservice/MetadataRetrieverClient.cpp
@@ -194,6 +194,25 @@
 
 Mutex MetadataRetrieverClient::sLock;
 
+static sp<IMemory> getThumbnail(VideoFrame* frame) {
+    std::unique_ptr<VideoFrame> frameDeleter(frame);  // owns |frame| from here on
+    if (frame == NULL) return NULL;  // batch callers pass entries unchecked
+    size_t size = frame->getFlattenedSize();
+    sp<MemoryHeapBase> heap = new MemoryHeapBase(size, 0, "MetadataRetrieverClient");
+    if (heap == NULL) {
+        ALOGE("failed to create MemoryDealer");
+        return NULL;
+    }
+    sp<IMemory> thumbnail = new MemoryBase(heap, 0, size);
+    if (thumbnail == NULL || thumbnail->pointer() == NULL) {  // guards the copy below
+        ALOGE("not enough memory for VideoFrame size=%zu", size);
+        return NULL;
+    }
+    VideoFrame *frameCopy = static_cast<VideoFrame *>(thumbnail->pointer());
+    frameCopy->copyFlattened(*frame);
+    return thumbnail;
+}
+
 sp<IMemory> MetadataRetrieverClient::getFrameAtTime(
         int64_t timeUs, int option, int colorFormat, bool metaOnly)
 {
@@ -206,29 +225,55 @@
         ALOGE("retriever is not initialized");
         return NULL;
     }
-    VideoFrame *frame = mRetriever->getFrameAtTime(
-            timeUs, option, colorFormat, metaOnly);
+    VideoFrame *frame = mRetriever->getFrameAtTime(timeUs, option, colorFormat, metaOnly);
     if (frame == NULL) {
         ALOGE("failed to capture a video frame");
         return NULL;
     }
-    size_t size = frame->getFlattenedSize();
-    sp<MemoryHeapBase> heap = new MemoryHeapBase(size, 0, "MetadataRetrieverClient");
-    if (heap == NULL) {
-        ALOGE("failed to create MemoryDealer");
-        delete frame;
+    return getThumbnail(frame);
+}
+
+sp<IMemory> MetadataRetrieverClient::getImageAtIndex(
+        int index, int colorFormat, bool metaOnly) {
+    ALOGV("getImageAtIndex: index(%d) colorFormat(%d), metaOnly(%d)",
+            index, colorFormat, metaOnly);
+    Mutex::Autolock lock(mLock);
+    Mutex::Autolock glock(sLock);
+    mThumbnail.clear();
+    if (mRetriever == NULL) {
+        ALOGE("retriever is not initialized");
         return NULL;
     }
-    mThumbnail = new MemoryBase(heap, 0, size);
-    if (mThumbnail == NULL) {
-        ALOGE("not enough memory for VideoFrame size=%zu", size);
-        delete frame;
+    VideoFrame *frame = mRetriever->getImageAtIndex(index, colorFormat, metaOnly);
+    if (frame == NULL) {
+        ALOGE("failed to extract image");
         return NULL;
     }
-    VideoFrame *frameCopy = static_cast<VideoFrame *>(mThumbnail->pointer());
-    frameCopy->copyFlattened(*frame);
-    delete frame;  // Fix memory leakage
-    return mThumbnail;
+    return getThumbnail(frame);  // getThumbnail() takes ownership of |frame|
+}
+
+status_t MetadataRetrieverClient::getFrameAtIndex(
+            std::vector<sp<IMemory> > *frames,
+            int frameIndex, int numFrames, int colorFormat, bool metaOnly) {
+    ALOGV("getFrameAtIndex: frameIndex(%d), numFrames(%d), colorFormat(%d), metaOnly(%d)",
+            frameIndex, numFrames, colorFormat, metaOnly);
+    Mutex::Autolock instanceLock(mLock);
+    Mutex::Autolock globalLock(sLock);
+    if (mRetriever == NULL) {
+        ALOGE("retriever is not initialized");
+        return INVALID_OPERATION;
+    }
+    // Decode the whole batch first, then flatten each frame into an IMemory.
+    std::vector<VideoFrame*> decoded;
+    const status_t status = mRetriever->getFrameAtIndex(
+            &decoded, frameIndex, numFrames, colorFormat, metaOnly);
+    if (status != OK) {
+        return status;
+    }
+    for (VideoFrame *decodedFrame : decoded) {
+        frames->push_back(getThumbnail(decodedFrame));  // takes ownership
+    }
+    return OK;
 }
 
 sp<IMemory> MetadataRetrieverClient::extractAlbumArt()
diff --git a/media/libmediaplayerservice/MetadataRetrieverClient.h b/media/libmediaplayerservice/MetadataRetrieverClient.h
index c78cd4b..f71891a 100644
--- a/media/libmediaplayerservice/MetadataRetrieverClient.h
+++ b/media/libmediaplayerservice/MetadataRetrieverClient.h
@@ -52,6 +52,11 @@
     virtual status_t                setDataSource(const sp<IDataSource>& source, const char *mime);
     virtual sp<IMemory>             getFrameAtTime(
             int64_t timeUs, int option, int colorFormat, bool metaOnly);
+    virtual sp<IMemory>             getImageAtIndex(
+            int index, int colorFormat, bool metaOnly);
+    virtual status_t getFrameAtIndex(
+                std::vector<sp<IMemory> > *frames,
+                int frameIndex, int numFrames, int colorFormat, bool metaOnly);
     virtual sp<IMemory>             extractAlbumArt();
     virtual const char*             extractMetadata(int keyCode);
 
diff --git a/media/libstagefright/Android.bp b/media/libstagefright/Android.bp
index fe1b285..4c7259f 100644
--- a/media/libstagefright/Android.bp
+++ b/media/libstagefright/Android.bp
@@ -46,6 +46,7 @@
         "DataSourceFactory.cpp",
         "DataURISource.cpp",
         "FileSource.cpp",
+        "FrameDecoder.cpp",
         "FrameRenderTracker.cpp",
         "HTTPBase.cpp",
         "HevcUtils.cpp",
diff --git a/media/libstagefright/FrameDecoder.cpp b/media/libstagefright/FrameDecoder.cpp
new file mode 100644
index 0000000..fa5f37ec
--- /dev/null
+++ b/media/libstagefright/FrameDecoder.cpp
@@ -0,0 +1,608 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//#define LOG_NDEBUG 0
+#define LOG_TAG "FrameDecoder"
+
+#include <inttypes.h>
+
+#include <utils/Log.h>
+#include <gui/Surface.h>
+
+#include "include/FrameDecoder.h"
+#include <media/ICrypto.h>
+#include <media/IMediaSource.h>
+#include <media/MediaCodecBuffer.h>
+#include <media/stagefright/foundation/avc_utils.h>
+#include <media/stagefright/foundation/ADebug.h>
+#include <media/stagefright/foundation/AMessage.h>
+#include <media/stagefright/ColorConverter.h>
+#include <media/stagefright/MediaBuffer.h>
+#include <media/stagefright/MediaCodec.h>
+#include <media/stagefright/MediaDefs.h>
+#include <media/stagefright/MediaErrors.h>
+#include <media/stagefright/Utils.h>
+#include <private/media/VideoFrame.h>
+
+namespace android {
+
+static const int64_t kBufferTimeOutUs = 30000ll; // 30 msec
+static const size_t kRetryCount = 20; // must be >0
+
+VideoFrame *FrameDecoder::allocVideoFrame(
+        int32_t width, int32_t height, bool metaOnly) {
+    int32_t rotationAngle;
+    if (!mTrackMeta->findInt32(kKeyRotation, &rotationAngle)) {
+        rotationAngle = 0;  // By default, no rotation
+    }
+    // Optional ICC profile; NULL/0 is passed on when the track carries none.
+    uint32_t type;
+    const void *iccData;
+    size_t iccSize;
+    if (!mTrackMeta->findData(kKeyIccProfile, &type, &iccData, &iccSize)){
+        iccData = NULL;
+        iccSize = 0;
+    }
+    // Display size: SAR-scaled, else explicit display size, else coded size.
+    int32_t sarWidth, sarHeight, displayWidth, displayHeight;
+    if (mTrackMeta->findInt32(kKeySARWidth, &sarWidth)
+            && mTrackMeta->findInt32(kKeySARHeight, &sarHeight)
+            && sarHeight != 0) {
+        // 64-bit math: width * sarWidth may overflow; bad results fall back to width.
+        const int64_t scaled = (int64_t)width * sarWidth / sarHeight;
+        displayWidth = (scaled > 0 && scaled <= INT32_MAX) ? (int32_t)scaled : width;
+        displayHeight = height;
+    } else if (mTrackMeta->findInt32(kKeyDisplayWidth, &displayWidth)
+                && mTrackMeta->findInt32(kKeyDisplayHeight, &displayHeight)
+                && displayWidth > 0 && displayHeight > 0
+                && width > 0 && height > 0) {
+        ALOGV("found display size %dx%d", displayWidth, displayHeight);
+    } else {
+        displayWidth = width;
+        displayHeight = height;
+    }
+    return new VideoFrame(width, height, displayWidth, displayHeight,
+            rotationAngle, mDstBpp, !metaOnly, iccData, iccSize);
+}
+
+bool FrameDecoder::setDstColorFormat(android_pixel_format_t colorFormat) {
+    // Translates the requested HAL pixel format into the OMX destination
+    // format (mDstFormat) and its bytes per pixel (mDstBpp).
+    //
+    // Returns true when the format is supported. For any other value an error
+    // is logged, false is returned and neither member is modified.
+    if (colorFormat == HAL_PIXEL_FORMAT_RGB_565) {
+        // 16 bits per pixel
+        mDstFormat = OMX_COLOR_Format16bitRGB565;
+        mDstBpp = 2;
+        return true;
+    }
+    if (colorFormat == HAL_PIXEL_FORMAT_RGBA_8888) {
+        // 32 bits per pixel
+        mDstFormat = OMX_COLOR_Format32BitRGBA8888;
+        mDstBpp = 4;
+        return true;
+    }
+    if (colorFormat == HAL_PIXEL_FORMAT_BGRA_8888) {
+        // 32 bits per pixel
+        mDstFormat = OMX_COLOR_Format32bitBGRA8888;
+        mDstBpp = 4;
+        return true;
+    }
+
+    ALOGE("Unsupported color format: %d", colorFormat);
+    return false;
+}
+
+VideoFrame* FrameDecoder::extractFrame(
+        int64_t frameTimeUs, int option, int colorFormat, bool metaOnly) {
+    // Returns the first frame released from mFrames, or NULL on an unsupported
+    // color format, a decode failure, or when no frame was produced.
+    const android_pixel_format_t dstColor = (android_pixel_format_t)colorFormat;
+    if (!setDstColorFormat(dstColor)) {
+        return NULL;
+    }
+    if (metaOnly) {
+        // No decoding: describe the frame from the track's width/height metadata.
+        int32_t trackWidth, trackHeight;
+        CHECK(trackMeta()->findInt32(kKeyWidth, &trackWidth));
+        CHECK(trackMeta()->findInt32(kKeyHeight, &trackHeight));
+        return allocVideoFrame(trackWidth, trackHeight, true);
+    }
+    if (extractInternal(frameTimeUs, 1, option) != OK || mFrames.size() == 0) {
+        return NULL;
+    }
+    return mFrames[0].release();
+}
+
+status_t FrameDecoder::extractFrames(
+        int64_t frameTimeUs, size_t numFrames, int option, int colorFormat,
+        std::vector<VideoFrame*>* frames) {
+    // Runs extractInternal() and appends every entry released from mFrames to
+    // |frames|. Nothing is appended when an error is returned.
+    if (!setDstColorFormat((android_pixel_format_t)colorFormat)) {
+        return ERROR_UNSUPPORTED;
+    }
+    const status_t status = extractInternal(frameTimeUs, numFrames, option);
+    if (status != OK) {
+        return status;
+    }
+    for (size_t pos = 0; pos < mFrames.size(); ++pos) {
+        frames->push_back(mFrames[pos].release());
+    }
+    return OK;
+}
+
+status_t FrameDecoder::extractInternal(
+        int64_t frameTimeUs, size_t numFrames, int option) {
+    // Feeds samples from mSource to a decoder until onOutputReceived() reports done or an error occurs.
+    MediaSource::ReadOptions options;
+    sp<AMessage> videoFormat = onGetFormatAndSeekOptions(
+            frameTimeUs, numFrames, option, &options);
+    if (videoFormat == NULL) {
+        ALOGE("video format or seek mode not supported");
+        return ERROR_UNSUPPORTED;
+    }
+
+    status_t err;
+    sp<ALooper> looper = new ALooper;
+    looper->start();
+    sp<MediaCodec> decoder = MediaCodec::CreateByComponentName(
+            looper, mComponentName, &err);
+    if (decoder.get() == NULL || err != OK) {
+        ALOGW("Failed to instantiate decoder [%s]", mComponentName.c_str());
+        return (decoder.get() == NULL) ? NO_MEMORY : err;
+    }
+
+    err = decoder->configure(videoFormat, NULL /* surface */, NULL /* crypto */, 0 /* flags */);
+    if (err != OK) {
+        ALOGW("configure returned error %d (%s)", err, asString(err));
+        decoder->release();
+        return err;
+    }
+
+    err = decoder->start();
+    if (err != OK) {
+        ALOGW("start returned error %d (%s)", err, asString(err));
+        decoder->release();
+        return err;
+    }
+
+    err = mSource->start();
+    if (err != OK) {
+        ALOGW("source failed to start: %d (%s)", err, asString(err));
+        decoder->release();
+        return err;
+    }
+
+    Vector<sp<MediaCodecBuffer> > inputBuffers;
+    err = decoder->getInputBuffers(&inputBuffers);
+    if (err != OK) {
+        ALOGW("failed to get input buffers: %d (%s)", err, asString(err));
+        decoder->release();
+        mSource->stop();
+        return err;
+    }
+
+    Vector<sp<MediaCodecBuffer> > outputBuffers;
+    err = decoder->getOutputBuffers(&outputBuffers);
+    if (err != OK) {
+        ALOGW("failed to get output buffers: %d (%s)", err, asString(err));
+        decoder->release();
+        mSource->stop();
+        return err;
+    }
+
+    sp<AMessage> outputFormat = NULL;  // filled in on INFO_FORMAT_CHANGED
+    bool haveMoreInputs = true;
+    size_t index, offset, size;
+    int64_t timeUs;
+    size_t retriesLeft = kRetryCount;  // decremented on output timeouts only
+    bool done = false;
+    bool firstSample = true;
+    do {
+        size_t inputIndex = -1;  // wraps to the max size_t: "no input buffer dequeued"
+        int64_t ptsUs = 0ll;
+        uint32_t flags = 0;
+        sp<MediaCodecBuffer> codecBuffer = NULL;
+        // Input side: runs at most once per outer iteration (every path breaks).
+        while (haveMoreInputs) {
+            err = decoder->dequeueInputBuffer(&inputIndex, kBufferTimeOutUs);
+            if (err != OK) {
+                ALOGW("Timed out waiting for input");
+                if (retriesLeft) {  // tolerated while the retry budget lasts
+                    err = OK;
+                }
+                break;
+            }
+            codecBuffer = inputBuffers[inputIndex];
+
+            MediaBuffer *mediaBuffer = NULL;
+
+            err = mSource->read(&mediaBuffer, &options);
+            options.clearSeekTo();  // the seek applies to the first read only
+            if (err != OK) {
+                ALOGW("Input Error or EOS");
+                haveMoreInputs = false;
+                if (!firstSample && err == ERROR_END_OF_STREAM) {  // EOS after a sample is not an error
+                    err = OK;
+                }
+                break;
+            }
+
+            if (mediaBuffer->range_length() > codecBuffer->capacity()) {
+                ALOGE("buffer size (%zu) too large for codec input size (%zu)",
+                        mediaBuffer->range_length(), codecBuffer->capacity());
+                haveMoreInputs = false;
+                err = BAD_VALUE;
+            } else {
+                codecBuffer->setRange(0, mediaBuffer->range_length());
+
+                CHECK(mediaBuffer->meta_data()->findInt64(kKeyTime, &ptsUs));
+                memcpy(codecBuffer->data(),
+                        (const uint8_t*)mediaBuffer->data() + mediaBuffer->range_offset(),
+                        mediaBuffer->range_length());
+
+                onInputReceived(codecBuffer, mediaBuffer->meta_data(), firstSample, &flags);
+                firstSample = false;
+            }
+
+            mediaBuffer->release();
+            break;
+        }
+        // Queue only when an input buffer was dequeued (inputIndex is a valid index).
+        if (haveMoreInputs && inputIndex < inputBuffers.size()) {
+            ALOGV("QueueInput: size=%zu ts=%" PRId64 " us flags=%x",
+                    codecBuffer->size(), ptsUs, flags);
+
+            err = decoder->queueInputBuffer(
+                    inputIndex,
+                    codecBuffer->offset(),
+                    codecBuffer->size(),
+                    ptsUs,
+                    flags);
+
+            if (flags & MediaCodec::BUFFER_FLAG_EOS) {
+                haveMoreInputs = false;
+            }
+
+            // we don't expect an output from codec config buffer
+            if (flags & MediaCodec::BUFFER_FLAG_CODECCONFIG) {
+                continue;
+            }
+        }
+        // Output side: keeps looping only across format/buffer-change notifications.
+        while (err == OK) {
+            // wait for a decoded buffer
+            err = decoder->dequeueOutputBuffer(
+                    &index,
+                    &offset,
+                    &size,
+                    &timeUs,
+                    &flags,
+                    kBufferTimeOutUs);
+
+            if (err == INFO_FORMAT_CHANGED) {
+                ALOGV("Received format change");
+                err = decoder->getOutputFormat(&outputFormat);
+            } else if (err == INFO_OUTPUT_BUFFERS_CHANGED) {
+                ALOGV("Output buffers changed");
+                err = decoder->getOutputBuffers(&outputBuffers);
+            } else {
+                if (err == -EAGAIN /* INFO_TRY_AGAIN_LATER */ && --retriesLeft > 0) {
+                    ALOGV("Timed-out waiting for output.. retries left = %zu", retriesLeft);
+                    err = OK;
+                } else if (err == OK) {
+                    // If we're seeking with CLOSEST option and obtained a valid targetTimeUs
+                    // from the extractor, decode to the specified frame. Otherwise we're done.
+                    ALOGV("Received an output buffer, timeUs=%lld", (long long)timeUs);
+                    sp<MediaCodecBuffer> videoFrameBuffer = outputBuffers.itemAt(index);
+
+                    err = onOutputReceived(videoFrameBuffer, outputFormat, timeUs, &done);
+
+                    decoder->releaseOutputBuffer(index);
+                } else {
+                    ALOGW("Received error %d (%s) instead of output", err, asString(err));
+                    done = true;
+                }
+                break;
+            }
+        }
+    } while (err == OK && !done);
+    // Common teardown for both the success and the failure path.
+    mSource->stop();
+    decoder->release();
+
+    if (err != OK) {
+        ALOGE("failed to get video frame (err %d)", err);
+    }
+
+    return err;
+}
+
+sp<AMessage> VideoFrameDecoder::onGetFormatAndSeekOptions(
+        int64_t frameTimeUs, size_t numFrames, int seekMode, MediaSource::ReadOptions *options) {
+    // Validates the seek mode, fills |options| with the seek target and builds
+    // the decoder format from the track metadata. Returns NULL on failure.
+    typedef MediaSource::ReadOptions ReadOptions;
+    mSeekMode = static_cast<ReadOptions::SeekMode>(seekMode);
+    const bool modeInRange = mSeekMode >= ReadOptions::SEEK_PREVIOUS_SYNC
+            && mSeekMode <= ReadOptions::SEEK_FRAME_INDEX;
+    if (!modeInRange) {
+        ALOGE("Unknown seek mode: %d", mSeekMode);
+        return NULL;
+    }
+    mNumFrames = numFrames;
+    const char *mime;
+    if (!trackMeta()->findCString(kKeyMIMEType, &mime)) {
+        ALOGE("Could not find mime type");
+        return NULL;
+    }
+    mIsAvcOrHevc = (strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC) == 0)
+            || (strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC) == 0);
+
+    // A negative request time selects the track's thumbnail time (0 if the
+    // track has none or it is negative).
+    int64_t seekTimeUs = frameTimeUs;
+    if (seekTimeUs < 0) {
+        if (!trackMeta()->findInt64(kKeyThumbnailTime, &seekTimeUs)
+                || seekTimeUs < 0) {
+            seekTimeUs = 0;
+        }
+    }
+    options->setSeekTo(seekTimeUs, mSeekMode);
+
+    sp<AMessage> videoFormat;
+    if (convertMetaDataToMessage(trackMeta(), &videoFormat) != OK) {
+        ALOGE("b/23680780");
+        ALOGW("Failed to convert meta data to message");
+        return NULL;
+    }
+    // TODO: Use Flexible color instead
+    videoFormat->setInt32("color-format", OMX_COLOR_FormatYUV420Planar);
+
+    // When seeking to a sync frame, request a single buffer on each port.
+    // NOTE: the component may fail this request if it needs more to decode.
+    const bool decodesToTarget = (mSeekMode == ReadOptions::SEEK_CLOSEST)
+            || (mSeekMode == ReadOptions::SEEK_FRAME_INDEX);
+    if (!decodesToTarget) {
+        videoFormat->setInt32("android._num-input-buffers", 1);
+        videoFormat->setInt32("android._num-output-buffers", 1);
+    }
+    return videoFormat;
+}
+
+status_t VideoFrameDecoder::onInputReceived(
+        const sp<MediaCodecBuffer> &codecBuffer,
+        const sp<MetaData> &sampleMeta, bool firstSample, uint32_t *flags) {
+    const bool decodesToTarget = (mSeekMode == MediaSource::ReadOptions::SEEK_CLOSEST)
+            || (mSeekMode == MediaSource::ReadOptions::SEEK_FRAME_INDEX);
+
+    if (decodesToTarget) {
+        // The first sample may carry the time we have to decode up to.
+        if (firstSample) {
+            sampleMeta->findInt64(kKeyTargetTime, &mTargetTimeUs);
+            ALOGV("Seeking closest: targetTimeUs=%lld", (long long)mTargetTimeUs);
+        }
+    } else if (mIsAvcOrHevc && IsIDR(codecBuffer->data(), codecBuffer->size())) {
+        // A sync-frame seek needs only one IDR frame, so mark it as the last
+        // input; target seeks keep decoding until targetTimeUs instead.
+        *flags |= MediaCodec::BUFFER_FLAG_EOS;
+    }
+    return OK;
+}
+
+status_t VideoFrameDecoder::onOutputReceived(
+        const sp<MediaCodecBuffer> &videoFrameBuffer,
+        const sp<AMessage> &outputFormat,
+        int64_t timeUs, bool *done) {
+    // Buffers before the target time are dropped without color conversion.
+    const bool beforeTarget = (mTargetTimeUs >= 0ll) && (timeUs < mTargetTimeUs);
+    if (beforeTarget) {
+        *done = false;
+        return OK;
+    }
+
+    ++mNumFramesDecoded;
+    *done = (mNumFramesDecoded >= mNumFrames);
+
+    int32_t width, height;
+    CHECK(outputFormat != NULL);
+    CHECK(outputFormat->findInt32("width", &width));
+    CHECK(outputFormat->findInt32("height", &height));
+
+    // Without an explicit crop rectangle the whole buffer is used.
+    int32_t cropLeft, cropTop, cropRight, cropBottom;
+    if (!outputFormat->findRect("crop", &cropLeft, &cropTop, &cropRight, &cropBottom)) {
+        cropLeft = 0;
+        cropTop = 0;
+        cropRight = width - 1;
+        cropBottom = height - 1;
+    }
+
+    const int32_t visibleWidth = cropRight - cropLeft + 1;
+    const int32_t visibleHeight = cropBottom - cropTop + 1;
+    VideoFrame *frame = allocVideoFrame(visibleWidth, visibleHeight, false /*metaOnly*/);
+    addFrame(frame);
+
+    int32_t srcFormat;
+    CHECK(outputFormat->findInt32("color-format", &srcFormat));
+
+    ColorConverter converter((OMX_COLOR_FORMATTYPE)srcFormat, dstFormat());
+    if (!converter.isValid()) {
+        ALOGE("Unable to convert from format 0x%08x to 0x%08x",
+                    srcFormat, dstFormat());
+        return ERROR_UNSUPPORTED;
+    }
+
+    converter.convert(
+            (const uint8_t *)videoFrameBuffer->data(),
+            width, height,
+            cropLeft, cropTop, cropRight, cropBottom,
+            frame->mData,
+            frame->mWidth,
+            frame->mHeight,
+            cropLeft, cropTop, cropRight, cropBottom);
+    return OK;
+}
+
+// Builds the codec input format and seek options for a still image. A negative
+// frameTimeUs picks the stand-alone thumbnail if present; tiled images use tile size.
+sp<AMessage> ImageDecoder::onGetFormatAndSeekOptions(
+        int64_t frameTimeUs, size_t /*numFrames*/,
+        int /*seekMode*/, MediaSource::ReadOptions *options) {
+    sp<MetaData> overrideMeta;
+    if (frameTimeUs < 0) {
+        uint32_t type;
+        const void *data;
+        size_t size;
+        int64_t thumbNailTime = 0;
+        int32_t thumbnailWidth, thumbnailHeight;
+        // if we have a stand-alone thumbnail, set up the override meta,
+        // and set seekTo time to -1.
+        if (trackMeta()->findInt32(kKeyThumbnailWidth, &thumbnailWidth)
+         && trackMeta()->findInt32(kKeyThumbnailHeight, &thumbnailHeight)
+         && trackMeta()->findData(kKeyThumbnailHVCC, &type, &data, &size)){
+            overrideMeta = new MetaData(*(trackMeta()));
+            overrideMeta->remove(kKeyDisplayWidth);
+            overrideMeta->remove(kKeyDisplayHeight);
+            overrideMeta->setInt32(kKeyWidth, thumbnailWidth);
+            overrideMeta->setInt32(kKeyHeight, thumbnailHeight);
+            overrideMeta->setData(kKeyHVCC, type, data, size);
+            thumbNailTime = -1ll;
+            ALOGV("thumbnail: %dx%d", thumbnailWidth, thumbnailHeight);
+        }
+        options->setSeekTo(thumbNailTime);
+    } else {
+        options->setSeekTo(frameTimeUs);
+    }
+
+    mGridRows = mGridCols = 1;
+    if (overrideMeta == NULL) {
+        // check if we're dealing with a tiled heif
+        int32_t gridWidth, gridHeight, gridRows, gridCols;
+        if (trackMeta()->findInt32(kKeyGridWidth, &gridWidth) && gridWidth > 0
+         && trackMeta()->findInt32(kKeyGridHeight, &gridHeight) && gridHeight > 0
+         && trackMeta()->findInt32(kKeyGridRows, &gridRows) && gridRows > 0
+         && trackMeta()->findInt32(kKeyGridCols, &gridCols) && gridCols > 0) {
+            int32_t width, height;
+            CHECK(trackMeta()->findInt32(kKeyWidth, &width));
+            CHECK(trackMeta()->findInt32(kKeyHeight, &height));
+            // Multiply in 64 bits: grid values are track metadata; the product may overflow.
+            if (width <= 1ll * gridWidth * gridCols && height <= 1ll * gridHeight * gridRows) {
+                ALOGV("grid: %dx%d, size: %dx%d, picture size: %dx%d",
+                        gridCols, gridRows, gridWidth, gridHeight, width, height);
+                overrideMeta = new MetaData(*(trackMeta()));
+                overrideMeta->setInt32(kKeyWidth, gridWidth);
+                overrideMeta->setInt32(kKeyHeight, gridHeight);
+                mGridCols = gridCols;
+                mGridRows = gridRows;
+            } else {
+                ALOGE("bad grid: %dx%d, size: %dx%d, picture size: %dx%d",
+                        gridCols, gridRows, gridWidth, gridHeight, width, height);
+            }
+        }
+        if (overrideMeta == NULL) {
+            overrideMeta = trackMeta();
+        }
+    }
+
+    sp<AMessage> videoFormat;
+    if (convertMetaDataToMessage(overrideMeta, &videoFormat) != OK) {
+        ALOGE("b/23680780");
+        ALOGW("Failed to convert meta data to message");
+        return NULL;
+    }
+
+    // TODO: Use Flexible color instead
+    videoFormat->setInt32("color-format", OMX_COLOR_FormatYUV420Planar);
+
+    if ((mGridRows == 1) && (mGridCols == 1)) {
+        videoFormat->setInt32("android._num-input-buffers", 1);
+        videoFormat->setInt32("android._num-output-buffers", 1);
+    }
+    return videoFormat;
+}
+
+// Color-converts one decoded tile (or the whole image when not tiled) into its
+// place in mFrame; *done is set once every tile of the grid has been received.
+status_t ImageDecoder::onOutputReceived(
+        const sp<MediaCodecBuffer> &videoFrameBuffer,
+        const sp<AMessage> &outputFormat, int64_t /*timeUs*/, bool *done) {
+    int32_t width, height;
+    CHECK(outputFormat != NULL);
+    CHECK(outputFormat->findInt32("width", &width));
+    CHECK(outputFormat->findInt32("height", &height));
+
+    int32_t imageWidth, imageHeight;
+    CHECK(trackMeta()->findInt32(kKeyWidth, &imageWidth));
+    CHECK(trackMeta()->findInt32(kKeyHeight, &imageHeight));
+
+    if (mFrame == NULL) {
+        mFrame = allocVideoFrame(imageWidth, imageHeight, false /*metaOnly*/);
+        addFrame(mFrame);
+    }
+
+    int32_t srcFormat;
+    CHECK(outputFormat->findInt32("color-format", &srcFormat));
+    ColorConverter converter((OMX_COLOR_FORMATTYPE)srcFormat, dstFormat());
+
+    int32_t dstLeft, dstTop, dstRight, dstBottom;
+    int32_t numTiles = mGridRows * mGridCols;
+    // The Nth decoded tile goes to grid row N / mGridCols, column N % mGridCols.
+    dstLeft = mTilesDecoded % mGridCols * width;
+    dstTop = mTilesDecoded / mGridCols * height;
+    dstRight = dstLeft + width - 1;
+    dstBottom = dstTop + height - 1;
+
+    int32_t crop_left, crop_top, crop_right, crop_bottom;
+    if (!outputFormat->findRect("crop", &crop_left, &crop_top, &crop_right, &crop_bottom)) {
+        crop_left = crop_top = 0;
+        crop_right = width - 1;
+        crop_bottom = height - 1;
+    }
+
+    // apply crop on bottom-right
+    // TODO: need to move this into the color converter itself.
+    if (dstRight >= imageWidth) {
+        crop_right = imageWidth - dstLeft - 1;
+        dstRight = dstLeft + crop_right;
+    }
+    if (dstBottom >= imageHeight) {
+        crop_bottom = imageHeight - dstTop - 1;
+        dstBottom = dstTop + crop_bottom;
+    }
+
+    *done = (++mTilesDecoded >= numTiles);
+
+    if (converter.isValid()) {
+        // Report a failed conversion to the caller instead of returning OK regardless.
+        return converter.convert(
+                (const uint8_t *)videoFrameBuffer->data(),
+                width, height,
+                crop_left, crop_top, crop_right, crop_bottom,
+                mFrame->mData,
+                mFrame->mWidth,
+                mFrame->mHeight,
+                dstLeft, dstTop, dstRight, dstBottom);
+    }
+
+    ALOGE("Unable to convert from format 0x%08x to 0x%08x",
+                srcFormat, dstFormat());
+    return ERROR_UNSUPPORTED;
+}
+
+}  // namespace android
diff --git a/media/libstagefright/StagefrightMetadataRetriever.cpp b/media/libstagefright/StagefrightMetadataRetriever.cpp
index 9babd1a..dfaa8b6 100644
--- a/media/libstagefright/StagefrightMetadataRetriever.cpp
+++ b/media/libstagefright/StagefrightMetadataRetriever.cpp
@@ -20,39 +20,24 @@
 #include <inttypes.h>
 
 #include <utils/Log.h>
-#include <gui/Surface.h>
 
+#include "include/FrameDecoder.h"
 #include "include/StagefrightMetadataRetriever.h"
 
-#include <media/ICrypto.h>
 #include <media/IMediaHTTPService.h>
-#include <media/MediaCodecBuffer.h>
-
-#include <media/DataSource.h>
-#include <media/MediaExtractor.h>
-#include <media/MediaSource.h>
 #include <media/stagefright/foundation/ADebug.h>
 #include <media/stagefright/foundation/AMessage.h>
-#include <media/stagefright/foundation/avc_utils.h>
-#include <media/stagefright/ColorConverter.h>
 #include <media/stagefright/DataSourceFactory.h>
 #include <media/stagefright/FileSource.h>
-#include <media/stagefright/MediaBuffer.h>
-#include <media/stagefright/MediaCodec.h>
 #include <media/stagefright/MediaCodecList.h>
 #include <media/stagefright/MediaDefs.h>
 #include <media/stagefright/MediaErrors.h>
 #include <media/stagefright/MediaExtractorFactory.h>
 #include <media/stagefright/MetaData.h>
-#include <media/stagefright/Utils.h>
-
 #include <media/CharacterEncodingDetector.h>
 
 namespace android {
 
-static const int64_t kBufferTimeOutUs = 30000ll; // 30 msec
-static const size_t kRetryCount = 20; // must be >0
-
 StagefrightMetadataRetriever::StagefrightMetadataRetriever()
     : mParsedMetaData(false),
       mAlbumArt(NULL) {
@@ -145,470 +130,123 @@
     return OK;
 }
 
-static VideoFrame *allocVideoFrame(
-        const sp<MetaData> &trackMeta, int32_t width, int32_t height, int32_t bpp, bool metaOnly) {
-    int32_t rotationAngle;
-    if (!trackMeta->findInt32(kKeyRotation, &rotationAngle)) {
-        rotationAngle = 0;  // By default, no rotation
-    }
+VideoFrame* StagefrightMetadataRetriever::getImageAtIndex(
+        int index, int colorFormat, bool metaOnly) {
 
-    uint32_t type;
-    const void *iccData;
-    size_t iccSize;
-    if (!trackMeta->findData(kKeyIccProfile, &type, &iccData, &iccSize)){
-        iccData = NULL;
-        iccSize = 0;
-    }
+    ALOGV("getImageAtIndex: index: %d colorFormat: %d, metaOnly: %d",
+            index, colorFormat, metaOnly);
 
-    int32_t sarWidth, sarHeight;
-    int32_t displayWidth, displayHeight;
-    if (trackMeta->findInt32(kKeySARWidth, &sarWidth)
-            && trackMeta->findInt32(kKeySARHeight, &sarHeight)
-            && sarHeight != 0) {
-        displayWidth = (width * sarWidth) / sarHeight;
-        displayHeight = height;
-    } else if (trackMeta->findInt32(kKeyDisplayWidth, &displayWidth)
-                && trackMeta->findInt32(kKeyDisplayHeight, &displayHeight)
-                && displayWidth > 0 && displayHeight > 0
-                && width > 0 && height > 0) {
-        ALOGV("found display size %dx%d", displayWidth, displayHeight);
-    } else {
-        displayWidth = width;
-        displayHeight = height;
-    }
-
-    return new VideoFrame(width, height, displayWidth, displayHeight,
-            rotationAngle, bpp, !metaOnly, iccData, iccSize);
-}
-
-static bool getDstColorFormat(android_pixel_format_t colorFormat,
-        OMX_COLOR_FORMATTYPE *omxColorFormat, int32_t *bpp) {
-    switch (colorFormat) {
-        case HAL_PIXEL_FORMAT_RGB_565:
-        {
-            *omxColorFormat = OMX_COLOR_Format16bitRGB565;
-            *bpp = 2;
-            return true;
-        }
-        case HAL_PIXEL_FORMAT_RGBA_8888:
-        {
-            *omxColorFormat = OMX_COLOR_Format32BitRGBA8888;
-            *bpp = 4;
-            return true;
-        }
-        case HAL_PIXEL_FORMAT_BGRA_8888:
-        {
-            *omxColorFormat = OMX_COLOR_Format32bitBGRA8888;
-            *bpp = 4;
-            return true;
-        }
-        default:
-        {
-            ALOGE("Unsupported color format: %d", colorFormat);
-            break;
-        }
-    }
-    return false;
-}
-
-static VideoFrame *extractVideoFrame(
-        const AString &componentName,
-        const sp<MetaData> &trackMeta,
-        const sp<IMediaSource> &source,
-        int64_t frameTimeUs,
-        int seekMode,
-        int colorFormat,
-        bool metaOnly) {
-    sp<MetaData> format = source->getFormat();
-
-    MediaSource::ReadOptions::SeekMode mode =
-            static_cast<MediaSource::ReadOptions::SeekMode>(seekMode);
-    if (seekMode < MediaSource::ReadOptions::SEEK_PREVIOUS_SYNC ||
-        seekMode > MediaSource::ReadOptions::SEEK_CLOSEST) {
-        ALOGE("Unknown seek mode: %d", seekMode);
+    if (mExtractor.get() == NULL) {
+        ALOGE("no extractor.");
         return NULL;
     }
 
-    int32_t dstBpp;
-    OMX_COLOR_FORMATTYPE dstFormat;
-    if (!getDstColorFormat(
-            (android_pixel_format_t)colorFormat, &dstFormat, &dstBpp)) {
-        return NULL;
-    }
+    size_t n = mExtractor->countTracks();
+    size_t i;
+    int imageCount = 0;
 
-    if (metaOnly) {
-        int32_t width, height;
-        CHECK(trackMeta->findInt32(kKeyWidth, &width));
-        CHECK(trackMeta->findInt32(kKeyHeight, &height));
-        return allocVideoFrame(trackMeta, width, height, dstBpp, true);
-    }
+    for (i = 0; i < n; ++i) {
+        sp<MetaData> meta = mExtractor->getTrackMetaData(i);
+        ALOGV("getting track %zu of %zu, meta=%s", i, n, meta->toString().c_str());
 
-    MediaSource::ReadOptions options;
-    sp<MetaData> overrideMeta;
-    if (frameTimeUs < 0) {
-        uint32_t type;
-        const void *data;
-        size_t size;
-        int64_t thumbNailTime;
-        int32_t thumbnailWidth, thumbnailHeight;
+        const char *mime;
+        CHECK(meta->findCString(kKeyMIMEType, &mime));
 
-        // if we have a stand-alone thumbnail, set up the override meta,
-        // and set seekTo time to -1.
-        if (trackMeta->findInt32(kKeyThumbnailWidth, &thumbnailWidth)
-         && trackMeta->findInt32(kKeyThumbnailHeight, &thumbnailHeight)
-         && trackMeta->findData(kKeyThumbnailHVCC, &type, &data, &size)){
-            overrideMeta = new MetaData(*trackMeta);
-            overrideMeta->remove(kKeyDisplayWidth);
-            overrideMeta->remove(kKeyDisplayHeight);
-            overrideMeta->setInt32(kKeyWidth, thumbnailWidth);
-            overrideMeta->setInt32(kKeyHeight, thumbnailHeight);
-            overrideMeta->setData(kKeyHVCC, type, data, size);
-            thumbNailTime = -1ll;
-            ALOGV("thumbnail: %dx%d", thumbnailWidth, thumbnailHeight);
-        } else if (!trackMeta->findInt64(kKeyThumbnailTime, &thumbNailTime)
-                || thumbNailTime < 0) {
-            thumbNailTime = 0;
-        }
-
-        options.setSeekTo(thumbNailTime, mode);
-    } else {
-        options.setSeekTo(frameTimeUs, mode);
-    }
-
-    int32_t gridRows = 1, gridCols = 1;
-    if (overrideMeta == NULL) {
-        // check if we're dealing with a tiled heif
-        int32_t gridWidth, gridHeight;
-        if (trackMeta->findInt32(kKeyGridWidth, &gridWidth) && gridWidth > 0
-         && trackMeta->findInt32(kKeyGridHeight, &gridHeight) && gridHeight > 0) {
-            int32_t width, height, displayWidth, displayHeight;
-            CHECK(trackMeta->findInt32(kKeyWidth, &width));
-            CHECK(trackMeta->findInt32(kKeyHeight, &height));
-            CHECK(trackMeta->findInt32(kKeyDisplayWidth, &displayWidth));
-            CHECK(trackMeta->findInt32(kKeyDisplayHeight, &displayHeight));
-
-            if (width >= displayWidth && height >= displayHeight
-                    && (width % gridWidth == 0) && (height % gridHeight == 0)) {
-                ALOGV("grid config: %dx%d, display %dx%d, grid %dx%d",
-                        width, height, displayWidth, displayHeight, gridWidth, gridHeight);
-
-                overrideMeta = new MetaData(*trackMeta);
-                overrideMeta->remove(kKeyDisplayWidth);
-                overrideMeta->remove(kKeyDisplayHeight);
-                overrideMeta->setInt32(kKeyWidth, gridWidth);
-                overrideMeta->setInt32(kKeyHeight, gridHeight);
-                gridCols = width / gridWidth;
-                gridRows = height / gridHeight;
-            } else {
-                ALOGE("Bad grid config: %dx%d, display %dx%d, grid %dx%d",
-                        width, height, displayWidth, displayHeight, gridWidth, gridHeight);
+        if (!strncasecmp(mime, "image/", 6)) {
+            int32_t isPrimary;
+            if ((index < 0 && meta->findInt32(kKeyIsPrimaryImage, &isPrimary) && isPrimary)
+                    || (index == imageCount++)) {
+                break;
             }
         }
-        if (overrideMeta == NULL) {
-            overrideMeta = trackMeta;
-        }
     }
-    int32_t numTiles = gridRows * gridCols;
 
-    sp<AMessage> videoFormat;
-    if (convertMetaDataToMessage(overrideMeta, &videoFormat) != OK) {
-        ALOGE("b/23680780");
-        ALOGW("Failed to convert meta data to message");
+    if (i == n) {
+        ALOGE("image track not found.");
         return NULL;
     }
 
-    // TODO: Use Flexible color instead
-    videoFormat->setInt32("color-format", OMX_COLOR_FormatYUV420Planar);
+    sp<MetaData> trackMeta = mExtractor->getTrackMetaData(i);
 
-    // For the thumbnail extraction case, try to allocate single buffer in both
-    // input and output ports, if seeking to a sync frame. NOTE: This request may
-    // fail if component requires more than that for decoding.
-    bool isSeekingClosest = (seekMode == MediaSource::ReadOptions::SEEK_CLOSEST);
-    bool decodeSingleFrame = !isSeekingClosest && (numTiles == 1);
-    if (decodeSingleFrame) {
-        videoFormat->setInt32("android._num-input-buffers", 1);
-        videoFormat->setInt32("android._num-output-buffers", 1);
-    }
+    sp<IMediaSource> source = mExtractor->getTrack(i);
 
-    status_t err;
-    sp<ALooper> looper = new ALooper;
-    looper->start();
-    sp<MediaCodec> decoder = MediaCodec::CreateByComponentName(
-            looper, componentName, &err);
-
-    if (decoder.get() == NULL || err != OK) {
-        ALOGW("Failed to instantiate decoder [%s]", componentName.c_str());
+    if (source.get() == NULL) {
+        ALOGE("unable to instantiate image track.");
         return NULL;
     }
 
-    err = decoder->configure(videoFormat, NULL /* surface */, NULL /* crypto */, 0 /* flags */);
-    if (err != OK) {
-        ALOGW("configure returned error %d (%s)", err, asString(err));
-        decoder->release();
-        return NULL;
-    }
-
-    err = decoder->start();
-    if (err != OK) {
-        ALOGW("start returned error %d (%s)", err, asString(err));
-        decoder->release();
-        return NULL;
-    }
-
-    err = source->start();
-    if (err != OK) {
-        ALOGW("source failed to start: %d (%s)", err, asString(err));
-        decoder->release();
-        return NULL;
-    }
-
-    Vector<sp<MediaCodecBuffer> > inputBuffers;
-    err = decoder->getInputBuffers(&inputBuffers);
-    if (err != OK) {
-        ALOGW("failed to get input buffers: %d (%s)", err, asString(err));
-        decoder->release();
-        source->stop();
-        return NULL;
-    }
-
-    Vector<sp<MediaCodecBuffer> > outputBuffers;
-    err = decoder->getOutputBuffers(&outputBuffers);
-    if (err != OK) {
-        ALOGW("failed to get output buffers: %d (%s)", err, asString(err));
-        decoder->release();
-        source->stop();
-        return NULL;
-    }
-
-    sp<AMessage> outputFormat = NULL;
-    bool haveMoreInputs = true;
-    size_t index, offset, size;
-    int64_t timeUs;
-    size_t retriesLeft = kRetryCount;
-    bool done = false;
     const char *mime;
-    bool success = format->findCString(kKeyMIMEType, &mime);
-    if (!success) {
-        ALOGE("Could not find mime type");
-        return NULL;
+    CHECK(trackMeta->findCString(kKeyMIMEType, &mime));
+    ALOGV("extracting from %s track", mime);
+    if (!strcasecmp(mime, MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC)) {
+        mime = MEDIA_MIMETYPE_VIDEO_HEVC;
+        trackMeta = new MetaData(*trackMeta);
+        trackMeta->setCString(kKeyMIMEType, mime);
     }
 
-    bool isAvcOrHevc = !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)
-            || !strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_HEVC);
+    Vector<AString> matchingCodecs;
+    MediaCodecList::findMatchingCodecs(
+            mime,
+            false, /* encoder */
+            MediaCodecList::kPreferSoftwareCodecs,
+            &matchingCodecs);
 
-    bool firstSample = true;
-    int64_t targetTimeUs = -1ll;
+    for (size_t i = 0; i < matchingCodecs.size(); ++i) {
+        const AString &componentName = matchingCodecs[i];
+        ImageDecoder decoder(componentName, trackMeta, source);
+        VideoFrame* frame = decoder.extractFrame(
+                0 /*frameTimeUs*/, 0 /*seekMode*/, colorFormat, metaOnly);
 
-    VideoFrame *frame = NULL;
-    int32_t tilesDecoded = 0;
-
-    do {
-        size_t inputIndex = -1;
-        int64_t ptsUs = 0ll;
-        uint32_t flags = 0;
-        sp<MediaCodecBuffer> codecBuffer = NULL;
-
-        while (haveMoreInputs) {
-            err = decoder->dequeueInputBuffer(&inputIndex, kBufferTimeOutUs);
-            if (err != OK) {
-                ALOGW("Timed out waiting for input");
-                if (retriesLeft) {
-                    err = OK;
-                }
-                break;
-            }
-            codecBuffer = inputBuffers[inputIndex];
-
-            MediaBuffer *mediaBuffer = NULL;
-
-            err = source->read(&mediaBuffer, &options);
-            options.clearSeekTo();
-            if (err != OK) {
-                ALOGW("Input Error or EOS");
-                haveMoreInputs = false;
-                if (err == ERROR_END_OF_STREAM) {
-                    err = OK;
-                }
-                break;
-            }
-            if (firstSample && isSeekingClosest) {
-                mediaBuffer->meta_data()->findInt64(kKeyTargetTime, &targetTimeUs);
-                ALOGV("Seeking closest: targetTimeUs=%lld", (long long)targetTimeUs);
-            }
-            firstSample = false;
-
-            if (mediaBuffer->range_length() > codecBuffer->capacity()) {
-                ALOGE("buffer size (%zu) too large for codec input size (%zu)",
-                        mediaBuffer->range_length(), codecBuffer->capacity());
-                haveMoreInputs = false;
-                err = BAD_VALUE;
-            } else {
-                codecBuffer->setRange(0, mediaBuffer->range_length());
-
-                CHECK(mediaBuffer->meta_data()->findInt64(kKeyTime, &ptsUs));
-                memcpy(codecBuffer->data(),
-                        (const uint8_t*)mediaBuffer->data() + mediaBuffer->range_offset(),
-                        mediaBuffer->range_length());
-            }
-
-            mediaBuffer->release();
-            break;
+        if (frame != NULL) {
+            return frame;
         }
-
-        if (haveMoreInputs && inputIndex < inputBuffers.size()) {
-            if (isAvcOrHevc && IsIDR(codecBuffer->data(), codecBuffer->size())
-                    && decodeSingleFrame) {
-                // Only need to decode one IDR frame, unless we're seeking with CLOSEST
-                // option, in which case we need to actually decode to targetTimeUs.
-                haveMoreInputs = false;
-                flags |= MediaCodec::BUFFER_FLAG_EOS;
-            }
-
-            ALOGV("QueueInput: size=%zu ts=%" PRId64 " us flags=%x",
-                    codecBuffer->size(), ptsUs, flags);
-            err = decoder->queueInputBuffer(
-                    inputIndex,
-                    codecBuffer->offset(),
-                    codecBuffer->size(),
-                    ptsUs,
-                    flags);
-
-            // we don't expect an output from codec config buffer
-            if (flags & MediaCodec::BUFFER_FLAG_CODECCONFIG) {
-                continue;
-            }
-        }
-
-        while (err == OK) {
-            // wait for a decoded buffer
-            err = decoder->dequeueOutputBuffer(
-                    &index,
-                    &offset,
-                    &size,
-                    &timeUs,
-                    &flags,
-                    kBufferTimeOutUs);
-
-            if (err == INFO_FORMAT_CHANGED) {
-                ALOGV("Received format change");
-                err = decoder->getOutputFormat(&outputFormat);
-            } else if (err == INFO_OUTPUT_BUFFERS_CHANGED) {
-                ALOGV("Output buffers changed");
-                err = decoder->getOutputBuffers(&outputBuffers);
-            } else {
-                if (err == -EAGAIN /* INFO_TRY_AGAIN_LATER */ && --retriesLeft > 0) {
-                    ALOGV("Timed-out waiting for output.. retries left = %zu", retriesLeft);
-                    err = OK;
-                } else if (err == OK) {
-                    // If we're seeking with CLOSEST option and obtained a valid targetTimeUs
-                    // from the extractor, decode to the specified frame. Otherwise we're done.
-                    ALOGV("Received an output buffer, timeUs=%lld", (long long)timeUs);
-                    sp<MediaCodecBuffer> videoFrameBuffer = outputBuffers.itemAt(index);
-
-                    int32_t width, height;
-                    CHECK(outputFormat != NULL);
-                    CHECK(outputFormat->findInt32("width", &width));
-                    CHECK(outputFormat->findInt32("height", &height));
-
-                    int32_t crop_left, crop_top, crop_right, crop_bottom;
-                    if (!outputFormat->findRect("crop", &crop_left, &crop_top, &crop_right, &crop_bottom)) {
-                        crop_left = crop_top = 0;
-                        crop_right = width - 1;
-                        crop_bottom = height - 1;
-                    }
-
-                    if (frame == NULL) {
-                        frame = allocVideoFrame(
-                                trackMeta,
-                                (crop_right - crop_left + 1) * gridCols,
-                                (crop_bottom - crop_top + 1) * gridRows,
-                                dstBpp,
-                                false /*metaOnly*/);
-                    }
-
-                    int32_t srcFormat;
-                    CHECK(outputFormat->findInt32("color-format", &srcFormat));
-
-                    ColorConverter converter((OMX_COLOR_FORMATTYPE)srcFormat, dstFormat);
-
-                    int32_t dstLeft, dstTop, dstRight, dstBottom;
-                    if (numTiles == 1) {
-                        dstLeft = crop_left;
-                        dstTop = crop_top;
-                        dstRight = crop_right;
-                        dstBottom = crop_bottom;
-                    } else {
-                        dstLeft = tilesDecoded % gridCols * width;
-                        dstTop = tilesDecoded / gridCols * height;
-                        dstRight = dstLeft + width - 1;
-                        dstBottom = dstTop + height - 1;
-                    }
-
-                    if (converter.isValid()) {
-                        err = converter.convert(
-                                (const uint8_t *)videoFrameBuffer->data(),
-                                width, height,
-                                crop_left, crop_top, crop_right, crop_bottom,
-                                frame->mData,
-                                frame->mWidth,
-                                frame->mHeight,
-                                dstLeft, dstTop, dstRight, dstBottom);
-                    } else {
-                        ALOGE("Unable to convert from format 0x%08x to 0x%08x",
-                                srcFormat, dstFormat);
-
-                        err = ERROR_UNSUPPORTED;
-                    }
-
-                    done = (targetTimeUs < 0ll) || (timeUs >= targetTimeUs);
-                    if (numTiles > 1) {
-                        tilesDecoded++;
-                        done &= (tilesDecoded >= numTiles);
-                    }
-                    err = decoder->releaseOutputBuffer(index);
-                } else {
-                    ALOGW("Received error %d (%s) instead of output", err, asString(err));
-                    done = true;
-                }
-                break;
-            }
-        }
-    } while (err == OK && !done);
-
-    source->stop();
-    decoder->release();
-
-    if (err != OK) {
-        ALOGE("failed to get video frame (err %d)", err);
-        delete frame;
-        frame = NULL;
+        ALOGV("%s failed to extract thumbnail, trying next decoder.", componentName.c_str());
     }
 
-    return frame;
+    return NULL;
 }
 
-VideoFrame *StagefrightMetadataRetriever::getFrameAtTime(
+VideoFrame* StagefrightMetadataRetriever::getFrameAtTime(
         int64_t timeUs, int option, int colorFormat, bool metaOnly) {
-
     ALOGV("getFrameAtTime: %" PRId64 " us option: %d colorFormat: %d, metaOnly: %d",
             timeUs, option, colorFormat, metaOnly);
 
+    VideoFrame *frame;  // only read below when getFrameInternal() returns OK
+    status_t err = getFrameInternal(
+            timeUs, 1 /*numFrames*/, option, colorFormat, metaOnly, &frame, NULL /*outFrames*/);
+    return (err == OK) ? frame : NULL;  // any failure is reported to the caller as NULL
+}
+
+// Batch retrieval by index: frameIndex stands in for the timestamp in getFrameInternal().
+status_t StagefrightMetadataRetriever::getFrameAtIndex(
+        std::vector<VideoFrame*>* frames,
+        int frameIndex, int numFrames, int colorFormat, bool metaOnly) {
+    ALOGV("getFrameAtIndex: frameIndex %d, numFrames %d, colorFormat: %d, metaOnly: %d",
+            frameIndex, numFrames, colorFormat, metaOnly);
+    const int seekMode = MediaSource::ReadOptions::SEEK_FRAME_INDEX;
+    return getFrameInternal(
+            frameIndex, numFrames, seekMode, colorFormat, metaOnly, NULL /*outFrame*/, frames);
+}
+
+status_t StagefrightMetadataRetriever::getFrameInternal(
+        int64_t timeUs, int numFrames, int option, int colorFormat, bool metaOnly,
+        VideoFrame **outFrame, std::vector<VideoFrame*>* outFrames) {
     if (mExtractor.get() == NULL) {
-        ALOGV("no extractor.");
-        return NULL;
+        ALOGE("no extractor.");
+        return NO_INIT;
     }
 
     sp<MetaData> fileMeta = mExtractor->getMetaData();
 
     if (fileMeta == NULL) {
-        ALOGV("extractor doesn't publish metadata, failed to initialize?");
-        return NULL;
+        ALOGE("extractor doesn't publish metadata, failed to initialize?");
+        return NO_INIT;
     }
 
     int32_t drm = 0;
     if (fileMeta->findInt32(kKeyIsDRM, &drm) && drm != 0) {
         ALOGE("frame grab not allowed.");
-        return NULL;
+        return ERROR_DRM_UNKNOWN;
     }
 
     size_t n = mExtractor->countTracks();
@@ -625,8 +263,8 @@
     }
 
     if (i == n) {
-        ALOGV("no video track found.");
-        return NULL;
+        ALOGE("no video track found.");
+        return INVALID_OPERATION;
     }
 
     sp<MetaData> trackMeta = mExtractor->getTrackMetaData(
@@ -636,7 +274,7 @@
 
     if (source.get() == NULL) {
         ALOGV("unable to instantiate video track.");
-        return NULL;
+        return UNKNOWN_ERROR;
     }
 
     const void *data;
@@ -659,16 +297,25 @@
 
     for (size_t i = 0; i < matchingCodecs.size(); ++i) {
         const AString &componentName = matchingCodecs[i];
-        VideoFrame *frame = extractVideoFrame(
-                componentName, trackMeta, source, timeUs, option, colorFormat, metaOnly);
-
-        if (frame != NULL) {
-            return frame;
+        VideoFrameDecoder decoder(componentName, trackMeta, source);
+        if (outFrame != NULL) {
+            *outFrame = decoder.extractFrame(
+                    timeUs, option, colorFormat, metaOnly);
+            if (*outFrame != NULL) {
+                return OK;
+            }
+        } else if (outFrames != NULL) {
+            status_t err = decoder.extractFrames(
+                    timeUs, numFrames, option, colorFormat, outFrames);
+            if (err == OK) {
+                return OK;
+            }
         }
-        ALOGV("%s failed to extract thumbnail, trying next decoder.", componentName.c_str());
+        ALOGV("%s failed to extract frame, trying next decoder.", componentName.c_str());
     }
 
-    return NULL;
+    ALOGE("all codecs failed to extract frame.");
+    return UNKNOWN_ERROR;
 }
 
 MediaAlbumArt *StagefrightMetadataRetriever::extractAlbumArt() {
@@ -800,8 +447,14 @@
     bool hasVideo = false;
     int32_t videoWidth = -1;
     int32_t videoHeight = -1;
+    int32_t videoFrameCount = 0;
     int32_t audioBitrate = -1;
     int32_t rotationAngle = -1;
+    int32_t imageCount = 0;
+    int32_t imagePrimary = 0;
+    int32_t imageWidth = -1;
+    int32_t imageHeight = -1;
+    int32_t imageRotation = -1;
 
     // The overall duration is the duration of the longest track.
     int64_t maxDurationUs = 0;
@@ -832,6 +485,20 @@
                 if (!trackMeta->findInt32(kKeyRotation, &rotationAngle)) {
                     rotationAngle = 0;
                 }
+                if (!trackMeta->findInt32(kKeyFrameCount, &videoFrameCount)) {
+                    videoFrameCount = 0;
+                }
+            } else if (!strncasecmp("image/", mime, 6)) {
+                int32_t isPrimary;
+                if (trackMeta->findInt32(kKeyIsPrimaryImage, &isPrimary) && isPrimary) {
+                    imagePrimary = imageCount;
+                    CHECK(trackMeta->findInt32(kKeyWidth, &imageWidth));
+                    CHECK(trackMeta->findInt32(kKeyHeight, &imageHeight));
+                    if (!trackMeta->findInt32(kKeyRotation, &imageRotation)) {
+                        imageRotation = 0;
+                    }
+                }
+                imageCount++;
             } else if (!strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP)) {
                 const char *lang;
                 if (trackMeta->findCString(kKeyMediaLanguage, &lang)) {
@@ -870,6 +537,30 @@
 
         sprintf(tmp, "%d", rotationAngle);
         mMetaData.add(METADATA_KEY_VIDEO_ROTATION, String8(tmp));
+
+        if (videoFrameCount > 0) {
+            sprintf(tmp, "%d", videoFrameCount);
+            mMetaData.add(METADATA_KEY_VIDEO_FRAME_COUNT, String8(tmp));
+        }
+    }
+
+    if (imageCount > 0) {
+        mMetaData.add(METADATA_KEY_HAS_IMAGE, String8("yes"));
+
+        sprintf(tmp, "%d", imageCount);
+        mMetaData.add(METADATA_KEY_IMAGE_COUNT, String8(tmp));
+
+        sprintf(tmp, "%d", imagePrimary);
+        mMetaData.add(METADATA_KEY_IMAGE_PRIMARY, String8(tmp));
+
+        sprintf(tmp, "%d", imageWidth);
+        mMetaData.add(METADATA_KEY_IMAGE_WIDTH, String8(tmp));
+
+        sprintf(tmp, "%d", imageHeight);
+        mMetaData.add(METADATA_KEY_IMAGE_HEIGHT, String8(tmp));
+
+        sprintf(tmp, "%d", imageRotation);
+        mMetaData.add(METADATA_KEY_IMAGE_ROTATION, String8(tmp));
     }
 
     if (numTracks == 1 && hasAudio && audioBitrate >= 0) {
diff --git a/media/libstagefright/Utils.cpp b/media/libstagefright/Utils.cpp
index bd80e45..6e77f15 100644
--- a/media/libstagefright/Utils.cpp
+++ b/media/libstagefright/Utils.cpp
@@ -639,7 +639,8 @@
         msg->setString("language", lang);
     }
 
-    if (!strncasecmp("video/", mime, 6)) {
+    if (!strncasecmp("video/", mime, 6) ||
+            !strncasecmp("image/", mime, 6)) {
         int32_t width, height;
         if (!meta->findInt32(kKeyWidth, &width)
                 || !meta->findInt32(kKeyHeight, &height)) {
@@ -663,6 +664,21 @@
             msg->setInt32("sar-height", sarHeight);
         }
 
+        // HEIF image tracks: export the grid layout, but only when all four
+        // grid keys are present in the track metadata.
+        if (!strncasecmp("image/", mime, 6)) {
+            int32_t gridWidth, gridHeight, gridRows, gridCols;
+            if (meta->findInt32(kKeyGridWidth, &gridWidth)
+                    && meta->findInt32(kKeyGridHeight, &gridHeight)
+                    && meta->findInt32(kKeyGridRows, &gridRows)
+                    && meta->findInt32(kKeyGridCols, &gridCols)) {
+                msg->setInt32("grid-width", gridWidth);
+                msg->setInt32("grid-height", gridHeight);
+                msg->setInt32("grid-rows", gridRows);
+                msg->setInt32("grid-cols", gridCols);
+            }
+        }
+
         int32_t colorFormat;
         if (meta->findInt32(kKeyColorFormat, &colorFormat)) {
             msg->setInt32("color-format", colorFormat);
diff --git a/media/libstagefright/colorconversion/ColorConverter.cpp b/media/libstagefright/colorconversion/ColorConverter.cpp
index 0982006..cbb38fd 100644
--- a/media/libstagefright/colorconversion/ColorConverter.cpp
+++ b/media/libstagefright/colorconversion/ColorConverter.cpp
@@ -129,6 +129,12 @@
             dstWidth, dstHeight,
             dstCropLeft, dstCropTop, dstCropRight, dstCropBottom, mDstFormat);
 
+    if (!((src.mCropLeft & 1) == 0
+        && src.cropWidth() == dst.cropWidth()
+        && src.cropHeight() == dst.cropHeight())) {
+        return ERROR_UNSUPPORTED;
+    }
+
     status_t err;
 
     switch (mSrcFormat) {
@@ -172,12 +178,6 @@
 
     uint8_t *kAdjustedClip = initClip();
 
-    if (!((src.mCropLeft & 1) == 0
-        && src.cropWidth() == dst.cropWidth()
-        && src.cropHeight() == dst.cropHeight())) {
-        return ERROR_UNSUPPORTED;
-    }
-
     uint16_t *dst_ptr = (uint16_t *)dst.mBits
         + dst.mCropTop * dst.mWidth + dst.mCropLeft;
 
@@ -232,12 +232,6 @@
 
 status_t ColorConverter::convertYUV420PlanarUseLibYUV(
         const BitmapParams &src, const BitmapParams &dst) {
-    if (!((src.mCropLeft & 1) == 0
-            && src.cropWidth() == dst.cropWidth()
-            && src.cropHeight() == dst.cropHeight())) {
-        return ERROR_UNSUPPORTED;
-    }
-
     uint8_t *dst_ptr = (uint8_t *)dst.mBits
         + dst.mCropTop * dst.mStride + dst.mCropLeft * dst.mBpp;
 
@@ -338,12 +332,6 @@
 }
 status_t ColorConverter::convertYUV420Planar(
         const BitmapParams &src, const BitmapParams &dst) {
-    if (!((src.mCropLeft & 1) == 0
-            && src.cropWidth() == dst.cropWidth()
-            && src.cropHeight() == dst.cropHeight())) {
-        return ERROR_UNSUPPORTED;
-    }
-
     uint8_t *kAdjustedClip = initClip();
 
     uint8_t *dst_ptr = (uint8_t *)dst.mBits
@@ -422,12 +410,6 @@
         const BitmapParams &src, const BitmapParams &dst) {
     uint8_t *kAdjustedClip = initClip();
 
-    if (!((src.mCropLeft & 1) == 0
-            && src.cropWidth() == dst.cropWidth()
-            && src.cropHeight() == dst.cropHeight())) {
-        return ERROR_UNSUPPORTED;
-    }
-
     uint16_t *dst_ptr = (uint16_t *)dst.mBits
         + dst.mCropTop * dst.mWidth + dst.mCropLeft;
 
@@ -496,12 +478,6 @@
 
     uint8_t *kAdjustedClip = initClip();
 
-    if (!((src.mCropLeft & 1) == 0
-            && src.cropWidth() == dst.cropWidth()
-            && src.cropHeight() == dst.cropHeight())) {
-        return ERROR_UNSUPPORTED;
-    }
-
     uint16_t *dst_ptr = (uint16_t *)dst.mBits
         + dst.mCropTop * dst.mWidth + dst.mCropLeft;
 
@@ -568,12 +544,6 @@
         const BitmapParams &src, const BitmapParams &dst) {
     uint8_t *kAdjustedClip = initClip();
 
-    if (!((src.mCropLeft & 1) == 0
-            && src.cropWidth() == dst.cropWidth()
-            && src.cropHeight() == dst.cropHeight())) {
-        return ERROR_UNSUPPORTED;
-    }
-
     uint16_t *dst_ptr = (uint16_t *)dst.mBits
         + dst.mCropTop * dst.mWidth + dst.mCropLeft;
 
diff --git a/media/libstagefright/foundation/MediaDefs.cpp b/media/libstagefright/foundation/MediaDefs.cpp
index 7caebc6..1695c75 100644
--- a/media/libstagefright/foundation/MediaDefs.cpp
+++ b/media/libstagefright/foundation/MediaDefs.cpp
@@ -19,6 +19,7 @@
 namespace android {
 
 const char *MEDIA_MIMETYPE_IMAGE_JPEG = "image/jpeg";
+const char *MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC = "image/vnd.android.heic";
 
 const char *MEDIA_MIMETYPE_VIDEO_VP8 = "video/x-vnd.on2.vp8";
 const char *MEDIA_MIMETYPE_VIDEO_VP9 = "video/x-vnd.on2.vp9";
@@ -58,6 +59,7 @@
 const char *MEDIA_MIMETYPE_CONTAINER_MPEG2TS = "video/mp2ts";
 const char *MEDIA_MIMETYPE_CONTAINER_AVI = "video/avi";
 const char *MEDIA_MIMETYPE_CONTAINER_MPEG2PS = "video/mp2p";
+const char *MEDIA_MIMETYPE_CONTAINER_HEIF = "image/heif";
 
 const char *MEDIA_MIMETYPE_TEXT_3GPP = "text/3gpp-tt";
 const char *MEDIA_MIMETYPE_TEXT_SUBRIP = "application/x-subrip";
diff --git a/media/libstagefright/foundation/include/media/stagefright/foundation/MediaDefs.h b/media/libstagefright/foundation/include/media/stagefright/foundation/MediaDefs.h
index 7f17013..25be89f 100644
--- a/media/libstagefright/foundation/include/media/stagefright/foundation/MediaDefs.h
+++ b/media/libstagefright/foundation/include/media/stagefright/foundation/MediaDefs.h
@@ -21,6 +21,7 @@
 namespace android {
 
 extern const char *MEDIA_MIMETYPE_IMAGE_JPEG;
+extern const char *MEDIA_MIMETYPE_IMAGE_ANDROID_HEIC;
 
 extern const char *MEDIA_MIMETYPE_VIDEO_VP8;
 extern const char *MEDIA_MIMETYPE_VIDEO_VP9;
@@ -60,6 +61,7 @@
 extern const char *MEDIA_MIMETYPE_CONTAINER_MPEG2TS;
 extern const char *MEDIA_MIMETYPE_CONTAINER_AVI;
 extern const char *MEDIA_MIMETYPE_CONTAINER_MPEG2PS;
+extern const char *MEDIA_MIMETYPE_CONTAINER_HEIF;
 
 extern const char *MEDIA_MIMETYPE_TEXT_3GPP;
 extern const char *MEDIA_MIMETYPE_TEXT_SUBRIP;
diff --git a/media/libstagefright/include/FrameDecoder.h b/media/libstagefright/include/FrameDecoder.h
new file mode 100644
index 0000000..d7c074c
--- /dev/null
+++ b/media/libstagefright/include/FrameDecoder.h
@@ -0,0 +1,196 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FRAME_DECODER_H_
+#define FRAME_DECODER_H_
+
+#include <memory>
+#include <vector>
+
+#include <media/stagefright/foundation/AString.h>
+#include <media/stagefright/foundation/ABase.h>
+#include <media/MediaSource.h>
+#include <media/openmax/OMX_Video.h>
+#include <system/graphics-base.h>
+
+namespace android {
+
+struct AMessage;
+class MediaCodecBuffer;
+class VideoFrame;
+
+// Common base for the frame decoders declared in this header. It stores the
+// codec component name, track metadata and media source it is constructed
+// with; subclasses implement the on*() hooks declared below.
+struct FrameDecoder {
+    // The destination format defaults to 16-bit RGB565 with mDstBpp == 2.
+    FrameDecoder(
+            const AString &componentName,
+            const sp<MetaData> &trackMeta,
+            const sp<IMediaSource> &source) :
+                mComponentName(componentName),
+                mTrackMeta(trackMeta),
+                mSource(source),
+                mDstFormat(OMX_COLOR_Format16bitRGB565),
+                mDstBpp(2) {}
+
+    // Single-frame entry point. NOTE(review): defined outside this header;
+    // StagefrightMetadataRetriever treats a NULL result as failure.
+    VideoFrame* extractFrame(
+            int64_t frameTimeUs,
+            int option,
+            int colorFormat,
+            bool metaOnly);
+
+    // Batch entry point: takes a frame count and an output vector, and
+    // reports the outcome as a status_t (OK is treated as success by callers).
+    status_t extractFrames(
+            int64_t frameTimeUs,
+            size_t numFrames,
+            int option,
+            int colorFormat,
+            std::vector<VideoFrame*>* frames);
+
+protected:
+    // Protected so that code outside the hierarchy cannot delete a decoder
+    // through a FrameDecoder pointer.
+    virtual ~FrameDecoder() {}
+
+    // Pure virtual hooks supplied by subclasses. NOTE(review): the code that
+    // invokes them is not part of this header.
+    virtual sp<AMessage> onGetFormatAndSeekOptions(
+            int64_t frameTimeUs,
+            size_t numFrames,
+            int seekMode,
+            MediaSource::ReadOptions *options) = 0;
+
+    virtual status_t onInputReceived(
+            const sp<MediaCodecBuffer> &codecBuffer,
+            const sp<MetaData> &sampleMeta,
+            bool firstSample,
+            uint32_t *flags) = 0;
+
+    virtual status_t onOutputReceived(
+            const sp<MediaCodecBuffer> &videoFrameBuffer,
+            const sp<AMessage> &outputFormat,
+            int64_t timeUs,
+            bool *done) = 0;
+
+    VideoFrame *allocVideoFrame(int32_t width, int32_t height, bool metaOnly);
+
+    sp<MetaData> trackMeta()     const      { return mTrackMeta; }
+    OMX_COLOR_FORMATTYPE dstFormat() const  { return mDstFormat; }
+    int32_t dstBpp()             const      { return mDstBpp; }
+
+    // Takes ownership of |frame| by wrapping it in a unique_ptr in mFrames.
+    void addFrame(VideoFrame *frame) {
+        mFrames.push_back(std::unique_ptr<VideoFrame>(frame));
+    }
+
+private:
+    AString mComponentName;
+    sp<MetaData> mTrackMeta;
+    sp<IMediaSource> mSource;
+    OMX_COLOR_FORMATTYPE mDstFormat;
+    int32_t mDstBpp;
+    std::vector<std::unique_ptr<VideoFrame> > mFrames;
+
+    bool setDstColorFormat(android_pixel_format_t colorFormat);
+    status_t extractInternal(int64_t frameTimeUs, size_t numFrames, int option);
+
+    DISALLOW_EVIL_CONSTRUCTORS(FrameDecoder);
+};
+
+// FrameDecoder subclass for video frames. It starts with SEEK_PREVIOUS_SYNC
+// as the seek mode, a target time of -1 and both frame counters at zero.
+struct VideoFrameDecoder : public FrameDecoder {
+    VideoFrameDecoder(
+            const AString &componentName,
+            const sp<MetaData> &trackMeta,
+            const sp<IMediaSource> &source) :
+                FrameDecoder(componentName, trackMeta, source),
+                mIsAvcOrHevc(false),
+                mSeekMode(MediaSource::ReadOptions::SEEK_PREVIOUS_SYNC),
+                mTargetTimeUs(-1ll),
+                mNumFrames(0),
+                mNumFramesDecoded(0) {}
+
+protected:
+    virtual sp<AMessage> onGetFormatAndSeekOptions(
+            int64_t frameTimeUs,
+            size_t numFrames,
+            int seekMode,
+            MediaSource::ReadOptions *options) override;
+
+    virtual status_t onInputReceived(
+            const sp<MediaCodecBuffer> &codecBuffer,
+            const sp<MetaData> &sampleMeta,
+            bool firstSample,
+            uint32_t *flags) override;
+
+    virtual status_t onOutputReceived(
+            const sp<MediaCodecBuffer> &videoFrameBuffer,
+            const sp<AMessage> &outputFormat,
+            int64_t timeUs,
+            bool *done) override;
+
+private:
+    bool mIsAvcOrHevc;
+    MediaSource::ReadOptions::SeekMode mSeekMode;
+    int64_t mTargetTimeUs;
+    size_t mNumFrames;
+    size_t mNumFramesDecoded;
+};
+
+// FrameDecoder subclass for still images. It starts with a 1x1 grid and no
+// decoded tiles; its onInputReceived() is a no-op that returns OK.
+struct ImageDecoder : public FrameDecoder {
+    ImageDecoder(
+            const AString &componentName,
+            const sp<MetaData> &trackMeta,
+            const sp<IMediaSource> &source) :
+                FrameDecoder(componentName, trackMeta, source),
+                mFrame(NULL), mGridRows(1), mGridCols(1), mTilesDecoded(0) {}
+
+protected:
+    virtual sp<AMessage> onGetFormatAndSeekOptions(
+            int64_t frameTimeUs,
+            size_t numFrames,
+            int seekMode,
+            MediaSource::ReadOptions *options) override;
+
+    virtual status_t onInputReceived(
+            const sp<MediaCodecBuffer> &codecBuffer __unused,
+            const sp<MetaData> &sampleMeta __unused,
+            bool firstSample __unused,
+            uint32_t *flags __unused) override { return OK; }
+
+    virtual status_t onOutputReceived(
+            const sp<MediaCodecBuffer> &videoFrameBuffer,
+            const sp<AMessage> &outputFormat,
+            int64_t timeUs,
+            bool *done) override;
+
+private:
+    VideoFrame *mFrame;
+    int32_t mGridRows;
+    int32_t mGridCols;
+    int32_t mTilesDecoded;
+};
+
+}  // namespace android
+
+#endif  // FRAME_DECODER_H_
diff --git a/media/libstagefright/include/StagefrightMetadataRetriever.h b/media/libstagefright/include/StagefrightMetadataRetriever.h
index 277eb3e..58442fe 100644
--- a/media/libstagefright/include/StagefrightMetadataRetriever.h
+++ b/media/libstagefright/include/StagefrightMetadataRetriever.h
@@ -40,7 +40,14 @@
     virtual status_t setDataSource(int fd, int64_t offset, int64_t length);
     virtual status_t setDataSource(const sp<DataSource>& source, const char *mime);
 
-    virtual VideoFrame *getFrameAtTime(int64_t timeUs, int option, int colorFormat, bool metaOnly);
+    virtual VideoFrame* getFrameAtTime(
+            int64_t timeUs, int option, int colorFormat, bool metaOnly);
+    virtual VideoFrame* getImageAtIndex(
+            int index, int colorFormat, bool metaOnly);
+    virtual status_t getFrameAtIndex(
+            std::vector<VideoFrame*>* frames,
+            int frameIndex, int numFrames, int colorFormat, bool metaOnly);
+
     virtual MediaAlbumArt *extractAlbumArt();
     virtual const char *extractMetadata(int keyCode);
 
@@ -56,6 +63,10 @@
     // Delete album art and clear metadata.
     void clearMetadata();
 
+    status_t getFrameInternal(
+            int64_t timeUs, int numFrames, int option, int colorFormat, bool metaOnly,
+            VideoFrame **outFrame, std::vector<VideoFrame*>* outFrames);
+
     StagefrightMetadataRetriever(const StagefrightMetadataRetriever &);
 
     StagefrightMetadataRetriever &operator=(
diff --git a/media/libstagefright/include/media/stagefright/MetaData.h b/media/libstagefright/include/media/stagefright/MetaData.h
index 6cfde9c..3438c56 100644
--- a/media/libstagefright/include/media/stagefright/MetaData.h
+++ b/media/libstagefright/include/media/stagefright/MetaData.h
@@ -215,7 +215,11 @@
 
     kKeyGridWidth        = 'grdW', // int32_t, HEIF grid width
     kKeyGridHeight       = 'grdH', // int32_t, HEIF grid height
+    kKeyGridRows         = 'grdR', // int32_t, HEIF grid rows
+    kKeyGridCols         = 'grdC', // int32_t, HEIF grid columns
     kKeyIccProfile       = 'prof', // raw data, ICC prifile data
+    kKeyIsPrimaryImage   = 'prim', // bool (int32_t), nonzero if the image track is the primary image
+    kKeyFrameCount       = 'nfrm', // int32_t, total number of frames in the video track
 };
 
 enum {