diff --git a/autogl.h b/autogl.h
new file mode 100644
index 0000000..5d25e44
--- /dev/null
+++ b/autogl.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2015 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_AUTO_GL_H_
+#define ANDROID_AUTO_GL_H_
+
+#include <memory>
+#define EGL_EGLEXT_PROTOTYPES
+#define GL_GLEXT_PROTOTYPES
+
+#include <EGL/egl.h>
+#include <EGL/eglext.h>
+#include <GLES2/gl2.h>
+#include <GLES2/gl2ext.h>
+
+// TODO(zachr): use hwc_drm_bo to turn buffer handles into textures
+#ifndef EGL_NATIVE_HANDLE_ANDROID_NVX
+#define EGL_NATIVE_HANDLE_ANDROID_NVX 0x322A
+#endif
+
+namespace android {
+
+#define AUTO_GL_TYPE(name, type, zero, deleter) \
+  struct name##Deleter {                        \
+    typedef type pointer;                       \
+                                                \
+    void operator()(pointer p) const {          \
+      if (p != zero) {                          \
+        deleter;                                \
+      }                                         \
+    }                                           \
+  };                                            \
+  typedef std::unique_ptr<type, name##Deleter> name;
+
+AUTO_GL_TYPE(AutoGLFramebuffer, GLuint, 0, glDeleteFramebuffers(1, &p))
+AUTO_GL_TYPE(AutoGLBuffer, GLuint, 0, glDeleteBuffers(1, &p))
+AUTO_GL_TYPE(AutoGLTexture, GLuint, 0, glDeleteTextures(1, &p))
+AUTO_GL_TYPE(AutoGLShader, GLint, 0, glDeleteShader(p))
+AUTO_GL_TYPE(AutoGLProgram, GLint, 0, glDeleteProgram(p))
+
+// Move-only owner of an EGLImageKHR plus the EGLDisplay used to destroy it.
+struct AutoEGLDisplayImage {
+  AutoEGLDisplayImage() : display_(EGL_NO_DISPLAY), image_(EGL_NO_IMAGE_KHR) {
+  }
+
+  AutoEGLDisplayImage(EGLDisplay display, EGLImageKHR image)
+      : display_(display), image_(image) {
+  }
+
+  AutoEGLDisplayImage(const AutoEGLDisplayImage& rhs) = delete;
+  // Start from the empty state so no uninitialized member is read/destroyed.
+  AutoEGLDisplayImage(AutoEGLDisplayImage&& rhs) : AutoEGLDisplayImage() {
+    std::swap(display_, rhs.display_);
+    std::swap(image_, rhs.image_);
+  }
+
+  ~AutoEGLDisplayImage() {
+    clear();
+  }
+
+  AutoEGLDisplayImage& operator=(const AutoEGLDisplayImage& rhs) = delete;
+  AutoEGLDisplayImage& operator=(AutoEGLDisplayImage&& rhs) {
+    clear();  // Destroy our own image; rhs is left empty by the swaps below
+    std::swap(display_, rhs.display_);
+    std::swap(image_, rhs.image_);
+    return *this;
+  }
+
+  void reset(EGLDisplay display, EGLImageKHR image) {
+    clear();  // Destroy the image currently held before adopting the new one
+    display_ = display;
+    image_ = image;
+  }
+
+  void clear() {
+    if (image_ != EGL_NO_IMAGE_KHR)
+      eglDestroyImageKHR(display_, image_);
+    display_ = EGL_NO_DISPLAY;
+    image_ = EGL_NO_IMAGE_KHR;
+  }
+
+  EGLImageKHR image() const {
+    return image_;
+  }
+
+ private:
+  EGLDisplay display_;
+  EGLImageKHR image_;
+};
+
+struct AutoEGLImageAndGLTexture {
+  AutoEGLDisplayImage image;
+  AutoGLTexture texture;
+};
+}
+
+#endif
diff --git a/drmcomposition.cpp b/drmcomposition.cpp
index f4e8d9d..864c5e5 100644
--- a/drmcomposition.cpp
+++ b/drmcomposition.cpp
@@ -24,28 +24,28 @@
 #include <stdlib.h>
 
 #include <cutils/log.h>
+#include <cutils/properties.h>
 #include <sw_sync.h>
 #include <sync/sync.h>
 
 namespace android {
 
-static const bool kUseOverlayPlanes = true;
-
 DrmComposition::DrmComposition(DrmResources *drm, Importer *importer)
     : drm_(drm), importer_(importer) {
+  char use_overlay_planes_prop[PROPERTY_VALUE_MAX];
+  property_get("hwc.drm.use_overlay_planes", use_overlay_planes_prop, "1");
+  bool use_overlay_planes = atoi(use_overlay_planes_prop);
+
   for (DrmResources::PlaneIter iter = drm_->begin_planes();
        iter != drm_->end_planes(); ++iter) {
     if ((*iter)->type() == DRM_PLANE_TYPE_PRIMARY)
       primary_planes_.push_back(*iter);
-    else if (kUseOverlayPlanes && (*iter)->type() == DRM_PLANE_TYPE_OVERLAY)
+    else if (use_overlay_planes && (*iter)->type() == DRM_PLANE_TYPE_OVERLAY)
       overlay_planes_.push_back(*iter);
   }
 }
 
-DrmComposition::~DrmComposition() {
-}
-
-int DrmComposition::Init() {
+int DrmComposition::Init(uint64_t frame_no) {
   for (DrmResources::ConnectorIter iter = drm_->begin_connectors();
        iter != drm_->end_connectors(); ++iter) {
     int display = (*iter)->display();
@@ -54,7 +54,12 @@
       ALOGE("Failed to allocate new display composition\n");
       return -ENOMEM;
     }
-    int ret = composition_map_[(*iter)->display()]->Init(drm_, importer_);
+
+    // If the display hasn't been modeset yet, this will be NULL
+    DrmCrtc *crtc = drm_->GetCrtcForDisplay(display);
+
+    int ret = composition_map_[(*iter)->display()]->Init(drm_, crtc, importer_,
+                                                         frame_no);
     if (ret) {
       ALOGE("Failed to init display composition for %d", (*iter)->display());
       return ret;
@@ -63,42 +68,26 @@
   return 0;
 }
 
-unsigned DrmComposition::GetRemainingLayers(int /*display*/,
-                                            unsigned num_needed) const {
-  return num_needed;
+int DrmComposition::SetLayers(size_t num_displays,
+                              const DrmCompositionDisplayLayersMap *maps) {
+  int ret = 0;
+  for (size_t display_index = 0; display_index < num_displays;
+       display_index++) {
+    const DrmCompositionDisplayLayersMap &map = maps[display_index];
+    int display = map.display;
+
+    ret = composition_map_[display]->SetLayers(
+        map.layers, map.num_layers, map.layer_indices, &primary_planes_,
+        &overlay_planes_);
+    if (ret)
+      return ret;
+  }
+
+  return DisableUnusedPlanes();
 }
 
-int DrmComposition::AddLayer(int display, hwc_layer_1_t *layer,
-                             hwc_drm_bo *bo) {
-  DrmCrtc *crtc = drm_->GetCrtcForDisplay(display);
-  if (!crtc) {
-    ALOGE("Failed to find crtc for display %d", display);
-    return -ENODEV;
-  }
-
-  // Find a plane for the layer
-  DrmPlane *plane = NULL;
-  for (std::vector<DrmPlane *>::iterator iter = primary_planes_.begin();
-       iter != primary_planes_.end(); ++iter) {
-    if ((*iter)->GetCrtcSupported(*crtc)) {
-      plane = *iter;
-      primary_planes_.erase(iter);
-      break;
-    }
-  }
-  for (std::vector<DrmPlane *>::iterator iter = overlay_planes_.begin();
-       !plane && iter != overlay_planes_.end(); ++iter) {
-    if ((*iter)->GetCrtcSupported(*crtc)) {
-      plane = *iter;
-      overlay_planes_.erase(iter);
-      break;
-    }
-  }
-  return composition_map_[display]->AddLayer(layer, bo, crtc, plane);
-}
-
-int DrmComposition::AddDpmsMode(int display, uint32_t dpms_mode) {
-  return composition_map_[display]->AddDpmsMode(dpms_mode);
+int DrmComposition::SetDpmsMode(int display, uint32_t dpms_mode) {
+  return composition_map_[display]->SetDpmsMode(dpms_mode);
 }
 
 std::unique_ptr<DrmDisplayComposition> DrmComposition::TakeDisplayComposition(
diff --git a/drmcomposition.h b/drmcomposition.h
index 69bf6d9..426ced0 100644
--- a/drmcomposition.h
+++ b/drmcomposition.h
@@ -31,21 +31,26 @@
 
 namespace android {
 
+struct DrmCompositionDisplayLayersMap {
+  int display;
+  size_t num_layers;
+  hwc_layer_1_t *layers;
+  size_t *layer_indices;
+};
+
 class DrmComposition {
  public:
   DrmComposition(DrmResources *drm, Importer *importer);
-  ~DrmComposition();
 
-  int Init();
+  int Init(uint64_t frame_no);
 
-  unsigned GetRemainingLayers(int display, unsigned num_needed) const;
-  int AddLayer(int display, hwc_layer_1_t *layer, hwc_drm_bo_t *bo);
-  int AddDpmsMode(int display, uint32_t dpms_mode);
-
-  int DisableUnusedPlanes();
+  int SetLayers(size_t num_displays,
+                const DrmCompositionDisplayLayersMap *maps);
+  int SetDpmsMode(int display, uint32_t dpms_mode);
 
   std::unique_ptr<DrmDisplayComposition> TakeDisplayComposition(int display);
   DrmDisplayComposition *GetDisplayComposition(int display);
+  int DisableUnusedPlanes();
 
  private:
   DrmComposition(const DrmComposition &) = delete;
@@ -62,7 +67,6 @@
    */
   std::map<int, std::unique_ptr<DrmDisplayComposition>> composition_map_;
 };
-
 }
 
 #endif  // ANDROID_DRM_COMPOSITION_H_
diff --git a/drmcompositor.cpp b/drmcompositor.cpp
index 082e75d..ad4cf88 100644
--- a/drmcompositor.cpp
+++ b/drmcompositor.cpp
@@ -27,7 +27,7 @@
 
 namespace android {
 
-DrmCompositor::DrmCompositor(DrmResources *drm) : drm_(drm) {
+DrmCompositor::DrmCompositor(DrmResources *drm) : drm_(drm), frame_no_(0) {
 }
 
 DrmCompositor::~DrmCompositor() {
@@ -53,7 +53,7 @@
     ALOGE("Failed to allocate drm composition");
     return NULL;
   }
-  int ret = composition->Init();
+  int ret = composition->Init(++frame_no_);
   if (ret) {
     ALOGE("Failed to initialize drm composition %d", ret);
     delete composition;
@@ -62,7 +62,8 @@
   return composition;
 }
 
-int DrmCompositor::QueueComposition(DrmComposition *composition) {
+int DrmCompositor::QueueComposition(
+    std::unique_ptr<DrmComposition> composition) {
   int ret = composition->DisableUnusedPlanes();
   if (ret) {
     ALOGE("Failed to disable unused planes %d", ret);
@@ -76,7 +77,6 @@
         composition->TakeDisplayComposition(display));
     if (ret) {
       ALOGE("Failed to queue composition for display %d", display);
-      delete composition;
       return ret;
     }
   }
diff --git a/drmcompositor.h b/drmcompositor.h
index aa4a876..0741932 100644
--- a/drmcompositor.h
+++ b/drmcompositor.h
@@ -22,6 +22,7 @@
 #include "importer.h"
 
 #include <map>
+#include <memory>
 #include <sstream>
 
 namespace android {
@@ -35,7 +36,7 @@
 
   DrmComposition *CreateComposition(Importer *importer);
 
-  int QueueComposition(DrmComposition *composition);
+  int QueueComposition(std::unique_ptr<DrmComposition> composition);
   int Composite();
   void Dump(std::ostringstream *out) const;
 
@@ -44,6 +45,8 @@
 
   DrmResources *drm_;
 
+  uint64_t frame_no_;
+
   // mutable for Dump() propagation
   mutable std::map<int, DrmDisplayCompositor> compositor_map_;
 };
diff --git a/drmdisplaycomposition.cpp b/drmdisplaycomposition.cpp
index 21a9c2a..c1848e0 100644
--- a/drmdisplaycomposition.cpp
+++ b/drmdisplaycomposition.cpp
@@ -30,13 +30,39 @@
 
 namespace android {
 
-DrmCompositionLayer::DrmCompositionLayer() : crtc(NULL), plane(NULL) {
-  memset(&layer, 0, sizeof(layer));
-  layer.acquireFenceFd = -1;
-  memset(&bo, 0, sizeof(bo));
+// Deep-copies |handle|: fds are dup()ed, ints copied. Returns NULL on failure.
+static native_handle_t *dup_buffer_handle(buffer_handle_t handle) {
+  native_handle_t *out = native_handle_create(handle->numFds, handle->numInts);
+  if (out == NULL)
+    return NULL;
+  for (int i = 0; i < handle->numFds; i++) {
+    if ((out->data[i] = dup(handle->data[i])) >= 0)
+      continue;
+    while (i-- > 0)  // dup() failed: close the copies made so far
+      close(out->data[i]);
+    native_handle_delete(out);
+    return NULL;
+  }
+  memcpy(out->data + handle->numFds, handle->data + handle->numFds,
+         sizeof(int) * handle->numInts);
+  return out;
 }
 
-DrmCompositionLayer::~DrmCompositionLayer() {
+static void free_buffer_handle(native_handle_t *handle) {
+  int ret = native_handle_close(handle);
+  if (ret)
+    ALOGE("Failed to close native handle %d", ret);
+  ret = native_handle_delete(handle);
+  if (ret)
+    ALOGE("Failed to delete native handle %d", ret);
+}
+
+DrmCompositionLayer::DrmCompositionLayer()
+    : crtc(NULL), plane(NULL), handle(NULL) {
+  memset(&layer, 0, sizeof(layer));
+  layer.releaseFenceFd = -1;
+  layer.acquireFenceFd = -1;
+  memset(&bo, 0, sizeof(bo));
 }
 
 DrmDisplayComposition::DrmDisplayComposition()
@@ -46,7 +72,10 @@
       timeline_fd_(-1),
       timeline_(0),
       timeline_current_(0),
-      dpms_mode_(DRM_MODE_DPMS_ON) {
+      timeline_pre_comp_done_(0),
+      pre_composition_layer_index_(-1),
+      dpms_mode_(DRM_MODE_DPMS_ON),
+      frame_no_(0) {
 }
 
 DrmDisplayComposition::~DrmDisplayComposition() {
@@ -55,6 +84,11 @@
     if (importer_ && iter->bo.fb_id)
       importer_->ReleaseBuffer(&iter->bo);
 
+    if (iter->handle) {
+      gralloc_->unregisterBuffer(gralloc_, iter->handle);
+      free_buffer_handle(iter->handle);
+    }
+
     if (iter->layer.acquireFenceFd >= 0)
       close(iter->layer.acquireFenceFd);
   }
@@ -66,11 +100,21 @@
   }
 }
 
-int DrmDisplayComposition::Init(DrmResources *drm, Importer *importer) {
+int DrmDisplayComposition::Init(DrmResources *drm, DrmCrtc *crtc,
+                                Importer *importer, uint64_t frame_no) {
   drm_ = drm;
+  crtc_ = crtc; // Can be NULL if we haven't modeset yet
   importer_ = importer;
+  frame_no_ = frame_no;
 
-  int ret = sw_sync_timeline_create();
+  int ret = hw_get_module(GRALLOC_HARDWARE_MODULE_ID,
+                          (const hw_module_t **)&gralloc_);
+  if (ret) {
+    ALOGE("Failed to open gralloc module %d", ret);
+    return ret;
+  }
+
+  ret = sw_sync_timeline_create();
   if (ret < 0) {
     ALOGE("Failed to create sw sync timeline %d", ret);
     return ret;
@@ -87,54 +131,188 @@
   return type_ == DRM_COMPOSITION_TYPE_EMPTY || type_ == des;
 }
 
-int DrmDisplayComposition::AddLayer(hwc_layer_1_t *layer, hwc_drm_bo_t *bo,
-                                    DrmCrtc *crtc, DrmPlane *plane) {
-  if (!validate_composition_type(DRM_COMPOSITION_TYPE_FRAME))
-    return -EINVAL;
-
-  ++timeline_;
-  layer->releaseFenceFd =
-      sw_sync_fence_create(timeline_fd_, "drm_fence", timeline_);
-  if (layer->releaseFenceFd < 0) {
-    ALOGE("Could not create release fence %d", layer->releaseFenceFd);
-    return layer->releaseFenceFd;
+static DrmPlane *TakePlane(DrmCrtc *crtc, std::vector<DrmPlane *> *planes) {
+  for (auto iter = planes->begin(); iter != planes->end(); ++iter) {
+    if ((*iter)->GetCrtcSupported(*crtc)) {
+      DrmPlane *plane = *iter;
+      planes->erase(iter);
+      return plane;
+    }
   }
-
-  DrmCompositionLayer_t c_layer;
-  c_layer.layer = *layer;
-  c_layer.bo = *bo;
-  c_layer.crtc = crtc;
-  c_layer.plane = plane;
-
-  layer->acquireFenceFd = -1;  // We own this now
-  layers_.push_back(c_layer);
-  type_ = DRM_COMPOSITION_TYPE_FRAME;
-  return 0;
+  return NULL;
 }
 
-int DrmDisplayComposition::AddLayer(hwc_layer_1_t *layer, DrmCrtc *crtc,
-                                    DrmPlane *plane) {
-  if (layer->transform != 0)
-    return -EINVAL;
+static DrmPlane *TakePlane(DrmCrtc *crtc,
+                           std::vector<DrmPlane *> *primary_planes,
+                           std::vector<DrmPlane *> *overlay_planes) {
+  DrmPlane *plane = TakePlane(crtc, primary_planes);
+  if (plane)
+    return plane;
+  return TakePlane(crtc, overlay_planes);
+}
 
-  if (!validate_composition_type(DRM_COMPOSITION_TYPE_FRAME))
-    return -EINVAL;
+int DrmDisplayComposition::CreateNextTimelineFence() {
+  ++timeline_;
+  return sw_sync_fence_create(timeline_fd_, "drm_fence", timeline_);
+}
 
-  hwc_drm_bo_t bo;
-  int ret = importer_->ImportBuffer(layer->handle, &bo);
-  if (ret) {
-    ALOGE("Failed to import handle of layer %d", ret);
-    return ret;
-  }
+int DrmDisplayComposition::IncreaseTimelineToPoint(int point) {
+  int timeline_increase = point - timeline_current_;
+  if (timeline_increase <= 0)
+    return 0;
 
-  ret = AddLayer(layer, &bo, crtc, plane);
+  int ret = sw_sync_timeline_inc(timeline_fd_, timeline_increase);
   if (ret)
-    importer_->ReleaseBuffer(&bo);
+    ALOGE("Failed to increment sync timeline %d", ret);
+  else
+    timeline_current_ = point;
 
   return ret;
 }
 
-int DrmDisplayComposition::AddDpmsMode(uint32_t dpms_mode) {
+// Populates this composition with the |num_layers| entries of |layers| picked
+// by |layer_indices|. Layers take planes from the given plane lists until none
+// match; later layers stay planeless for pre-composition into one extra
+// output layer. Every input layer gets a release fence. Returns 0 on success;
+// on failure releases what was acquired here and returns the error code.
+int DrmDisplayComposition::SetLayers(hwc_layer_1_t *layers, size_t num_layers,
+                                     size_t *layer_indices,
+                                     std::vector<DrmPlane *> *primary_planes,
+                                     std::vector<DrmPlane *> *overlay_planes) {
+  int ret = 0;
+  if (!validate_composition_type(DRM_COMPOSITION_TYPE_FRAME))
+    return -EINVAL;
+  if (num_layers > 0 && crtc_ == NULL) {
+    ALOGE("Failed to find crtc for this display");
+    return -ENODEV;
+  }
+
+  for (size_t layer_index = 0; layer_index < num_layers; layer_index++) {
+    hwc_layer_1_t *layer = &layers[layer_indices[layer_index]];
+    native_handle_t *handle_copy = dup_buffer_handle(layer->handle);
+    if (handle_copy == NULL) {
+      ALOGE("Failed to duplicate handle");
+      ret = -ENOMEM;
+      goto fail;
+    }
+
+    ret = gralloc_->registerBuffer(gralloc_, handle_copy);
+    if (ret) {
+      ALOGE("Failed to register buffer handle %d", ret);
+      free_buffer_handle(handle_copy);
+      goto fail;
+    }
+
+    layers_.emplace_back();
+    DrmCompositionLayer_t *c_layer = &layers_.back();
+    c_layer->layer = *layer;
+    c_layer->handle = handle_copy;
+    c_layer->crtc = crtc_;
+
+    ret = importer_->ImportBuffer(layer->handle, &c_layer->bo);
+    if (ret) {
+      ALOGE("Failed to import handle of layer %d", ret);
+      goto fail;
+    }
+
+    if (pre_composition_layer_index_ == -1) {
+      c_layer->plane = TakePlane(crtc_, primary_planes, overlay_planes);
+      if (c_layer->plane == NULL) {
+        if (layers_.size() <= 1) {
+          ALOGE("Failed to match any planes to the crtc of this display");
+          ret = -ENODEV;
+          goto fail;
+        }
+
+        layers_.emplace_back();
+        // c_layer's address might have changed when we resized the vector
+        c_layer = &layers_[layers_.size() - 2];
+        DrmCompositionLayer_t &pre_comp_layer = layers_.back();
+        pre_comp_layer.crtc = crtc_;
+        hwc_layer_1_t &pre_comp_output_layer = pre_comp_layer.layer;
+        memset(&pre_comp_output_layer, 0, sizeof(pre_comp_output_layer));
+        pre_comp_output_layer.compositionType = HWC_OVERLAY;
+        pre_comp_output_layer.acquireFenceFd = -1;
+        pre_comp_output_layer.releaseFenceFd = -1;
+        pre_comp_output_layer.planeAlpha = 0xff;
+        pre_comp_output_layer.visibleRegionScreen.numRects = 1;
+        pre_comp_output_layer.visibleRegionScreen.rects =
+            &pre_comp_output_layer.displayFrame;
+        pre_composition_layer_index_ = layers_.size() - 1;
+        // This is all to fix up the previous layer, which has now become part
+        // of the set of pre-composition layers because we are stealing its
+        // plane.
+        DrmCompositionLayer_t &last_c_layer = layers_[layers_.size() - 3];
+        std::swap(pre_comp_layer.plane, last_c_layer.plane);
+        hwc_layer_1_t *last_layer = &layers[layer_indices[layer_index - 1]];
+        ret = last_layer->releaseFenceFd = CreateNextTimelineFence();
+        if (ret < 0) {
+          ALOGE("Could not create release fence %d", ret);
+          goto fail;
+        }
+      }
+    }
+
+    if (c_layer->plane == NULL) {
+      // Layers to be pre composited all get the earliest release fences as they
+      // will get released soonest.
+      ret = layer->releaseFenceFd = CreateNextTimelineFence();
+      if (ret < 0) {
+        ALOGE("Could not create release fence %d", ret);
+        goto fail;
+      }
+    }
+  }
+
+  timeline_pre_comp_done_ = timeline_;
+
+  for (size_t layer_index = 0; layer_index < num_layers; layer_index++) {
+    hwc_layer_1_t *layer = &layers[layer_indices[layer_index]];
+    if (layer->releaseFenceFd >= 0)
+      continue;
+    ret = layer->releaseFenceFd = CreateNextTimelineFence();
+    if (ret < 0) {
+      ALOGE("Could not create release fence %d", ret);
+      goto fail;
+    }
+  }
+  for (size_t layer_index = 0; layer_index < num_layers; layer_index++)
+    layers[layer_indices[layer_index]].acquireFenceFd = -1;  // We own this now
+
+  type_ = DRM_COMPOSITION_TYPE_FRAME;
+  return 0;
+
+fail:
+  for (size_t c_layer_index = 0; c_layer_index < layers_.size();
+       c_layer_index++) {
+    DrmCompositionLayer_t &c_layer = layers_[c_layer_index];
+    if (c_layer.handle) {
+      gralloc_->unregisterBuffer(gralloc_, c_layer.handle);
+      free_buffer_handle(c_layer.handle);
+    }
+    if (c_layer.bo.fb_id)
+      importer_->ReleaseBuffer(&c_layer.bo);
+    if (c_layer.plane != NULL) {
+      std::vector<DrmPlane *> *return_to =
+          (c_layer.plane->type() == DRM_PLANE_TYPE_PRIMARY) ? primary_planes
+                                                            : overlay_planes;
+      return_to->push_back(c_layer.plane);  // insert-by-index could overrun
+    }
+  }
+  layers_.clear();
+  pre_composition_layer_index_ = -1;
+  for (size_t layer_index = 0; layer_index < num_layers; layer_index++) {
+    hwc_layer_1_t *layer = &layers[layer_indices[layer_index]];
+    if (layer->releaseFenceFd >= 0) {
+      close(layer->releaseFenceFd);
+      layer->releaseFenceFd = -1;
+    }
+  }
+  IncreaseTimelineToPoint(timeline_);  // also keeps timeline_current_ in sync
+  return ret;
+}
+
+int DrmDisplayComposition::SetDpmsMode(uint32_t dpms_mode) {
   if (!validate_composition_type(DRM_COMPOSITION_TYPE_DPMS))
     return -EINVAL;
   dpms_mode_ = dpms_mode;
@@ -143,32 +321,64 @@
 }
 
 int DrmDisplayComposition::AddPlaneDisable(DrmPlane *plane) {
-  DrmCompositionLayer_t c_layer;
+  layers_.emplace_back();
+  DrmCompositionLayer_t &c_layer = layers_.back();
   c_layer.crtc = NULL;
   c_layer.plane = plane;
-  layers_.push_back(c_layer);
   return 0;
 }
 
+// Releases the buffer, handle copy and acquire fence of every layer that has
+// no plane, then drops those layers from the composition.
+void DrmDisplayComposition::RemoveNoPlaneLayers() {
+  auto has_no_plane = [](DrmCompositionLayer_t &l) { return l.plane == NULL; };
+  for (auto iter = layers_.begin(); iter != layers_.end(); ++iter) {
+    if (!has_no_plane(*iter))
+      continue;
+    // importer_ can be NULL (compositions may be created without one)
+    if (importer_ && iter->bo.fb_id)
+      importer_->ReleaseBuffer(&iter->bo);
+
+    if (iter->handle) {
+      gralloc_->unregisterBuffer(gralloc_, iter->handle);
+      free_buffer_handle(iter->handle);
+    }
+
+    if (iter->layer.acquireFenceFd >= 0) {
+      close(iter->layer.acquireFenceFd);
+      iter->layer.acquireFenceFd = -1;
+    }
+  }
+
+  layers_.erase(std::remove_if(layers_.begin(), layers_.end(), has_no_plane),
+                layers_.end());
+}
+
+int DrmDisplayComposition::SignalPreCompositionDone() {
+  return IncreaseTimelineToPoint(timeline_pre_comp_done_);
+}
+
 int DrmDisplayComposition::FinishComposition() {
-  int timeline_increase = timeline_ - timeline_current_;
-  if (timeline_increase <= 0)
-    return 0;
-
-  int ret = sw_sync_timeline_inc(timeline_fd_, timeline_increase);
-  if (ret)
-    ALOGE("Failed to increment sync timeline %d", ret);
-  else
-    timeline_current_ = timeline_;
-
-  return ret;
+  return IncreaseTimelineToPoint(timeline_);
 }
 
 DrmCompositionLayerVector_t *DrmDisplayComposition::GetCompositionLayers() {
   return &layers_;
 }
 
+int DrmDisplayComposition::pre_composition_layer_index() const {
+  return pre_composition_layer_index_;
+}
+
 uint32_t DrmDisplayComposition::dpms_mode() const {
   return dpms_mode_;
 }
+
+uint64_t DrmDisplayComposition::frame_no() const {
+  return frame_no_;
+}
+
+Importer *DrmDisplayComposition::importer() const {
+  return importer_;
+}
 }
diff --git a/drmdisplaycomposition.h b/drmdisplaycomposition.h
index 9c51078..be4ecef 100644
--- a/drmdisplaycomposition.h
+++ b/drmdisplaycomposition.h
@@ -18,11 +18,14 @@
 #define ANDROID_DRM_DISPLAY_COMPOSITION_H_
 
 #include "drm_hwcomposer.h"
+#include "drmcrtc.h"
 #include "drmplane.h"
+#include "glworker.h"
 #include "importer.h"
 
 #include <vector>
 
+#include <hardware/gralloc.h>
 #include <hardware/hardware.h>
 #include <hardware/hwcomposer.h>
 
@@ -36,12 +39,12 @@
 
 typedef struct DrmCompositionLayer {
   DrmCompositionLayer();
-  ~DrmCompositionLayer();
 
   hwc_layer_1_t layer;
   hwc_drm_bo_t bo;
   DrmCrtc *crtc;
   DrmPlane *plane;
+  native_handle_t *handle;
 } DrmCompositionLayer_t;
 typedef std::vector<DrmCompositionLayer_t> DrmCompositionLayerVector_t;
 
@@ -50,39 +53,55 @@
   DrmDisplayComposition();
   ~DrmDisplayComposition();
 
-  int Init(DrmResources *drm, Importer *importer);
+  int Init(DrmResources *drm, DrmCrtc *crtc, Importer *importer,
+           uint64_t frame_no);
 
   DrmCompositionType type() const;
 
-  int AddLayer(hwc_layer_1_t *layer, hwc_drm_bo_t *bo, DrmCrtc *crtc,
-               DrmPlane *plane);
-  // Like the AddLayer that accepts a hwc_drm_bo_t, but uses Importer to import
-  // the layer->handle itself.
-  int AddLayer(hwc_layer_1_t *layer, DrmCrtc *crtc, DrmPlane *plane);
+  int SetLayers(hwc_layer_1_t *layers, size_t num_layers, size_t *layer_indices,
+                std::vector<DrmPlane *> *primary_planes,
+                std::vector<DrmPlane *> *overlay_planes);
   int AddPlaneDisable(DrmPlane *plane);
-  int AddDpmsMode(uint32_t dpms_mode);
+  int SetDpmsMode(uint32_t dpms_mode);
 
+  void RemoveNoPlaneLayers();
+  int SignalPreCompositionDone();
   int FinishComposition();
 
   DrmCompositionLayerVector_t *GetCompositionLayers();
+  int pre_composition_layer_index() const;
   uint32_t dpms_mode() const;
 
+  uint64_t frame_no() const;
+
+  Importer *importer() const;
+
  private:
   DrmDisplayComposition(const DrmDisplayComposition &) = delete;
 
   bool validate_composition_type(DrmCompositionType desired);
 
+  int CreateNextTimelineFence();
+  int IncreaseTimelineToPoint(int point);
+
   DrmResources *drm_;
+  DrmCrtc *crtc_;
   Importer *importer_;
+  const gralloc_module_t *gralloc_;
+  EGLDisplay egl_display_;
 
   DrmCompositionType type_;
 
   int timeline_fd_;
   int timeline_;
   int timeline_current_;
+  int timeline_pre_comp_done_;
 
   DrmCompositionLayerVector_t layers_;
+  int pre_composition_layer_index_;
   uint32_t dpms_mode_;
+
+  uint64_t frame_no_;
 };
 }
 
diff --git a/drmdisplaycompositor.cpp b/drmdisplaycompositor.cpp
index 2021923..b293681 100644
--- a/drmdisplaycompositor.cpp
+++ b/drmdisplaycompositor.cpp
@@ -44,7 +44,6 @@
     : drm_(NULL),
       display_(-1),
       worker_(this),
-      frame_no_(0),
       initialized_(false),
       active_(false),
       framebuffer_index_(0),
@@ -165,17 +164,6 @@
   int ret = 0;
   DrmCompositionLayerVector_t *layers = display_comp->GetCompositionLayers();
 
-  auto last_layer = find_if(layers->rbegin(), layers->rend(),
-                            drm_composition_layer_has_plane);
-  if (last_layer == layers->rend()) {
-    ALOGE("Frame has no overlays");
-    return -EINVAL;
-  }
-
-  DrmCompositionLayer_t &comp_layer = *last_layer;
-  DrmPlane *stolen_plane = NULL;
-  std::swap(stolen_plane, comp_layer.plane);
-
   DrmConnector *connector = drm_->GetConnectorForDisplay(display_);
   if (connector == NULL) {
     ALOGE("Failed to determine display mode: no connector for display %d",
@@ -198,69 +186,59 @@
   }
 
   std::vector<hwc_layer_1_t> pre_comp_layers;
-  for (const auto &comp_layer : *layers)
-    if (comp_layer.plane == NULL)
+  for (auto &comp_layer : *layers) {
+    if (comp_layer.plane == NULL) {
       pre_comp_layers.push_back(comp_layer.layer);
-
-  if (!pre_compositor_) {
-    pre_compositor_.reset(new GLWorkerCompositor());
-    ret = pre_compositor_->Init();
-    if (ret) {
-      ALOGE("Failed to initialize OpenGL compositor %d", ret);
-      return ret;
+      pre_comp_layers.back().handle = comp_layer.handle;
+      comp_layer.layer.acquireFenceFd = -1;
     }
   }
-  ret = pre_compositor_->CompositeAndFinish(
-      pre_comp_layers.data(), pre_comp_layers.size(), fb.buffer());
+
+  ret = pre_compositor_->Composite(pre_comp_layers.data(),
+                                   pre_comp_layers.size(), fb.buffer());
+  pre_compositor_->Finish();
+
+  for (auto &pre_comp_layer : pre_comp_layers) {
+    if (pre_comp_layer.acquireFenceFd >= 0) {
+      close(pre_comp_layer.acquireFenceFd);
+      pre_comp_layer.acquireFenceFd = -1;
+    }
+  }
+
   if (ret) {
     ALOGE("Failed to composite layers");
     return ret;
   }
 
-  layers->erase(std::remove_if(layers->begin(), layers->end(),
-                               drm_composition_layer_has_no_plane),
-                layers->end());
-
-  hwc_layer_1_t pre_comp_output_layer;
-  memset(&pre_comp_output_layer, 0, sizeof(pre_comp_output_layer));
-  pre_comp_output_layer.compositionType = HWC_OVERLAY;
+  DrmCompositionLayer_t &pre_comp_layer =
+      layers->at(display_comp->pre_composition_layer_index());
+  ret = display_comp->importer()->ImportBuffer(fb.buffer()->handle,
+                                               &pre_comp_layer.bo);
+  if (ret) {
+    ALOGE("Failed to import handle of layer %d", ret);
+    return ret;
+  }
+  hwc_layer_1_t &pre_comp_output_layer = pre_comp_layer.layer;
   pre_comp_output_layer.handle = fb.buffer()->handle;
-  pre_comp_output_layer.acquireFenceFd = -1;
-  pre_comp_output_layer.releaseFenceFd = -1;
-  pre_comp_output_layer.planeAlpha = 0xff;
-  pre_comp_output_layer.visibleRegionScreen.numRects = 1;
   pre_comp_output_layer.visibleRegionScreen.rects =
       &pre_comp_output_layer.displayFrame;
-  pre_comp_output_layer.sourceCropf.top =
-      pre_comp_output_layer.displayFrame.top = 0;
-  pre_comp_output_layer.sourceCropf.left =
-      pre_comp_output_layer.displayFrame.left = 0;
   pre_comp_output_layer.sourceCropf.right =
       pre_comp_output_layer.displayFrame.right = fb.buffer()->getWidth();
   pre_comp_output_layer.sourceCropf.bottom =
       pre_comp_output_layer.displayFrame.bottom = fb.buffer()->getHeight();
 
-  ret = display_comp->AddLayer(&pre_comp_output_layer,
-                               drm_->GetCrtcForDisplay(display_), stolen_plane);
-  if (ret) {
-    ALOGE("Failed to add composited layer %d", ret);
-    return ret;
-  }
-
   fb.set_release_fence_fd(pre_comp_output_layer.releaseFenceFd);
   framebuffer_index_ = (framebuffer_index_ + 1) % DRM_DISPLAY_BUFFERS;
 
+  display_comp->RemoveNoPlaneLayers();
+  display_comp->SignalPreCompositionDone();
   return ret;
 }
 
 int DrmDisplayCompositor::ApplyFrame(DrmDisplayComposition *display_comp) {
   int ret = 0;
 
-  DrmCompositionLayerVector_t *layers = display_comp->GetCompositionLayers();
-  bool use_pre_comp = std::any_of(layers->begin(), layers->end(),
-                                  drm_composition_layer_has_no_plane);
-
-  if (use_pre_comp) {
+  if (display_comp->pre_composition_layer_index() >= 0) {
     ret = ApplyPreComposite(display_comp);
     if (ret)
       return ret;
@@ -272,6 +250,7 @@
     return -ENOMEM;
   }
 
+  DrmCompositionLayerVector_t *layers = display_comp->GetCompositionLayers();
   for (DrmCompositionLayerVector_t::iterator iter = layers->begin();
        iter != layers->end(); ++iter) {
     hwc_layer_1_t *layer = &iter->layer;
@@ -409,6 +388,16 @@
 
 int DrmDisplayCompositor::Composite() {
   ATRACE_CALL();
+
+  if (!pre_compositor_) {
+    pre_compositor_.reset(new GLWorkerCompositor());
+    int ret = pre_compositor_->Init();
+    if (ret) {
+      ALOGE("Failed to initialize OpenGL compositor %d", ret);
+      return ret;
+    }
+  }
+
   int ret = pthread_mutex_lock(&lock_);
   if (ret) {
     ALOGE("Failed to acquire compositor lock %d", ret);
diff --git a/drmdisplaycompositor.h b/drmdisplaycompositor.h
index 46d71ae..50109b3 100644
--- a/drmdisplaycompositor.h
+++ b/drmdisplaycompositor.h
@@ -63,8 +63,6 @@
   std::queue<std::unique_ptr<DrmDisplayComposition>> composite_queue_;
   std::unique_ptr<DrmDisplayComposition> active_composition_;
 
-  uint64_t frame_no_;
-
   bool initialized_;
   bool active_;
 
diff --git a/drmmode.cpp b/drmmode.cpp
index abd3b32..7f8c04d 100644
--- a/drmmode.cpp
+++ b/drmmode.cpp
@@ -68,8 +68,7 @@
          h_total_ == m.htotal && h_skew_ == m.hskew &&
          v_display_ == m.vdisplay && v_sync_start_ == m.vsync_start &&
          v_sync_end_ == m.vsync_end && v_total_ == m.vtotal &&
-         v_scan_ == m.vscan && flags_ == m.flags &&
-         type_ == m.type;
+         v_scan_ == m.vscan && flags_ == m.flags && type_ == m.type;
 }
 
 void DrmMode::ToDrmModeModeInfo(drm_mode_modeinfo *m) const {
diff --git a/drmproperty.cpp b/drmproperty.cpp
index 378e3a2..7b4a4f9 100644
--- a/drmproperty.cpp
+++ b/drmproperty.cpp
@@ -71,7 +71,6 @@
     type_ = DRM_PROPERTY_TYPE_OBJECT;
   else if (flags_ & DRM_MODE_PROP_BLOB)
     type_ = DRM_PROPERTY_TYPE_BLOB;
-
 }
 
 uint32_t DrmProperty::id() const {
diff --git a/drmresources.cpp b/drmresources.cpp
index feb5187..32fe5cc 100644
--- a/drmresources.cpp
+++ b/drmresources.cpp
@@ -467,18 +467,17 @@
     return -EINVAL;
   }
 
-  DrmComposition *comp = compositor_.CreateComposition(NULL);
+  std::unique_ptr<DrmComposition> comp(compositor_.CreateComposition(NULL));
   if (!comp) {
     ALOGE("Failed to create composition for dpms on %d", display);
     return -ENOMEM;
   }
-  int ret = comp->AddDpmsMode(display, mode);
+  int ret = comp->SetDpmsMode(display, mode);
   if (ret) {
     ALOGE("Failed to add dpms %ld to composition on %d %d", mode, display, ret);
-    delete comp;
     return ret;
   }
-  ret = compositor_.QueueComposition(comp);
+  ret = compositor_.QueueComposition(std::move(comp));
   if (ret) {
     ALOGE("Failed to queue dpms composition on %d %d", display, ret);
     return ret;
diff --git a/glworker.cpp b/glworker.cpp
index df6a8f2..f18ace9 100644
--- a/glworker.cpp
+++ b/glworker.cpp
@@ -15,7 +15,7 @@
  */
 
 #define ATRACE_TAG ATRACE_TAG_GRAPHICS
-#define LOG_TAG "GLWorker"
+#define LOG_TAG "hwc-gl-worker"
 
 #include <algorithm>
 #include <string>
@@ -23,6 +23,8 @@
 
 #include <sys/resource.h>
 
+#include <cutils/properties.h>
+
 #include <hardware/hardware.h>
 #include <hardware/hwcomposer.h>
 
@@ -54,11 +56,7 @@
 // [ 2 4 ]
 float kTextureTransformMatrices[] = {
    1.0f,  0.0f,  0.0f,  1.0f, // identity matrix
-  -1.0f,  0.0f,  0.0f,  1.0f, // HWC_TRANSFORM_FLIP_H;
-   1.0f,  0.0f,  0.0f, -1.0f, // HWC_TRANSFORM_FLIP_V;
-   0.0f,  1.0f, -1.0f,  0.0f, // HWC_TRANSFORM_ROT_90;
-  -1.0f,  0.0f,  0.0f, -1.0f, // HWC_TRANSFORM_ROT_180;
-   0.0f, -1.0f,  1.0f,  0.0f, // HWC_TRANSFORM_ROT_270;
+   0.0f,  1.0f,  1.0f,  0.0f, // swap x and y
 };
 // clang-format on
 
@@ -194,9 +192,9 @@
 "out vec2 fTexCoords[LAYER_COUNT];                                          \n"
 "void main() {                                                              \n"
 "  for (int i = 0; i < LAYER_COUNT; i++) {                                  \n"
-"    fTexCoords[i] = (uLayerCrop[i].xy + vTexCoords * uLayerCrop[i].zw) /   \n"
+"    vec2 tempCoords = vTexCoords * uTexMatrix[i];                          \n"
+"    fTexCoords[i] = (uLayerCrop[i].xy + tempCoords * uLayerCrop[i].zw) /   \n"
 "                     vec2(textureSize(uLayerTextures[i], 0));              \n"
-"    fTexCoords[i] *= uTexMatrix[i];                                        \n"
 "  }                                                                        \n"
 "  vec2 scaledPosition = uViewport.xy + vPosition * uViewport.zw;           \n"
 "  gl_Position = vec4(scaledPosition * vec2(2.0) - vec2(1.0), 0.0, 1.0);    \n"
@@ -352,39 +350,49 @@
         cmd.texture_count++;
         src.texture_index = i;
 
-        for (int b = 0; b < 4; b++) {
-          float bound_percent = (cmd.bounds[b] - display_rect.bounds[b % 2]) /
-                                display_size[b % 2];
-          src.crop_bounds[b] =
-              crop_rect.bounds[b % 2] + bound_percent * crop_size[b % 2];
-        }
-
-        float *src_tex_mat;
+        bool swap_xy, flip_xy[2];
         switch (layer.transform) {
           case HWC_TRANSFORM_FLIP_H:
-            src_tex_mat = &kTextureTransformMatrices[4];
+            swap_xy = false; flip_xy[0] = true; flip_xy[1] = false;
             break;
           case HWC_TRANSFORM_FLIP_V:
-            src_tex_mat = &kTextureTransformMatrices[8];
+            swap_xy = false; flip_xy[0] = false; flip_xy[1] = true;
             break;
           case HWC_TRANSFORM_ROT_90:
-            src_tex_mat = &kTextureTransformMatrices[12];
+            swap_xy = true; flip_xy[0] = false; flip_xy[1] = true;
             break;
           case HWC_TRANSFORM_ROT_180:
-            src_tex_mat = &kTextureTransformMatrices[16];
+            swap_xy = false; flip_xy[0] = true; flip_xy[1] = true;
             break;
           case HWC_TRANSFORM_ROT_270:
-            src_tex_mat = &kTextureTransformMatrices[20];
+            swap_xy = true; flip_xy[0] = true; flip_xy[1] = false;
             break;
           default:
             ALOGE(
                 "Unknown transform for layer: defaulting to identity "
                 "transform");
           case 0:
-            src_tex_mat = &kTextureTransformMatrices[0];
+            swap_xy = false; flip_xy[0] = false; flip_xy[1] = false;
             break;
         }
-        std::copy_n(src_tex_mat, 4, src.texture_matrix);
+
+        if (swap_xy)
+          std::copy_n(&kTextureTransformMatrices[4], 4, src.texture_matrix);
+        else
+          std::copy_n(&kTextureTransformMatrices[0], 4, src.texture_matrix);
+
+        for (int j = 0; j < 4; j++) {
+          int b = j ^ (swap_xy ? 1 : 0);
+          float bound_percent = (cmd.bounds[b] - display_rect.bounds[b % 2]) /
+                                display_size[b % 2];
+          if (flip_xy[j % 2]) {
+            src.crop_bounds[j] =
+                crop_rect.bounds[j % 2 + 2] - bound_percent * crop_size[j % 2];
+          } else {
+            src.crop_bounds[j] =
+                crop_rect.bounds[j % 2] + bound_percent * crop_size[j % 2];
+          }
+        }
 
         if (layer.blending == HWC_BLENDING_NONE) {
           src.alpha = 1.0f;
@@ -444,7 +452,7 @@
   glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT);
   glBindTexture(GL_TEXTURE_2D, 0);
 
-  out->image.reset(image);
+  out->image.reset(egl_display, image);
   out->texture.reset(texture);
 
   return 0;
@@ -552,7 +560,7 @@
 }
 
 int GLWorkerCompositor::Composite(hwc_layer_1 *layers, size_t num_layers,
-                                  sp<GraphicBuffer> framebuffer) {
+                                  const sp<GraphicBuffer> &framebuffer) {
   ATRACE_CALL();
   int ret = 0;
   size_t i;
@@ -565,54 +573,31 @@
 
   GLint frame_width = framebuffer->getWidth();
   GLint frame_height = framebuffer->getHeight();
-  EGLSyncKHR finished_sync;
-
-  AutoEGLImageKHR egl_fb_image(
-      eglCreateImageKHR(egl_display_, EGL_NO_CONTEXT, EGL_NATIVE_BUFFER_ANDROID,
-                        (EGLClientBuffer)framebuffer->getNativeBuffer(),
-                        NULL /* no attribs */),
-      EGLImageDeleter(egl_display_));
-
-  if (egl_fb_image.get() == EGL_NO_IMAGE_KHR) {
-    ALOGE("Failed to make image from target buffer: %s", GetEGLError());
-    return -EINVAL;
-  }
-
-  GLuint gl_fb_tex;
-  glGenTextures(1, &gl_fb_tex);
-  AutoGLTexture gl_fb_tex_auto(gl_fb_tex);
-  glBindTexture(GL_TEXTURE_2D, gl_fb_tex);
-  glEGLImageTargetTexture2DOES(GL_TEXTURE_2D,
-                               (GLeglImageOES)egl_fb_image.get());
-  glBindTexture(GL_TEXTURE_2D, 0);
-
-  GLuint gl_fb;
-  glGenFramebuffers(1, &gl_fb);
-  AutoGLFramebuffer gl_fb_auto(gl_fb);
-  glBindFramebuffer(GL_FRAMEBUFFER, gl_fb);
-  glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
-                         gl_fb_tex, 0);
-
-  if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
-    ALOGE("Failed framebuffer check for created target buffer: %s",
-          GetGLFramebufferError());
+  CachedFramebuffer *cached_framebuffer =
+      PrepareAndCacheFramebuffer(framebuffer);
+  if (cached_framebuffer == NULL) {
+    ALOGE("Composite failed because of failed framebuffer");
     return -EINVAL;
   }
 
   for (i = 0; i < num_layers; i++) {
-    const struct hwc_layer_1 *layer = &layers[i];
+    struct hwc_layer_1 *layer = &layers[i];
 
     if (ret) {
-      if (layer->acquireFenceFd >= 0)
+      if (layer->acquireFenceFd >= 0) {
         close(layer->acquireFenceFd);
+        layer->acquireFenceFd = -1;
+      }
       continue;
     }
 
-    layer_textures.emplace_back(egl_display_);
+    layer_textures.emplace_back();
     ret = CreateTextureFromHandle(egl_display_, layer->handle,
                                   &layer_textures.back());
+
     if (!ret) {
       ret = EGLFenceWait(egl_display_, layer->acquireFenceFd);
+      layer->acquireFenceFd = -1;
     }
     if (ret) {
       layer_textures.pop_back();
@@ -699,12 +684,103 @@
   return ret;
 }
 
-int GLWorkerCompositor::CompositeAndFinish(hwc_layer_1 *layers,
-                                           size_t num_layers,
-                                           sp<GraphicBuffer> framebuffer) {
-  int ret = Composite(layers, num_layers, framebuffer);
+void GLWorkerCompositor::Finish() {
+  ATRACE_CALL();
   glFinish();
-  return ret;
+
+  char use_framebuffer_cache_opt[PROPERTY_VALUE_MAX];
+  property_get("hwc.drm.use_framebuffer_cache", use_framebuffer_cache_opt, "1");
+  bool use_framebuffer_cache = atoi(use_framebuffer_cache_opt);
+
+  if (use_framebuffer_cache) {
+    for (auto &fb : cached_framebuffers_)
+      fb.strong_framebuffer.clear();
+  } else {
+    cached_framebuffers_.clear();
+  }
+}
+
+GLWorkerCompositor::CachedFramebuffer::CachedFramebuffer(
+    const sp<GraphicBuffer> &gb, AutoEGLDisplayImage &&image,
+    AutoGLTexture &&tex, AutoGLFramebuffer &&fb)
+    : strong_framebuffer(gb),  // strong ref; dropped again in Finish()
+      weak_framebuffer(gb),    // weak ref compared against on cache lookup
+      egl_fb_image(std::move(image)),  // handles are moved in, not copied
+      gl_fb_tex(std::move(tex)),
+      gl_fb(std::move(fb)) {
+}
+
+bool GLWorkerCompositor::CachedFramebuffer::Promote() {
+  if (strong_framebuffer.get() != NULL)  // already holding a strong ref
+    return true;
+  strong_framebuffer = weak_framebuffer.promote();  // may yield NULL
+  return strong_framebuffer.get() != NULL;  // true only if a strong ref is held
+}
+
+GLWorkerCompositor::CachedFramebuffer *
+GLWorkerCompositor::FindCachedFramebuffer(
+    const sp<GraphicBuffer> &framebuffer) {
+  for (auto &fb : cached_framebuffers_)  // linear scan of the cache
+    if (fb.weak_framebuffer == framebuffer)
+      return &fb;  // pointer into cached_framebuffers_
+  return NULL;  // no cache entry matches this buffer
+}
+
+GLWorkerCompositor::CachedFramebuffer *
+GLWorkerCompositor::PrepareAndCacheFramebuffer(
+    const sp<GraphicBuffer> &framebuffer) {
+  CachedFramebuffer *cached_framebuffer = FindCachedFramebuffer(framebuffer);
+  if (cached_framebuffer != NULL) {  // an entry exists for this buffer
+    if (cached_framebuffer->Promote()) {  // strong ref obtained: reuse entry
+      glBindFramebuffer(GL_FRAMEBUFFER, cached_framebuffer->gl_fb.get());
+      return cached_framebuffer;
+    }
+
+    for (auto it = cached_framebuffers_.begin();
+         it != cached_framebuffers_.end(); ++it) {
+      if (it->weak_framebuffer == framebuffer) {
+        cached_framebuffers_.erase(it);  // promotion failed: stale entry
+        break;
+      }
+    }
+  }
+
+  AutoEGLDisplayImage egl_fb_image(
+      egl_display_,
+      eglCreateImageKHR(egl_display_, EGL_NO_CONTEXT, EGL_NATIVE_BUFFER_ANDROID,
+                        (EGLClientBuffer)framebuffer->getNativeBuffer(),
+                        NULL /* no attribs */));
+
+  if (egl_fb_image.image() == EGL_NO_IMAGE_KHR) {
+    ALOGE("Failed to make image from target buffer: %s", GetEGLError());
+    return NULL;
+  }
+
+  GLuint gl_fb_tex;
+  glGenTextures(1, &gl_fb_tex);
+  AutoGLTexture gl_fb_tex_auto(gl_fb_tex);  // deletes the texture on early exit
+  glBindTexture(GL_TEXTURE_2D, gl_fb_tex);
+  glEGLImageTargetTexture2DOES(GL_TEXTURE_2D,
+                               (GLeglImageOES)egl_fb_image.image());
+  glBindTexture(GL_TEXTURE_2D, 0);
+
+  GLuint gl_fb;
+  glGenFramebuffers(1, &gl_fb);
+  AutoGLFramebuffer gl_fb_auto(gl_fb);  // deletes the FBO on early exit
+  glBindFramebuffer(GL_FRAMEBUFFER, gl_fb);  // left bound on success
+  glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
+                         gl_fb_tex, 0);
+
+  if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
+    ALOGE("Failed framebuffer check for created target buffer: %s",
+          GetGLFramebufferError());
+    return NULL;
+  }
+
+  cached_framebuffers_.emplace_back(framebuffer, std::move(egl_fb_image),
+                                    std::move(gl_fb_tex_auto),
+                                    std::move(gl_fb_auto));
+  return &cached_framebuffers_.back();  // pointer into the cache vector
 }
 
 }  // namespace android
diff --git a/glworker.h b/glworker.h
index 8252b62..ce41444 100644
--- a/glworker.h
+++ b/glworker.h
@@ -17,7 +17,6 @@
 #ifndef ANDROID_GL_WORKER_H_
 #define ANDROID_GL_WORKER_H_
 
-#include <memory>
 #include <vector>
 
 #define EGL_EGLEXT_PROTOTYPES
@@ -30,71 +29,49 @@
 
 #include <ui/GraphicBuffer.h>
 
-struct hwc_layer_1;
+#include "autogl.h"
 
 namespace android {
 
-#define AUTO_GL_TYPE(name, type, zero, deleter) \
-  struct name##Deleter {                        \
-    typedef type pointer;                       \
-                                                \
-    void operator()(pointer p) const {          \
-      if (p != zero) {                          \
-        deleter;                                \
-      }                                         \
-    }                                           \
-  };                                            \
-  typedef std::unique_ptr<type, name##Deleter> name;
-
-AUTO_GL_TYPE(AutoGLFramebuffer, GLuint, 0, glDeleteFramebuffers(1, &p))
-AUTO_GL_TYPE(AutoGLBuffer, GLuint, 0, glDeleteBuffers(1, &p))
-AUTO_GL_TYPE(AutoGLTexture, GLuint, 0, glDeleteTextures(1, &p))
-AUTO_GL_TYPE(AutoGLShader, GLint, 0, glDeleteShader(p))
-AUTO_GL_TYPE(AutoGLProgram, GLint, 0, glDeleteProgram(p))
-
-struct EGLImageDeleter {
-  typedef EGLImageKHR pointer;
-
-  EGLDisplay egl_display_;
-
-  EGLImageDeleter(EGLDisplay egl_display) : egl_display_(egl_display) {
-  }
-
-  void operator()(EGLImageKHR p) const {
-    if (p != EGL_NO_IMAGE_KHR) {
-      eglDestroyImageKHR(egl_display_, p);
-    }
-  }
-};
-typedef std::unique_ptr<EGLImageKHR, EGLImageDeleter> AutoEGLImageKHR;
-
-struct AutoEGLImageAndGLTexture {
-  AutoEGLImageKHR image;
-  AutoGLTexture texture;
-
-  AutoEGLImageAndGLTexture(EGLDisplay egl_display)
-      : image(EGL_NO_IMAGE_KHR, EGLImageDeleter(egl_display)) {
-  }
-};
-
 class GLWorkerCompositor {
  public:
   GLWorkerCompositor();
   ~GLWorkerCompositor();
 
   int Init();
-
   int Composite(hwc_layer_1 *layers, size_t num_layers,
-                sp<GraphicBuffer> framebuffer);
-  int CompositeAndFinish(hwc_layer_1 *layers, size_t num_layers,
-                         sp<GraphicBuffer> framebuffer);
+                const sp<GraphicBuffer> &framebuffer);
+  void Finish();
 
  private:
+  struct CachedFramebuffer {
+    // If the strong_framebuffer is non-NULL, we are holding a strong reference
+    // until we are sure rendering is done. The weak reference will be equal in
+    // that case.
+    sp<GraphicBuffer> strong_framebuffer;
+    wp<GraphicBuffer> weak_framebuffer;
+    AutoEGLDisplayImage egl_fb_image;
+    AutoGLTexture gl_fb_tex;
+    AutoGLFramebuffer gl_fb;
+
+    CachedFramebuffer(const sp<GraphicBuffer> &gb, AutoEGLDisplayImage &&image,
+                      AutoGLTexture &&tex, AutoGLFramebuffer &&fb);
+
+    bool Promote();
+  };
+
+  CachedFramebuffer *FindCachedFramebuffer(
+      const sp<GraphicBuffer> &framebuffer);
+  CachedFramebuffer *PrepareAndCacheFramebuffer(
+      const sp<GraphicBuffer> &framebuffer);
+
   EGLDisplay egl_display_;
   EGLContext egl_ctx_;
 
   std::vector<AutoGLProgram> blend_programs_;
   AutoGLBuffer vertex_buffer_;
+
+  std::vector<CachedFramebuffer> cached_framebuffers_;
 };
 }
 
diff --git a/hwcomposer.cpp b/hwcomposer.cpp
index a83c247..060a955 100644
--- a/hwcomposer.cpp
+++ b/hwcomposer.cpp
@@ -22,12 +22,14 @@
 #include "importer.h"
 #include "vsyncworker.h"
 
+#include <stdlib.h>
+
+#include <map>
+#include <vector>
+
 #include <errno.h>
 #include <fcntl.h>
-#include <list>
-#include <map>
 #include <pthread.h>
-#include <stdlib.h>
 #include <sys/param.h>
 #include <sys/resource.h>
 #include <xf86drm.h>
@@ -58,7 +60,7 @@
   typedef std::map<int, hwc_drm_display_t> DisplayMap;
   typedef DisplayMap::iterator DisplayMapIter;
 
-  hwc_context_t() : procs(NULL), importer(NULL) {
+  hwc_context_t() : procs(NULL), importer(NULL), use_framebuffer_target(false) {
   }
 
   ~hwc_context_t() {
@@ -71,9 +73,10 @@
   DisplayMap displays;
   DrmResources drm;
   Importer *importer;
+  bool use_framebuffer_target;
 };
 
-static void hwc_dump(struct hwc_composer_device_1* dev, char *buff,
+static void hwc_dump(struct hwc_composer_device_1 *dev, char *buff,
                      int buff_len) {
   struct hwc_context_t *ctx = (struct hwc_context_t *)&dev->common;
   std::ostringstream out;
@@ -86,6 +89,15 @@
 static int hwc_prepare(hwc_composer_device_1_t *dev, size_t num_displays,
                        hwc_display_contents_1_t **display_contents) {
   struct hwc_context_t *ctx = (struct hwc_context_t *)&dev->common;
+
+  char use_framebuffer_target[PROPERTY_VALUE_MAX];
+  property_get("hwc.drm.use_framebuffer_target", use_framebuffer_target, "0");
+  bool new_use_framebuffer_target = atoi(use_framebuffer_target);
+  if (ctx->use_framebuffer_target != new_use_framebuffer_target)
+    ALOGW("Starting to %s HWC_FRAMEBUFFER_TARGET",
+          new_use_framebuffer_target ? "use" : "not use");
+  ctx->use_framebuffer_target = new_use_framebuffer_target;
+
   for (int i = 0; i < (int)num_displays; ++i) {
     if (!display_contents[i])
       continue;
@@ -100,8 +112,19 @@
     for (int j = 0; j < num_layers; j++) {
       hwc_layer_1_t *layer = &display_contents[i]->hwLayers[j];
 
-      if (layer->compositionType == HWC_FRAMEBUFFER)
-        layer->compositionType = HWC_OVERLAY;
+      if (!ctx->use_framebuffer_target) {
+        if (layer->compositionType == HWC_FRAMEBUFFER)
+          layer->compositionType = HWC_OVERLAY;
+      } else {
+        switch (layer->compositionType) {
+          case HWC_OVERLAY:
+          case HWC_BACKGROUND:
+          case HWC_SIDEBAND:
+          case HWC_CURSOR_OVERLAY:
+            layer->compositionType = HWC_FRAMEBUFFER;
+            break;
+        }
+      }
     }
   }
 
@@ -109,8 +132,7 @@
 }
 
 static void hwc_set_cleanup(size_t num_displays,
-                            hwc_display_contents_1_t **display_contents,
-                            DrmComposition *composition) {
+                            hwc_display_contents_1_t **display_contents) {
   for (int i = 0; i < (int)num_displays; ++i) {
     if (!display_contents[i])
       continue;
@@ -128,39 +150,17 @@
       dc->outbufAcquireFenceFd = -1;
     }
   }
-
-  delete composition;
 }
 
-static int hwc_add_layer(int display, hwc_context_t *ctx, hwc_layer_1_t *layer,
-                         DrmComposition *composition) {
-  hwc_drm_bo_t bo;
-  int ret = ctx->importer->ImportBuffer(layer->handle, &bo);
-  if (ret) {
-    ALOGE("Failed to import handle to bo %d", ret);
-    return ret;
-  }
-
-  ret = composition->AddLayer(display, layer, &bo);
-  if (!ret)
-    return 0;
-
-  int destroy_ret = ctx->importer->ReleaseBuffer(&bo);
-  if (destroy_ret)
-    ALOGE("Failed to destroy buffer %d", destroy_ret);
-
-  return ret;
-}
-
-static void hwc_add_layer_to_retire_fence(hwc_layer_1_t *layer,
-    hwc_display_contents_1_t *display_contents) {
+static void hwc_add_layer_to_retire_fence(
+    hwc_layer_1_t *layer, hwc_display_contents_1_t *display_contents) {
   if (layer->releaseFenceFd < 0)
     return;
 
   if (display_contents->retireFenceFd >= 0) {
     int old_retire_fence = display_contents->retireFenceFd;
-    display_contents->retireFenceFd = sync_merge("dc_retire", old_retire_fence,
-                                                 layer->releaseFenceFd);
+    display_contents->retireFenceFd =
+        sync_merge("dc_retire", old_retire_fence, layer->releaseFenceFd);
     close(old_retire_fence);
   } else {
     display_contents->retireFenceFd = dup(layer->releaseFenceFd);
@@ -171,64 +171,106 @@
                    hwc_display_contents_1_t **display_contents) {
   ATRACE_CALL();
   struct hwc_context_t *ctx = (struct hwc_context_t *)&dev->common;
-  DrmComposition *composition =
-      ctx->drm.compositor()->CreateComposition(ctx->importer);
+  int ret;
+  std::unique_ptr<DrmComposition> composition(
+      ctx->drm.compositor()->CreateComposition(ctx->importer));
   if (!composition) {
     ALOGE("Drm composition init failed");
-    hwc_set_cleanup(num_displays, display_contents, NULL);
+    hwc_set_cleanup(num_displays, display_contents);
     return -EINVAL;
   }
 
-  int ret;
+  std::vector<DrmCompositionDisplayLayersMap> layers_map;
+  std::vector<std::vector<size_t>> layers_indices;
+  layers_map.reserve(num_displays);
+  layers_indices.reserve(num_displays);
+
   for (int i = 0; i < (int)num_displays; ++i) {
     if (!display_contents[i])
       continue;
-
     hwc_display_contents_1_t *dc = display_contents[i];
-    int j;
-    unsigned num_layers = 0;
+
+    layers_map.emplace_back();
+    DrmCompositionDisplayLayersMap &map = layers_map.back();
+    map.display = i;  // display id, not the index into layers_map
+    map.layers = dc->hwLayers;
+
+    std::vector<size_t> indices_to_composite;
     unsigned num_dc_layers = dc->numHwLayers;
-    for (j = 0; j < (int)num_dc_layers; ++j) {
+    int framebuffer_target_index = -1;
+    for (int j = 0; j < (int)num_dc_layers; ++j) {
+      hwc_layer_1_t *layer = &dc->hwLayers[j];
+      if (layer->flags & HWC_SKIP_LAYER)
+        continue;
+      if (!ctx->use_framebuffer_target) {
+        if (layer->compositionType == HWC_OVERLAY)
+          indices_to_composite.push_back(j);
+        if (layer->compositionType == HWC_FRAMEBUFFER_TARGET)
+          framebuffer_target_index = j;
+      } else {
+        if (layer->compositionType == HWC_FRAMEBUFFER_TARGET)
+          indices_to_composite.push_back(j);
+      }
+    }
+    if (ctx->use_framebuffer_target) {
+      if (indices_to_composite.size() != 1) {
+        ALOGE("Expected 1 (got %zu) layer with HWC_FRAMEBUFFER_TARGET",
+              indices_to_composite.size());  // size_t needs %zu, not %d
+        hwc_set_cleanup(num_displays, display_contents);
+        return -EINVAL;
+      }
+    } else {
+      if (indices_to_composite.empty() && framebuffer_target_index >= 0) {
+        // Fall back to use HWC_FRAMEBUFFER_TARGET if all HWC_OVERLAY layers
+        // are skipped.
+        hwc_layer_1_t *layer = &dc->hwLayers[framebuffer_target_index];
+        if (!layer->handle || (layer->flags & HWC_SKIP_LAYER)) {
+          ALOGE("Expected valid layer with HWC_FRAMEBUFFER_TARGET when all "
+                "HWC_OVERLAY layers are skipped.");
+          hwc_set_cleanup(num_displays, display_contents);
+          return -EINVAL;
+        }
+        indices_to_composite.push_back(framebuffer_target_index);
+      }
+    }
+
+    map.num_layers = indices_to_composite.size();
+    layers_indices.emplace_back(std::move(indices_to_composite));
+    map.layer_indices = layers_indices.back().data();
+  }
+
+  ret = composition->SetLayers(layers_map.size(), layers_map.data());
+  if (ret) {
+    hwc_set_cleanup(num_displays, display_contents);
+    return -EINVAL;
+  }
+
+  ret = ctx->drm.compositor()->QueueComposition(std::move(composition));
+  if (ret) {
+    hwc_set_cleanup(num_displays, display_contents);
+    return -EINVAL;
+  }
+
+  composition.reset(NULL);
+
+  for (int i = 0; i < (int)num_displays; ++i) {
+    if (!display_contents[i])
+      continue;
+    hwc_display_contents_1_t *dc = display_contents[i];
+    unsigned num_dc_layers = dc->numHwLayers;
+    for (int j = 0; j < (int)num_dc_layers; ++j) {
       hwc_layer_1_t *layer = &dc->hwLayers[j];
       if (layer->flags & HWC_SKIP_LAYER)
         continue;
       if (layer->compositionType == HWC_OVERLAY)
-        num_layers++;
-    }
-
-    unsigned num_planes = composition->GetRemainingLayers(i, num_layers);
-
-    if (num_layers > num_planes) {
-      ALOGE("Can not composite %u with only %u planes", num_layers, num_planes);
-    }
-
-    for (j = 0; num_planes && j < (int)num_dc_layers; ++j) {
-      hwc_layer_1_t *layer = &dc->hwLayers[j];
-      if (layer->flags & HWC_SKIP_LAYER)
-        continue;
-      if (layer->compositionType != HWC_OVERLAY)
-        continue;
-
-      ret = hwc_add_layer(i, ctx, layer, composition);
-      if (ret) {
-        ALOGE("Add layer failed %d", ret);
-        hwc_set_cleanup(num_displays, display_contents, composition);
-        return ret;
-      }
-      hwc_add_layer_to_retire_fence(layer, dc);
-
-      --num_planes;
+        hwc_add_layer_to_retire_fence(layer, dc);
     }
   }
 
-  ret = ctx->drm.compositor()->QueueComposition(composition);
-  composition = NULL;
   if (ret) {
     ALOGE("Failed to queue the composition");
-    hwc_set_cleanup(num_displays, display_contents, NULL);
-    return ret;
   }
-  hwc_set_cleanup(num_displays, display_contents, NULL);
+  hwc_set_cleanup(num_displays, display_contents);
   return ret;
 }
 
diff --git a/nvimporter.cpp b/nvimporter.cpp
index 93b9129..de3ed55 100644
--- a/nvimporter.cpp
+++ b/nvimporter.cpp
@@ -121,7 +121,7 @@
   return 0;
 }
 
-int NvImporter::ReleaseBuffer(hwc_drm_bo_t * bo) {
+int NvImporter::ReleaseBuffer(hwc_drm_bo_t *bo) {
   NvBuffer_t *buf = (NvBuffer_t *)bo->priv;
   if (!buf) {
     ALOGE("Freeing bo %ld, buf is NULL!", bo->fb_id);
