[sf] add more debug logs to identify layer leaks

Bug: 272200348
Test: logcat
Change-Id: Icdb37a820b2028e69ff3b1a2e05c8bc9e7dd7dd4
diff --git a/services/surfaceflinger/Layer.cpp b/services/surfaceflinger/Layer.cpp
index 0f2af2f..b60b387 100644
--- a/services/surfaceflinger/Layer.cpp
+++ b/services/surfaceflinger/Layer.cpp
@@ -335,6 +335,7 @@
         return nullptr;
     }
     mGetHandleCalled = true;
+    mHandleAlive = true;
     return sp<LayerHandle>::make(mFlinger, sp<Layer>::fromExisting(this));
 }
 
@@ -1649,9 +1650,9 @@
     mFlinger->mFrameTracer->onDestroy(layerId);
 }
 
-size_t Layer::getChildrenCount() const {
+size_t Layer::getDescendantCount() const {
     size_t count = 0;
-    for (const sp<Layer>& child : mCurrentChildren) {
+    for (const sp<Layer>& child : mDrawingChildren) {
         count += 1 + child->getChildrenCount();
     }
     return count;
@@ -1898,6 +1899,12 @@
     }
 }
 
+void Layer::traverseChildren(const LayerVector::Visitor& visitor) {
+    for (const sp<Layer>& child : mDrawingChildren) {
+        visitor(child.get());
+    }
+}
+
 LayerVector Layer::makeChildrenTraversalList(LayerVector::StateSet stateSet,
                                              const std::vector<Layer*>& layersInTree) {
     LOG_ALWAYS_FATAL_IF(stateSet == LayerVector::StateSet::Invalid,
diff --git a/services/surfaceflinger/Layer.h b/services/surfaceflinger/Layer.h
index 2fb122c..6cce1a6 100644
--- a/services/surfaceflinger/Layer.h
+++ b/services/surfaceflinger/Layer.h
@@ -700,6 +700,7 @@
     void traverse(LayerVector::StateSet, const LayerVector::Visitor&);
     void traverseInReverseZOrder(LayerVector::StateSet, const LayerVector::Visitor&);
     void traverseInZOrder(LayerVector::StateSet, const LayerVector::Visitor&);
+    void traverseChildren(const LayerVector::Visitor&);
 
     /**
      * Traverse only children in z order, ignoring relative layers that are not children of the
@@ -707,7 +708,10 @@
      */
     void traverseChildrenInZOrder(LayerVector::StateSet, const LayerVector::Visitor&);
 
-    size_t getChildrenCount() const;
+    size_t getDescendantCount() const;
+    size_t getChildrenCount() const { return mDrawingChildren.size(); }
+    bool isHandleAlive() const { return mHandleAlive; }
+    bool onHandleDestroyed() { return mHandleAlive = false; }
 
     // ONLY CALL THIS FROM THE LAYER DTOR!
     // See b/141111965.  We need to add current children to offscreen layers in
@@ -1191,6 +1195,7 @@
     std::vector<std::pair<frontend::LayerHierarchy::TraversalPath, sp<LayerFE>>> mLayerFEs;
     std::unique_ptr<frontend::LayerSnapshot> mSnapshot =
             std::make_unique<frontend::LayerSnapshot>();
+    bool mHandleAlive = false;
 };
 
 std::ostream& operator<<(std::ostream& stream, const Layer::FrameRate& rate);
diff --git a/services/surfaceflinger/SurfaceFlinger.cpp b/services/surfaceflinger/SurfaceFlinger.cpp
index 8ce479a..c0c6835 100644
--- a/services/surfaceflinger/SurfaceFlinger.cpp
+++ b/services/surfaceflinger/SurfaceFlinger.cpp
@@ -4090,8 +4090,34 @@
         ALOGE("AddClientLayer failed, mNumLayers (%zu) >= MAX_LAYERS (%zu)", mNumLayers.load(),
               MAX_LAYERS);
         static_cast<void>(mScheduler->schedule([=] {
+            ALOGE("Dumping layer keeping > 20 children alive:");
+            bool leakingParentLayerFound = false;
+            mDrawingState.traverse([&](Layer* layer) {
+                if (leakingParentLayerFound) {
+                    return;
+                }
+                if (layer->getChildrenCount() > 20) {
+                    leakingParentLayerFound = true;
+                    sp<Layer> parent = sp<Layer>::fromExisting(layer);
+                    while (parent) {
+                        ALOGE("Parent Layer: %s handleIsAlive: %s", parent->getName().c_str(),
+                              std::to_string(parent->isHandleAlive()).c_str());
+                        parent = parent->getParent();
+                    }
+                    // Sample up to 100 layers
+                    ALOGE("Dumping random sampling of child layers total(%zu): ",
+                          layer->getChildrenCount());
+                    int sampleSize = (layer->getChildrenCount() / 100) + 1;
+                    layer->traverseChildren([&](Layer* layer) {
+                        if (rand() % sampleSize == 0) {
+                            ALOGE("Child Layer: %s", layer->getName().c_str());
+                        }
+                    });
+                }
+            });
+
             ALOGE("Dumping random sampling of on-screen layers: ");
-            mDrawingState.traverse([&](Layer *layer) {
+            mDrawingState.traverse([&](Layer* layer) {
                 // Aim to dump about 200 layers to avoid totally trashing
                 // logcat. On the other hand, if there really are 4096 layers
                 // something has gone totally wrong its probably the most
@@ -4100,6 +4126,8 @@
                     ALOGE("Layer: %s", layer->getName().c_str());
                 }
             });
+            ALOGE("Dumping random sampling of off-screen layers total(%zu): ",
+                  mOffscreenLayers.size());
             for (Layer* offscreenLayer : mOffscreenLayers) {
                 if (rand() % 20 == 13) {
                     ALOGE("Offscreen-layer: %s", offscreenLayer->getName().c_str());
@@ -5297,6 +5325,7 @@
 
     Mutex::Autolock lock(mStateLock);
     markLayerPendingRemovalLocked(layer);
+    layer->onHandleDestroyed();
     mBufferCountTracker.remove(handle);
     layer.clear();