[Vulkan] Utilize Skia's new VK_ERROR_DEVICE_LOST callback to log

Skia will invoke this callback when it encounters VK_ERROR_DEVICE_LOST.
Since HWUI enables the VK_EXT_device_fault extension (when available),
additional data should be provided from the driver. If the extension is
not available, Skia will provide a generic `description` indicating
that no info is available.

The goal of this logging is to provide as much verbose information as
possible in the logs, while condensing any differentiating info into a
more succinct crash message to aid clustering.

Copied/adapted from RE's callback landed in:
Ibda00573a1db9d8aaa83057207f0f699cc756e33

Bug: 313369997
Test: manually forced crash
Change-Id: If6a6d04ea99cf6f270c8b481e325d9e0ee548385
diff --git a/libs/hwui/renderthread/VulkanManager.cpp b/libs/hwui/renderthread/VulkanManager.cpp
index d55d28d..b5f7caa 100644
--- a/libs/hwui/renderthread/VulkanManager.cpp
+++ b/libs/hwui/renderthread/VulkanManager.cpp
@@ -31,6 +31,8 @@
 #include <vk/GrVkExtensions.h>
 #include <vk/GrVkTypes.h>
 
+#include <sstream>
+
 #include "Properties.h"
 #include "RenderThread.h"
 #include "pipeline/skia/ShaderCache.h"
@@ -40,7 +42,8 @@
 namespace uirenderer {
 namespace renderthread {
 
-static std::array<std::string_view, 20> sEnableExtensions{
+// Not all of these are strictly required, but are all enabled if present.
+static std::array<std::string_view, 21> sEnableExtensions{
         VK_KHR_BIND_MEMORY_2_EXTENSION_NAME,
         VK_KHR_DEDICATED_ALLOCATION_EXTENSION_NAME,
         VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME,
@@ -61,6 +64,7 @@
         VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME,
         VK_KHR_ANDROID_SURFACE_EXTENSION_NAME,
         VK_EXT_GLOBAL_PRIORITY_EXTENSION_NAME,
+        VK_EXT_DEVICE_FAULT_EXTENSION_NAME,
 };
 
 static bool shouldEnableExtension(const std::string_view& extension) {
@@ -303,6 +307,15 @@
     *tailPNext = ycbcrFeature;
     tailPNext = &ycbcrFeature->pNext;
 
+    if (grExtensions.hasExtension(VK_EXT_DEVICE_FAULT_EXTENSION_NAME, 1)) {
+        VkPhysicalDeviceFaultFeaturesEXT* deviceFaultFeatures =
+                new VkPhysicalDeviceFaultFeaturesEXT;
+        deviceFaultFeatures->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FAULT_FEATURES_EXT;
+        deviceFaultFeatures->pNext = nullptr;
+        *tailPNext = deviceFaultFeatures;
+        tailPNext = &deviceFaultFeatures->pNext;
+    }
+
     // query to get the physical device features
     mGetPhysicalDeviceFeatures2(mPhysicalDevice, &features);
     // this looks like it would slow things down,
@@ -405,6 +418,79 @@
     });
 }
 
+namespace {
+void onVkDeviceFault(const std::string& contextLabel, const std::string& description,
+                     const std::vector<VkDeviceFaultAddressInfoEXT>& addressInfos,
+                     const std::vector<VkDeviceFaultVendorInfoEXT>& vendorInfos,
+                     const std::vector<std::byte>& vendorBinaryData) {
+    // The final crash string should contain as much differentiating info as possible, up to 1024
+    // bytes. As this final message is constructed, the same information is also dumped to the logs
+    // but in a more verbose format. Building the crash string is unsightly, so the clearer logging
+    // statement is always placed first to give context.
+    ALOGE("VK_ERROR_DEVICE_LOST (%s context): %s", contextLabel.c_str(), description.c_str());
+    std::stringstream crashMsg;
+    crashMsg << "VK_ERROR_DEVICE_LOST (" << contextLabel;
+
+    if (!addressInfos.empty()) {
+        ALOGE("%zu VkDeviceFaultAddressInfoEXT:", addressInfos.size());
+        crashMsg << ", " << addressInfos.size() << " address info (";
+        for (VkDeviceFaultAddressInfoEXT addressInfo : addressInfos) {
+            ALOGE(" addressType:       %d", (int)addressInfo.addressType);
+            ALOGE("  reportedAddress:  %" PRIu64, addressInfo.reportedAddress);
+            ALOGE("  addressPrecision: %" PRIu64, addressInfo.addressPrecision);
+            crashMsg << addressInfo.addressType << ":"
+                     << addressInfo.reportedAddress << ":"
+                     << addressInfo.addressPrecision << ", ";
+        }
+        crashMsg.seekp(-2, crashMsg.cur);  // Move back to overwrite trailing ", "
+        crashMsg << ")";
+    }
+
+    if (!vendorInfos.empty()) {
+        ALOGE("%zu VkDeviceFaultVendorInfoEXT:", vendorInfos.size());
+        crashMsg << ", " << vendorInfos.size() << " vendor info (";
+        for (VkDeviceFaultVendorInfoEXT vendorInfo : vendorInfos) {
+            ALOGE(" description:      %s", vendorInfo.description);
+            ALOGE("  vendorFaultCode: %" PRIu64, vendorInfo.vendorFaultCode);
+            ALOGE("  vendorFaultData: %" PRIu64, vendorInfo.vendorFaultData);
+            // Omit descriptions for individual vendor info structs in the crash string, as the
+            // fault code and fault data fields should be enough for clustering, and the verbosity
+            // isn't worth it. Additionally, vendors may just set the general description field of
+            // the overall fault to the description of the first element in this list, and that
+            // overall description will be placed at the end of the crash string.
+            crashMsg << vendorInfo.vendorFaultCode << ":"
+                     << vendorInfo.vendorFaultData << ", ";
+        }
+        crashMsg.seekp(-2, crashMsg.cur);  // Move back to overwrite trailing ", "
+        crashMsg << ")";
+    }
+
+    if (!vendorBinaryData.empty()) {
+        // TODO: b/322830575 - Log in base64, or dump directly to a file that gets put in bugreports
+        ALOGE("%zu bytes of vendor-specific binary data (please notify Android's Core Graphics"
+              " Stack team if you observe this message).",
+              vendorBinaryData.size());
+        crashMsg << ", " << vendorBinaryData.size() << " bytes binary";
+    }
+
+    crashMsg << "): " << description;
+    LOG_ALWAYS_FATAL("%s", crashMsg.str().c_str());
+}
+
+void deviceLostProcRenderThread(void* callbackContext, const std::string& description,
+                                const std::vector<VkDeviceFaultAddressInfoEXT>& addressInfos,
+                                const std::vector<VkDeviceFaultVendorInfoEXT>& vendorInfos,
+                                const std::vector<std::byte>& vendorBinaryData) {
+    onVkDeviceFault("RenderThread", description, addressInfos, vendorInfos, vendorBinaryData);
+}
+void deviceLostProcUploadThread(void* callbackContext, const std::string& description,
+                                const std::vector<VkDeviceFaultAddressInfoEXT>& addressInfos,
+                                const std::vector<VkDeviceFaultVendorInfoEXT>& vendorInfos,
+                                const std::vector<std::byte>& vendorBinaryData) {
+    onVkDeviceFault("UploadThread", description, addressInfos, vendorInfos, vendorBinaryData);
+}
+}  // anonymous namespace
+
 static void onGrContextReleased(void* context) {
     VulkanManager* manager = (VulkanManager*)context;
     manager->decStrong((void*)onGrContextReleased);
@@ -430,6 +516,10 @@
     backendContext.fVkExtensions = &mExtensions;
     backendContext.fDeviceFeatures2 = &mPhysicalDeviceFeatures2;
     backendContext.fGetProc = std::move(getProc);
+    backendContext.fDeviceLostContext = nullptr;
+    backendContext.fDeviceLostProc = (contextType == ContextType::kRenderThread)
+                                             ? deviceLostProcRenderThread
+                                             : deviceLostProcUploadThread;
 
     LOG_ALWAYS_FATAL_IF(options.fContextDeleteProc != nullptr, "Conflicting fContextDeleteProcs!");
     this->incStrong((void*)onGrContextReleased);