libjpegrecoverymap: add support for multi-threading

Generate recovery map and apply recovery map process are now split
across threads.

Bug: 261877699
Test: push files from tests/data to /sdcard/Documents and then \
 atest libjpegdecoder_test libjpegencoder_test libjpegrecoverymap_test

Change-Id: I05ccdb170bfe1c2647bbbcd3662c97350b41b3f1
diff --git a/libs/jpegrecoverymap/recoverymap.cpp b/libs/jpegrecoverymap/recoverymap.cpp
index 22fa35b..222b5f0 100644
--- a/libs/jpegrecoverymap/recoverymap.cpp
+++ b/libs/jpegrecoverymap/recoverymap.cpp
@@ -30,6 +30,11 @@
 #include <sstream>
 #include <string>
 #include <cmath>
+#include <condition_variable>
+#include <deque>
+#include <mutex>
+#include <thread>
+#include <unistd.h>
 
 using namespace std;
 using namespace photos_editing_formats::image_io;
@@ -62,6 +67,20 @@
   1.0f,
 };
 
+#define CONFIG_MULTITHREAD 1
+int GetCPUCoreCount() {
+  int cpuCoreCount = 1;
+#if CONFIG_MULTITHREAD
+#if defined(_SC_NPROCESSORS_ONLN)
+  cpuCoreCount = sysconf(_SC_NPROCESSORS_ONLN);
+#else
+  // _SC_NPROC_ONLN must be defined...
+  cpuCoreCount = sysconf(_SC_NPROC_ONLN);
+#endif
+#endif
+  return cpuCoreCount;
+}
+
 /*
  * Helper function used for writing data to destination.
  *
@@ -626,6 +645,62 @@
   return NO_ERROR;
 }
 
+const int kJobSzInRows = 16;
+static_assert(kJobSzInRows > 0 && kJobSzInRows % kMapDimensionScaleFactor == 0,
+              "align job size to kMapDimensionScaleFactor");
+
+class JobQueue {
+ public:
+  bool dequeueJob(size_t& rowStart, size_t& rowEnd);
+  void enqueueJob(size_t rowStart, size_t rowEnd);
+  void markQueueForEnd();
+  void reset();
+
+ private:
+  bool mQueuedAllJobs = false;
+  std::deque<std::tuple<size_t, size_t>> mJobs;
+  std::mutex mMutex;
+  std::condition_variable mCv;
+};
+
+bool JobQueue::dequeueJob(size_t& rowStart, size_t& rowEnd) {
+  std::unique_lock<std::mutex> lock{mMutex};
+  while (true) {
+    if (mJobs.empty()) {
+      if (mQueuedAllJobs) {
+        return false;
+      } else {
+        mCv.wait(lock);
+      }
+    } else {
+      auto it = mJobs.begin();
+      rowStart = std::get<0>(*it);
+      rowEnd = std::get<1>(*it);
+      mJobs.erase(it);
+      return true;
+    }
+  }
+  return false;
+}
+
+void JobQueue::enqueueJob(size_t rowStart, size_t rowEnd) {
+  std::unique_lock<std::mutex> lock{mMutex};
+  mJobs.push_back(std::make_tuple(rowStart, rowEnd));
+  lock.unlock();
+  mCv.notify_one();
+}
+
+void JobQueue::markQueueForEnd() {
+  std::unique_lock<std::mutex> lock{mMutex};
+  mQueuedAllJobs = true;
+}
+
+void JobQueue::reset() {
+  std::unique_lock<std::mutex> lock{mMutex};
+  mJobs.clear();
+  mQueuedAllJobs = false;
+}
+
 status_t RecoveryMap::generateRecoveryMap(jr_uncompressed_ptr uncompressed_yuv_420_image,
                                           jr_uncompressed_ptr uncompressed_p010_image,
                                           jr_metadata_ptr metadata,
@@ -697,22 +772,83 @@
       return ERROR_JPEGR_INVALID_COLORGAMUT;
   }
 
+  std::mutex mutex;
   float hdr_y_nits_max = 0.0f;
   double hdr_y_nits_avg = 0.0f;
-  for (size_t y = 0; y < image_height; ++y) {
-    for (size_t x = 0; x < image_width; ++x) {
-      Color hdr_yuv_gamma = getP010Pixel(uncompressed_p010_image, x, y);
-      Color hdr_rgb_gamma = bt2100YuvToRgb(hdr_yuv_gamma);
-      Color hdr_rgb = hdrInvOetf(hdr_rgb_gamma);
-      hdr_rgb = hdrGamutConversionFn(hdr_rgb);
-      float hdr_y_nits = luminanceFn(hdr_rgb) * hdr_white_nits;
+  const int threads = std::clamp(GetCPUCoreCount(), 1, 4);
+  size_t rowStep = threads == 1 ? image_height : kJobSzInRows;
+  JobQueue jobQueue;
 
-      hdr_y_nits_avg += hdr_y_nits;
-      if (hdr_y_nits > hdr_y_nits_max) {
-        hdr_y_nits_max = hdr_y_nits;
+  std::function<void()> computeMetadata = [uncompressed_p010_image, hdrInvOetf,
+                                           hdrGamutConversionFn, luminanceFn, hdr_white_nits,
+                                           threads, &mutex, &hdr_y_nits_avg,
+                                           &hdr_y_nits_max, &jobQueue]() -> void {
+    size_t rowStart, rowEnd;
+    float hdr_y_nits_max_th = 0.0f;
+    double hdr_y_nits_avg_th = 0.0f;
+    while (jobQueue.dequeueJob(rowStart, rowEnd)) {
+      for (size_t y = rowStart; y < rowEnd; ++y) {
+        for (size_t x = 0; x < uncompressed_p010_image->width; ++x) {
+          Color hdr_yuv_gamma = getP010Pixel(uncompressed_p010_image, x, y);
+          Color hdr_rgb_gamma = bt2100YuvToRgb(hdr_yuv_gamma);
+          Color hdr_rgb = hdrInvOetf(hdr_rgb_gamma);
+          hdr_rgb = hdrGamutConversionFn(hdr_rgb);
+          float hdr_y_nits = luminanceFn(hdr_rgb) * hdr_white_nits;
+
+          hdr_y_nits_avg_th += hdr_y_nits;
+          if (hdr_y_nits > hdr_y_nits_max_th) {
+            hdr_y_nits_max_th = hdr_y_nits;
+          }
+        }
       }
     }
+    std::unique_lock<std::mutex> lock{mutex};
+    hdr_y_nits_avg += hdr_y_nits_avg_th;
+    hdr_y_nits_max = std::max(hdr_y_nits_max, hdr_y_nits_max_th);
+  };
+
+  std::function<void()> generateMap = [uncompressed_yuv_420_image, uncompressed_p010_image,
+                                       metadata, dest, hdrInvOetf, hdrGamutConversionFn,
+                                       luminanceFn, hdr_white_nits, &jobQueue]() -> void {
+    size_t rowStart, rowEnd;
+    while (jobQueue.dequeueJob(rowStart, rowEnd)) {
+      for (size_t y = rowStart; y < rowEnd; ++y) {
+        for (size_t x = 0; x < dest->width; ++x) {
+          Color sdr_yuv_gamma =
+              sampleYuv420(uncompressed_yuv_420_image, kMapDimensionScaleFactor, x, y);
+          Color sdr_rgb_gamma = srgbYuvToRgb(sdr_yuv_gamma);
+          Color sdr_rgb = srgbInvOetf(sdr_rgb_gamma);
+          float sdr_y_nits = luminanceFn(sdr_rgb) * kSdrWhiteNits;
+
+          Color hdr_yuv_gamma = sampleP010(uncompressed_p010_image, kMapDimensionScaleFactor, x, y);
+          Color hdr_rgb_gamma = bt2100YuvToRgb(hdr_yuv_gamma);
+          Color hdr_rgb = hdrInvOetf(hdr_rgb_gamma);
+          hdr_rgb = hdrGamutConversionFn(hdr_rgb);
+          float hdr_y_nits = luminanceFn(hdr_rgb) * hdr_white_nits;
+
+          size_t pixel_idx = x + y * dest->width;
+          reinterpret_cast<uint8_t*>(dest->data)[pixel_idx] =
+              encodeRecovery(sdr_y_nits, hdr_y_nits, metadata->rangeScalingFactor);
+        }
+      }
+    }
+  };
+
+  std::vector<std::thread> workers;
+  for (int th = 0; th < threads - 1; th++) {
+    workers.push_back(std::thread(computeMetadata));
   }
+
+  // compute metadata
+  for (size_t rowStart = 0; rowStart < image_height;) {
+    size_t rowEnd = std::min(rowStart + rowStep, image_height);
+    jobQueue.enqueueJob(rowStart, rowEnd);
+    rowStart = rowEnd;
+  }
+  jobQueue.markQueueForEnd();
+  computeMetadata();
+  std::for_each(workers.begin(), workers.end(), [](std::thread& t) { t.join(); });
+  workers.clear();
   hdr_y_nits_avg /= image_width * image_height;
 
   metadata->rangeScalingFactor = hdr_y_nits_max / kSdrWhiteNits;
@@ -721,26 +857,22 @@
     metadata->hdr10Metadata.maxCLL = hdr_y_nits_max;
   }
 
-  for (size_t y = 0; y < map_height; ++y) {
-    for (size_t x = 0; x < map_width; ++x) {
-      Color sdr_yuv_gamma = sampleYuv420(uncompressed_yuv_420_image,
-                                         kMapDimensionScaleFactor, x, y);
-      Color sdr_rgb_gamma = srgbYuvToRgb(sdr_yuv_gamma);
-      Color sdr_rgb = srgbInvOetf(sdr_rgb_gamma);
-      float sdr_y_nits = luminanceFn(sdr_rgb) * kSdrWhiteNits;
-
-      Color hdr_yuv_gamma = sampleP010(uncompressed_p010_image, kMapDimensionScaleFactor, x, y);
-      Color hdr_rgb_gamma = bt2100YuvToRgb(hdr_yuv_gamma);
-      Color hdr_rgb = hdrInvOetf(hdr_rgb_gamma);
-      hdr_rgb = hdrGamutConversionFn(hdr_rgb);
-      float hdr_y_nits = luminanceFn(hdr_rgb) * hdr_white_nits;
-
-      size_t pixel_idx =  x + y * map_width;
-      reinterpret_cast<uint8_t*>(dest->data)[pixel_idx] =
-          encodeRecovery(sdr_y_nits, hdr_y_nits, metadata->rangeScalingFactor);
-    }
+  // generate map
+  jobQueue.reset();
+  for (int th = 0; th < threads - 1; th++) {
+    workers.push_back(std::thread(generateMap));
   }
 
+  rowStep = (threads == 1 ? image_height : kJobSzInRows) / kMapDimensionScaleFactor;
+  for (size_t rowStart = 0; rowStart < map_height;) {
+    size_t rowEnd = std::min(rowStart + rowStep, map_height);
+    jobQueue.enqueueJob(rowStart, rowEnd);
+    rowStart = rowEnd;
+  }
+  jobQueue.markQueueForEnd();
+  generateMap();
+  std::for_each(workers.begin(), workers.end(), [](std::thread& t) { t.join(); });
+
   map_data.release();
   return NO_ERROR;
 }
@@ -756,58 +888,79 @@
     return ERROR_JPEGR_INVALID_NULL_PTR;
   }
 
-  size_t width = uncompressed_yuv_420_image->width;
-  size_t height = uncompressed_yuv_420_image->height;
-
-  dest->width = width;
-  dest->height = height;
-  size_t pixel_count = width * height;
-
-  ColorTransformFn hdrOetf = nullptr;
-  switch (metadata->transferFunction) {
-    case JPEGR_TF_LINEAR:
-      hdrOetf = identityConversion;
-      break;
-    case JPEGR_TF_HLG:
-      hdrOetf = hlgOetf;
-      break;
-    case JPEGR_TF_PQ:
-      hdrOetf = pqOetf;
-      break;
-    case JPEGR_TF_UNSPECIFIED:
-      // Should be impossible to hit after input validation.
-      return ERROR_JPEGR_INVALID_TRANS_FUNC;
-  }
-
+  dest->width = uncompressed_yuv_420_image->width;
+  dest->height = uncompressed_yuv_420_image->height;
   ShepardsIDW idwTable(kMapDimensionScaleFactor);
 
-  for (size_t y = 0; y < height; ++y) {
-    for (size_t x = 0; x < width; ++x) {
-      Color yuv_gamma_sdr = getYuv420Pixel(uncompressed_yuv_420_image, x, y);
-      Color rgb_gamma_sdr = srgbYuvToRgb(yuv_gamma_sdr);
-      Color rgb_sdr = srgbInvOetf(rgb_gamma_sdr);
+  JobQueue jobQueue;
+  std::function<void()> applyRecMap = [uncompressed_yuv_420_image, uncompressed_recovery_map,
+                                       metadata, dest, &jobQueue, &idwTable]() -> void {
+    const float hdr_ratio = metadata->rangeScalingFactor;
+    size_t width = uncompressed_yuv_420_image->width;
+    size_t height = uncompressed_yuv_420_image->height;
 
-      float recovery;
-      // TODO: determine map scaling factor based on actual map dims
-      size_t map_scale_factor = kMapDimensionScaleFactor;
-      // TODO: If map_scale_factor is guaranteed to be an integer, then remove the following.
-      // Currently map_scale_factor is of type size_t, but it could be changed to a float
-      // later.
-      if (map_scale_factor != floorf(map_scale_factor)) {
-        recovery = sampleMap(uncompressed_recovery_map, map_scale_factor, x, y);
-      } else {
-        recovery = sampleMap(uncompressed_recovery_map, map_scale_factor, x, y,
-                             idwTable);
-      }
-      Color rgb_hdr = applyRecovery(rgb_sdr, recovery, metadata->rangeScalingFactor);
-
-      Color rgb_gamma_hdr = hdrOetf(rgb_hdr / metadata->rangeScalingFactor);
-      uint32_t rgba1010102 = colorToRgba1010102(rgb_gamma_hdr);
-
-      size_t pixel_idx =  x + y * width;
-      reinterpret_cast<uint32_t*>(dest->data)[pixel_idx] = rgba1010102;
+    ColorTransformFn hdrOetf = nullptr;
+    switch (metadata->transferFunction) {
+      case JPEGR_TF_LINEAR:
+        hdrOetf = identityConversion;
+        break;
+      case JPEGR_TF_HLG:
+        hdrOetf = hlgOetf;
+        break;
+      case JPEGR_TF_PQ:
+        hdrOetf = pqOetf;
+        break;
+      case JPEGR_TF_UNSPECIFIED:
+        // Should be impossible to hit after input validation.
+        hdrOetf = identityConversion;
     }
+
+    size_t rowStart, rowEnd;
+    while (jobQueue.dequeueJob(rowStart, rowEnd)) {
+      for (size_t y = rowStart; y < rowEnd; ++y) {
+        for (size_t x = 0; x < width; ++x) {
+          Color yuv_gamma_sdr = getYuv420Pixel(uncompressed_yuv_420_image, x, y);
+          Color rgb_gamma_sdr = srgbYuvToRgb(yuv_gamma_sdr);
+          Color rgb_sdr = srgbInvOetf(rgb_gamma_sdr);
+
+          float recovery;
+          // TODO: determine map scaling factor based on actual map dims
+          size_t map_scale_factor = kMapDimensionScaleFactor;
+          // TODO: If map_scale_factor is guaranteed to be an integer, then remove the following.
+          // Currently map_scale_factor is of type size_t, but it could be changed to a float
+          // later.
+          if (map_scale_factor != floorf(map_scale_factor)) {
+            recovery = sampleMap(uncompressed_recovery_map, map_scale_factor, x, y);
+          } else {
+            recovery = sampleMap(uncompressed_recovery_map, map_scale_factor, x, y,
+                                idwTable);
+          }
+          Color rgb_hdr = applyRecovery(rgb_sdr, recovery, hdr_ratio);
+
+          Color rgb_gamma_hdr = hdrOetf(rgb_hdr / metadata->rangeScalingFactor);
+          uint32_t rgba1010102 = colorToRgba1010102(rgb_gamma_hdr);
+
+          size_t pixel_idx = x + y * width;
+          reinterpret_cast<uint32_t*>(dest->data)[pixel_idx] = rgba1010102;
+        }
+      }
+    }
+  };
+
+  const int threads = std::clamp(GetCPUCoreCount(), 1, 4);
+  std::vector<std::thread> workers;
+  for (int th = 0; th < threads - 1; th++) {
+    workers.push_back(std::thread(applyRecMap));
   }
+  const int rowStep = threads == 1 ? uncompressed_yuv_420_image->height : kJobSzInRows;
+  for (int rowStart = 0; rowStart < uncompressed_yuv_420_image->height;) {
+    int rowEnd = std::min(rowStart + rowStep, uncompressed_yuv_420_image->height);
+    jobQueue.enqueueJob(rowStart, rowEnd);
+    rowStart = rowEnd;
+  }
+  jobQueue.markQueueForEnd();
+  applyRecMap();
+  std::for_each(workers.begin(), workers.end(), [](std::thread& t) { t.join(); });
   return NO_ERROR;
 }