libjpegrecoverymap: add support for multi-threading
Recovery map generation and recovery map application are now split
across multiple threads.
Bug: 261877699
Test: push files from tests/data to /sdcard/Documents and then \
atest libjpegdecoder_test libjpegencoder_test libjpegrecoverymap_test
Change-Id: I05ccdb170bfe1c2647bbbcd3662c97350b41b3f1
diff --git a/libs/jpegrecoverymap/recoverymap.cpp b/libs/jpegrecoverymap/recoverymap.cpp
index 22fa35b..222b5f0 100644
--- a/libs/jpegrecoverymap/recoverymap.cpp
+++ b/libs/jpegrecoverymap/recoverymap.cpp
@@ -30,6 +30,11 @@
#include <sstream>
#include <string>
#include <cmath>
+#include <algorithm>
+#include <condition_variable>
+#include <deque>
+#include <functional>
+#include <mutex>
+#include <thread>
+#include <tuple>
+#include <unistd.h>
+#include <vector>
using namespace std;
using namespace photos_editing_formats::image_io;
@@ -62,6 +67,20 @@
1.0f,
};
+#define CONFIG_MULTITHREAD 1
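+// Returns the number of online CPU cores, or 1 when multithreading is disabled.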
+int GetCPUCoreCount() {
+ int cpuCoreCount = 1;
+#if CONFIG_MULTITHREAD
+#if defined(_SC_NPROCESSORS_ONLN)
+ cpuCoreCount = sysconf(_SC_NPROCESSORS_ONLN);
+#else
+ // _SC_NPROC_ONLN must be defined...
+ cpuCoreCount = sysconf(_SC_NPROC_ONLN);
+#endif
+#endif
+ return cpuCoreCount;
+}
+
/*
* Helper function used for writing data to destination.
*
@@ -626,6 +645,62 @@
return NO_ERROR;
}
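+// Number of input rows handled per job; must stay a multiple of kMapDimensionScaleFactor.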
+const int kJobSzInRows = 16;
+static_assert(kJobSzInRows > 0 && kJobSzInRows % kMapDimensionScaleFactor == 0,
+ "align job size to kMapDimensionScaleFactor");
+
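+// Thread-safe queue of [rowStart, rowEnd) row ranges shared by the main thread and the workers.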
+class JobQueue {
+ public:
+ bool dequeueJob(size_t& rowStart, size_t& rowEnd);
+ void enqueueJob(size_t rowStart, size_t rowEnd);
+ void markQueueForEnd();
+ void reset();
+
+ private:
+ bool mQueuedAllJobs = false;
+ std::deque<std::tuple<size_t, size_t>> mJobs;
+ std::mutex mMutex;
+ std::condition_variable mCv;
+};
+
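+// Blocks until a job is available; returns false once the queue is empty and marked as ended.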
+bool JobQueue::dequeueJob(size_t& rowStart, size_t& rowEnd) {
+ std::unique_lock<std::mutex> lock{mMutex};
+ while (true) {
+ if (mJobs.empty()) {
+ if (mQueuedAllJobs) {
+ return false;
+ } else {
+ mCv.wait(lock);
+ }
+ } else {
+ auto it = mJobs.begin();
+ rowStart = std::get<0>(*it);
+ rowEnd = std::get<1>(*it);
+ mJobs.erase(it);
+ return true;
+ }
+ }
+ return false;
+}
+
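+// Adds a row range and wakes one waiting worker.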
+void JobQueue::enqueueJob(size_t rowStart, size_t rowEnd) {
+ std::unique_lock<std::mutex> lock{mMutex};
+ mJobs.push_back(std::make_tuple(rowStart, rowEnd));
+ lock.unlock();
+ mCv.notify_one();
+}
+
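+// Marks that no further jobs will be enqueued.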
+void JobQueue::markQueueForEnd() {
+ std::unique_lock<std::mutex> lock{mMutex};
+ if (!mQueuedAllJobs) {
+ mQueuedAllJobs = true;
+ // Wake any workers blocked in dequeueJob so they can observe the end of the queue and exit.
+ mCv.notify_all();
+ }
+}
+
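+// Clears pending jobs so the queue can be reused for the next pass.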
+void JobQueue::reset() {
+ std::unique_lock<std::mutex> lock{mMutex};
+ mJobs.clear();
+ mQueuedAllJobs = false;
+}
+
status_t RecoveryMap::generateRecoveryMap(jr_uncompressed_ptr uncompressed_yuv_420_image,
jr_uncompressed_ptr uncompressed_p010_image,
jr_metadata_ptr metadata,
@@ -697,22 +772,83 @@
return ERROR_JPEGR_INVALID_COLORGAMUT;
}
+ std::mutex mutex;
float hdr_y_nits_max = 0.0f;
double hdr_y_nits_avg = 0.0f;
- for (size_t y = 0; y < image_height; ++y) {
- for (size_t x = 0; x < image_width; ++x) {
- Color hdr_yuv_gamma = getP010Pixel(uncompressed_p010_image, x, y);
- Color hdr_rgb_gamma = bt2100YuvToRgb(hdr_yuv_gamma);
- Color hdr_rgb = hdrInvOetf(hdr_rgb_gamma);
- hdr_rgb = hdrGamutConversionFn(hdr_rgb);
- float hdr_y_nits = luminanceFn(hdr_rgb) * hdr_white_nits;
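+ // Use at most 4 threads; with a single thread the whole image is processed as one job.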
+ const int threads = std::clamp(GetCPUCoreCount(), 1, 4);
+ size_t rowStep = threads == 1 ? image_height : kJobSzInRows;
+ JobQueue jobQueue;
- hdr_y_nits_avg += hdr_y_nits;
- if (hdr_y_nits > hdr_y_nits_max) {
- hdr_y_nits_max = hdr_y_nits;
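+ // Worker task: accumulate per-thread luminance max/sum, then merge into the shared totals under the mutex.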
+ std::function<void()> computeMetadata = [uncompressed_p010_image, hdrInvOetf,
+ hdrGamutConversionFn, luminanceFn, hdr_white_nits,
+ threads, &mutex, &hdr_y_nits_avg,
+ &hdr_y_nits_max, &jobQueue]() -> void {
+ size_t rowStart, rowEnd;
+ float hdr_y_nits_max_th = 0.0f;
+ double hdr_y_nits_avg_th = 0.0f;
+ while (jobQueue.dequeueJob(rowStart, rowEnd)) {
+ for (size_t y = rowStart; y < rowEnd; ++y) {
+ for (size_t x = 0; x < uncompressed_p010_image->width; ++x) {
+ Color hdr_yuv_gamma = getP010Pixel(uncompressed_p010_image, x, y);
+ Color hdr_rgb_gamma = bt2100YuvToRgb(hdr_yuv_gamma);
+ Color hdr_rgb = hdrInvOetf(hdr_rgb_gamma);
+ hdr_rgb = hdrGamutConversionFn(hdr_rgb);
+ float hdr_y_nits = luminanceFn(hdr_rgb) * hdr_white_nits;
+
+ hdr_y_nits_avg_th += hdr_y_nits;
+ if (hdr_y_nits > hdr_y_nits_max_th) {
+ hdr_y_nits_max_th = hdr_y_nits;
+ }
+ }
}
}
+ std::unique_lock<std::mutex> lock{mutex};
+ hdr_y_nits_avg += hdr_y_nits_avg_th;
+ hdr_y_nits_max = std::max(hdr_y_nits_max, hdr_y_nits_max_th);
+ };
+
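+ // Worker task: compute one recovery value per map pixel from the subsampled SDR and HDR inputs.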
+ std::function<void()> generateMap = [uncompressed_yuv_420_image, uncompressed_p010_image,
+ metadata, dest, hdrInvOetf, hdrGamutConversionFn,
+ luminanceFn, hdr_white_nits, &jobQueue]() -> void {
+ size_t rowStart, rowEnd;
+ while (jobQueue.dequeueJob(rowStart, rowEnd)) {
+ for (size_t y = rowStart; y < rowEnd; ++y) {
+ for (size_t x = 0; x < dest->width; ++x) {
+ Color sdr_yuv_gamma =
+ sampleYuv420(uncompressed_yuv_420_image, kMapDimensionScaleFactor, x, y);
+ Color sdr_rgb_gamma = srgbYuvToRgb(sdr_yuv_gamma);
+ Color sdr_rgb = srgbInvOetf(sdr_rgb_gamma);
+ float sdr_y_nits = luminanceFn(sdr_rgb) * kSdrWhiteNits;
+
+ Color hdr_yuv_gamma = sampleP010(uncompressed_p010_image, kMapDimensionScaleFactor, x, y);
+ Color hdr_rgb_gamma = bt2100YuvToRgb(hdr_yuv_gamma);
+ Color hdr_rgb = hdrInvOetf(hdr_rgb_gamma);
+ hdr_rgb = hdrGamutConversionFn(hdr_rgb);
+ float hdr_y_nits = luminanceFn(hdr_rgb) * hdr_white_nits;
+
+ size_t pixel_idx = x + y * dest->width;
+ reinterpret_cast<uint8_t*>(dest->data)[pixel_idx] =
+ encodeRecovery(sdr_y_nits, hdr_y_nits, metadata->rangeScalingFactor);
+ }
+ }
+ }
+ };
+
+ std::vector<std::thread> workers;
+ for (int th = 0; th < threads - 1; th++) {
+ workers.push_back(std::thread(computeMetadata));
}
+
+ // compute metadata
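+ // Enqueue row jobs covering the full image; the main thread also consumes jobs before joining the workers.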
+ for (size_t rowStart = 0; rowStart < image_height;) {
+ size_t rowEnd = std::min(rowStart + rowStep, image_height);
+ jobQueue.enqueueJob(rowStart, rowEnd);
+ rowStart = rowEnd;
+ }
+ jobQueue.markQueueForEnd();
+ computeMetadata();
+ std::for_each(workers.begin(), workers.end(), [](std::thread& t) { t.join(); });
+ workers.clear();
hdr_y_nits_avg /= image_width * image_height;
metadata->rangeScalingFactor = hdr_y_nits_max / kSdrWhiteNits;
@@ -721,26 +857,22 @@
metadata->hdr10Metadata.maxCLL = hdr_y_nits_max;
}
- for (size_t y = 0; y < map_height; ++y) {
- for (size_t x = 0; x < map_width; ++x) {
- Color sdr_yuv_gamma = sampleYuv420(uncompressed_yuv_420_image,
- kMapDimensionScaleFactor, x, y);
- Color sdr_rgb_gamma = srgbYuvToRgb(sdr_yuv_gamma);
- Color sdr_rgb = srgbInvOetf(sdr_rgb_gamma);
- float sdr_y_nits = luminanceFn(sdr_rgb) * kSdrWhiteNits;
-
- Color hdr_yuv_gamma = sampleP010(uncompressed_p010_image, kMapDimensionScaleFactor, x, y);
- Color hdr_rgb_gamma = bt2100YuvToRgb(hdr_yuv_gamma);
- Color hdr_rgb = hdrInvOetf(hdr_rgb_gamma);
- hdr_rgb = hdrGamutConversionFn(hdr_rgb);
- float hdr_y_nits = luminanceFn(hdr_rgb) * hdr_white_nits;
-
- size_t pixel_idx = x + y * map_width;
- reinterpret_cast<uint8_t*>(dest->data)[pixel_idx] =
- encodeRecovery(sdr_y_nits, hdr_y_nits, metadata->rangeScalingFactor);
- }
+ // generate map
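+ // The map is subsampled by kMapDimensionScaleFactor, so the per-job row count is scaled down accordingly.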
+ jobQueue.reset();
+ for (int th = 0; th < threads - 1; th++) {
+ workers.push_back(std::thread(generateMap));
}
+ rowStep = (threads == 1 ? image_height : kJobSzInRows) / kMapDimensionScaleFactor;
+ for (size_t rowStart = 0; rowStart < map_height;) {
+ size_t rowEnd = std::min(rowStart + rowStep, map_height);
+ jobQueue.enqueueJob(rowStart, rowEnd);
+ rowStart = rowEnd;
+ }
+ jobQueue.markQueueForEnd();
+ generateMap();
+ std::for_each(workers.begin(), workers.end(), [](std::thread& t) { t.join(); });
+
map_data.release();
return NO_ERROR;
}
@@ -756,58 +888,79 @@
return ERROR_JPEGR_INVALID_NULL_PTR;
}
- size_t width = uncompressed_yuv_420_image->width;
- size_t height = uncompressed_yuv_420_image->height;
-
- dest->width = width;
- dest->height = height;
- size_t pixel_count = width * height;
-
- ColorTransformFn hdrOetf = nullptr;
- switch (metadata->transferFunction) {
- case JPEGR_TF_LINEAR:
- hdrOetf = identityConversion;
- break;
- case JPEGR_TF_HLG:
- hdrOetf = hlgOetf;
- break;
- case JPEGR_TF_PQ:
- hdrOetf = pqOetf;
- break;
- case JPEGR_TF_UNSPECIFIED:
- // Should be impossible to hit after input validation.
- return ERROR_JPEGR_INVALID_TRANS_FUNC;
- }
-
+ dest->width = uncompressed_yuv_420_image->width;
+ dest->height = uncompressed_yuv_420_image->height;
ShepardsIDW idwTable(kMapDimensionScaleFactor);
- for (size_t y = 0; y < height; ++y) {
- for (size_t x = 0; x < width; ++x) {
- Color yuv_gamma_sdr = getYuv420Pixel(uncompressed_yuv_420_image, x, y);
- Color rgb_gamma_sdr = srgbYuvToRgb(yuv_gamma_sdr);
- Color rgb_sdr = srgbInvOetf(rgb_gamma_sdr);
+ JobQueue jobQueue;
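+ // Worker task: apply the sampled recovery map to the SDR input and write RGBA1010102 HDR pixels.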
+ std::function<void()> applyRecMap = [uncompressed_yuv_420_image, uncompressed_recovery_map,
+ metadata, dest, &jobQueue, &idwTable]() -> void {
+ const float hdr_ratio = metadata->rangeScalingFactor;
+ size_t width = uncompressed_yuv_420_image->width;
+ size_t height = uncompressed_yuv_420_image->height;
- float recovery;
- // TODO: determine map scaling factor based on actual map dims
- size_t map_scale_factor = kMapDimensionScaleFactor;
- // TODO: If map_scale_factor is guaranteed to be an integer, then remove the following.
- // Currently map_scale_factor is of type size_t, but it could be changed to a float
- // later.
- if (map_scale_factor != floorf(map_scale_factor)) {
- recovery = sampleMap(uncompressed_recovery_map, map_scale_factor, x, y);
- } else {
- recovery = sampleMap(uncompressed_recovery_map, map_scale_factor, x, y,
- idwTable);
- }
- Color rgb_hdr = applyRecovery(rgb_sdr, recovery, metadata->rangeScalingFactor);
-
- Color rgb_gamma_hdr = hdrOetf(rgb_hdr / metadata->rangeScalingFactor);
- uint32_t rgba1010102 = colorToRgba1010102(rgb_gamma_hdr);
-
- size_t pixel_idx = x + y * width;
- reinterpret_cast<uint32_t*>(dest->data)[pixel_idx] = rgba1010102;
+ ColorTransformFn hdrOetf = nullptr;
+ switch (metadata->transferFunction) {
+ case JPEGR_TF_LINEAR:
+ hdrOetf = identityConversion;
+ break;
+ case JPEGR_TF_HLG:
+ hdrOetf = hlgOetf;
+ break;
+ case JPEGR_TF_PQ:
+ hdrOetf = pqOetf;
+ break;
+ case JPEGR_TF_UNSPECIFIED:
+ // Should be impossible to hit after input validation.
+ hdrOetf = identityConversion;
}
+
+ size_t rowStart, rowEnd;
+ while (jobQueue.dequeueJob(rowStart, rowEnd)) {
+ for (size_t y = rowStart; y < rowEnd; ++y) {
+ for (size_t x = 0; x < width; ++x) {
+ Color yuv_gamma_sdr = getYuv420Pixel(uncompressed_yuv_420_image, x, y);
+ Color rgb_gamma_sdr = srgbYuvToRgb(yuv_gamma_sdr);
+ Color rgb_sdr = srgbInvOetf(rgb_gamma_sdr);
+
+ float recovery;
+ // TODO: determine map scaling factor based on actual map dims
+ size_t map_scale_factor = kMapDimensionScaleFactor;
+ // TODO: If map_scale_factor is guaranteed to be an integer, then remove the following.
+ // Currently map_scale_factor is of type size_t, but it could be changed to a float
+ // later.
+ if (map_scale_factor != floorf(map_scale_factor)) {
+ recovery = sampleMap(uncompressed_recovery_map, map_scale_factor, x, y);
+ } else {
+ recovery = sampleMap(uncompressed_recovery_map, map_scale_factor, x, y,
+ idwTable);
+ }
+ Color rgb_hdr = applyRecovery(rgb_sdr, recovery, hdr_ratio);
+
+ Color rgb_gamma_hdr = hdrOetf(rgb_hdr / hdr_ratio);
+ uint32_t rgba1010102 = colorToRgba1010102(rgb_gamma_hdr);
+
+ size_t pixel_idx = x + y * width;
+ reinterpret_cast<uint32_t*>(dest->data)[pixel_idx] = rgba1010102;
+ }
+ }
+ }
+ };
+
+ const int threads = std::clamp(GetCPUCoreCount(), 1, 4);
+ std::vector<std::thread> workers;
+ for (int th = 0; th < threads - 1; th++) {
+ workers.push_back(std::thread(applyRecMap));
}
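+ // Enqueue row jobs over the full-resolution image; the main thread also applies the map before joining.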
+ const int rowStep = threads == 1 ? uncompressed_yuv_420_image->height : kJobSzInRows;
+ for (int rowStart = 0; rowStart < uncompressed_yuv_420_image->height;) {
+ int rowEnd = std::min(rowStart + rowStep, uncompressed_yuv_420_image->height);
+ jobQueue.enqueueJob(rowStart, rowEnd);
+ rowStart = rowEnd;
+ }
+ jobQueue.markQueueForEnd();
+ applyRecMap();
+ std::for_each(workers.begin(), workers.end(), [](std::thread& t) { t.join(); });
return NO_ERROR;
}