From 7633174bd62a47a50e0f7512b3094bd6a7c53b19 Mon Sep 17 00:00:00 2001 From: Hunter Kvalevog Date: Thu, 26 Feb 2026 21:17:36 -0600 Subject: yuvbench: Apple Accelerate --- yuvbench/build-macos-aarch64-clang.sh | 10 +++- yuvbench/yuvbench.c | 14 +++-- yuvbench/yuvbench_accelerate.c | 103 ++++++++++++++++++++++++++++++++++ 3 files changed, 120 insertions(+), 7 deletions(-) create mode 100644 yuvbench/yuvbench_accelerate.c (limited to 'yuvbench') diff --git a/yuvbench/build-macos-aarch64-clang.sh b/yuvbench/build-macos-aarch64-clang.sh index 0a794fa..3aa3943 100755 --- a/yuvbench/build-macos-aarch64-clang.sh +++ b/yuvbench/build-macos-aarch64-clang.sh @@ -1,7 +1,11 @@ #!/bin/sh -CFLAGS="-Wall -Wextra -Wpedantic -O3 -g -DYUVBENCH_BAD" -LFLAGS="" +CFLAGS="-Wall -Wextra -Wpedantic -O3 -g -DYUVBENCH_ACCELERATE -DYUVBENCH_BAD" +LFLAGS="-framework Accelerate" mkdir -p build +set -x clang -o build/yuvbench.o $CFLAGS -c ./yuvbench.c +clang -o build/yuvbench_accelerate.o $CFLAGS -c ./yuvbench_accelerate.c +clang -o build/yuvbench_accelerate.S $CFLAGS -S ./yuvbench_accelerate.c clang -o build/yuvbench_bad.o $CFLAGS -c ./yuvbench_bad.c -clang -o build/yuvbench $LFLAGS build/yuvbench.o build/yuvbench_bad.o +clang -o build/yuvbench_bad.S $CFLAGS -S ./yuvbench_bad.c +clang -o build/yuvbench $LFLAGS build/yuvbench.o build/yuvbench_accelerate.o build/yuvbench_bad.o diff --git a/yuvbench/yuvbench.c b/yuvbench/yuvbench.c index 1d34f0b..5ef344c 100644 --- a/yuvbench/yuvbench.c +++ b/yuvbench/yuvbench.c @@ -3,6 +3,9 @@ #define KBENCH_IMPLEMENTATION #include "kbench.h" +#ifdef YUVBENCH_ACCELERATE +Backend yuvbench_accelerate(void); +#endif #ifdef YUVBENCH_BAD Backend yuvbench_bad(void); #endif @@ -44,7 +47,7 @@ static void run_backend(Backend b) } printf("testing...\n"); - int tests = 200; + int tests = 1000; double* tests_table = calloc(tests, sizeof(double)); assert(tests_table); for (int i = 0; i < tests; ++i) { uintptr_t t0 = KBenchTS(); @@ -72,9 +75,9 @@ static void 
run_backend(Backend b) } ts_avg += (tests_table[i] / (double)tests); } - printf("min result: %f\n", ts_min); - printf("max result: %f\n", ts_max); - printf("avg result: %f\n", ts_avg); + printf("min result: %fms\n", ts_min * 1000.0f); + printf("max result: %fms\n", ts_max * 1000.0f); + printf("avg result: %fms\n", ts_avg * 1000.0f); #if 0 && (defined(__APPLE__) || defined(__linux__)) // Display last result @@ -167,6 +170,9 @@ int main(int argc, char** argv) G.out_len = G.inp_w * G.inp_h * 3; // RGB888 G.out_buf = calloc(1, G.out_len); +#ifdef YUVBENCH_ACCELERATE + run_backend(yuvbench_accelerate()); +#endif #ifdef YUVBENCH_BAD run_backend(yuvbench_bad()); #endif diff --git a/yuvbench/yuvbench_accelerate.c b/yuvbench/yuvbench_accelerate.c new file mode 100644 index 0000000..5f2e794 --- /dev/null +++ b/yuvbench/yuvbench_accelerate.c @@ -0,0 +1,103 @@ +#include "yuvbench.h" + +#include <Accelerate/Accelerate.h> + +typedef struct AccelerateCtx AccelerateCtx; +struct AccelerateCtx +{ + void* rgba_buf; +}; + +static bool yuvbench_accelerate_init(Ctx* ctx) +{ + AccelerateCtx* accel = calloc(1, sizeof(AccelerateCtx)); + accel->rgba_buf = calloc(4, ctx->inp_w * ctx->inp_h); + ctx->user = accel; + return true; +} + +static void yuvbench_accelerate_deinit(Ctx* ctx) +{ + AccelerateCtx* accel = (AccelerateCtx*)ctx->user; + free(accel->rgba_buf); + free(accel); +} + +static bool yuvbench_accelerate_convert(Ctx* ctx) +{ + AccelerateCtx* accel = (AccelerateCtx*)ctx->user; + const uint32_t w = ctx->inp_w; + const uint32_t h = ctx->inp_h; + const uint8_t* Y = (const uint8_t*)ctx->inp_buf; + const uint8_t* Cb = Y + (w * h); + const uint8_t* Cr = Cb + (w / 2 * h / 2); + // BT.709 + vImage_YpCbCrToARGBMatrix matrix = { 0 }; + matrix.Yp = 1.0f; + matrix.Cb_G = -0.1873f; + matrix.Cb_B = 1.8556f; + matrix.Cr_R = 1.5748f; + matrix.Cr_G = -0.4681f; + // + vImage_YpCbCrPixelRange pr = { 0 }; + pr.Yp_bias = 16; + pr.CbCr_bias = 128; + pr.YpRangeMax = 235; + pr.CbCrRangeMax = 240; + pr.YpMax = 235; + pr.YpMin = 16; + 
pr.CbCrMax = 240; + pr.CbCrMin = 16; + // + vImage_YpCbCrToARGB out = { 0 }; + // + if (vImageConvert_YpCbCrToARGB_GenerateConversion(&matrix, &pr, &out, kvImage420Yp8_Cb8_Cr8, kvImageARGB8888, kvImageNoFlags) != kvImageNoError) { + return false; + } + // + vImage_Buffer ypbuf = { 0 }; + ypbuf.data = (void*)Y; + ypbuf.width = w; + ypbuf.height = h; + ypbuf.rowBytes = w; + vImage_Buffer cbbuf = { 0 }; + cbbuf.data = (void*)Cb; + cbbuf.width = w / 2; + cbbuf.height = h / 2; + cbbuf.rowBytes = w / 2; + vImage_Buffer crbuf = { 0 }; + crbuf.data = (void*)Cr; + crbuf.width = w / 2; + crbuf.height = h / 2; + crbuf.rowBytes = w / 2; + vImage_Buffer rgbabuf = { 0 }; + rgbabuf.data = accel->rgba_buf; + rgbabuf.width = w; + rgbabuf.height = h; + rgbabuf.rowBytes = w * 4; + // + uint8_t permute[4] = { 0, 1, 2, 3 }; + // + if (vImageConvert_420Yp8_Cb8_Cr8ToARGB8888(&ypbuf, &cbbuf, &crbuf, &rgbabuf, &out, permute, 0xFF, kvImageNoFlags) != kvImageNoError) { + return false; + } + // + vImage_Buffer rgbbuf = { 0 }; + rgbbuf.data = ctx->out_buf; + rgbbuf.width = w; + rgbbuf.height = h; + rgbbuf.rowBytes = w * 3; + if (vImageConvert_ARGB8888toRGB888(&rgbabuf, &rgbbuf, kvImageNoFlags) != kvImageNoError) { + return false; + } + return true; +} + +Backend yuvbench_accelerate(void) +{ + Backend b = { 0 }; + b.init_fn = yuvbench_accelerate_init; + b.deinit_fn = yuvbench_accelerate_deinit; + b.convert_fn = yuvbench_accelerate_convert; + return b; +} -- cgit v1.2.3