summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Hunter Kvalevog <hunter@kvog.sh> 2026-02-26 21:17:36 -0600
committer Hunter Kvalevog <hunter@kvog.sh> 2026-02-26 21:17:36 -0600
commit 7633174bd62a47a50e0f7512b3094bd6a7c53b19 (patch)
tree 4ae5b5b17cc9ea561edbb77bea3aef490430a2d4
parent ae050f664a1f197bba2306271c7f5f8c2c2304f9 (diff)
yuvbench: Apple Accelerate
-rwxr-xr-x yuvbench/build-macos-aarch64-clang.sh 10
-rw-r--r-- yuvbench/yuvbench.c 14
-rw-r--r-- yuvbench/yuvbench_accelerate.c 103
3 files changed, 120 insertions, 7 deletions
diff --git a/yuvbench/build-macos-aarch64-clang.sh b/yuvbench/build-macos-aarch64-clang.sh
index 0a794fa..3aa3943 100755
--- a/yuvbench/build-macos-aarch64-clang.sh
+++ b/yuvbench/build-macos-aarch64-clang.sh
@@ -1,7 +1,11 @@
#!/bin/sh
-CFLAGS="-Wall -Wextra -Wpedantic -O3 -g -DYUVBENCH_BAD"
-LFLAGS=""
+CFLAGS="-Wall -Wextra -Wpedantic -O3 -g -DYUVBENCH_ACCELERATE -DYUVBENCH_BAD"
+LFLAGS="-framework Accelerate"
mkdir -p build
+set -x
clang -o build/yuvbench.o $CFLAGS -c ./yuvbench.c
+clang -o build/yuvbench_accelerate.o $CFLAGS -c ./yuvbench_accelerate.c
+clang -o build/yuvbench_accelerate.S $CFLAGS -S ./yuvbench_accelerate.c
clang -o build/yuvbench_bad.o $CFLAGS -c ./yuvbench_bad.c
-clang -o build/yuvbench $LFLAGS build/yuvbench.o build/yuvbench_bad.o
+clang -o build/yuvbench_bad.S $CFLAGS -S ./yuvbench_bad.c
+clang -o build/yuvbench $LFLAGS build/yuvbench.o build/yuvbench_accelerate.o build/yuvbench_bad.o
diff --git a/yuvbench/yuvbench.c b/yuvbench/yuvbench.c
index 1d34f0b..5ef344c 100644
--- a/yuvbench/yuvbench.c
+++ b/yuvbench/yuvbench.c
@@ -3,6 +3,9 @@
#define KBENCH_IMPLEMENTATION
#include "kbench.h"
+#ifdef YUVBENCH_ACCELERATE
+Backend yuvbench_accelerate(void);
+#endif
#ifdef YUVBENCH_BAD
Backend yuvbench_bad(void);
#endif
@@ -44,7 +47,7 @@ static void run_backend(Backend b)
}
printf("testing...\n");
- int tests = 200;
+ int tests = 1000;
double* tests_table = calloc(tests, sizeof(double)); assert(tests_table);
for (int i = 0; i < tests; ++i) {
uintptr_t t0 = KBenchTS();
@@ -72,9 +75,9 @@ static void run_backend(Backend b)
}
ts_avg += (tests_table[i] / (double)tests);
}
- printf("min result: %f\n", ts_min);
- printf("max result: %f\n", ts_max);
- printf("avg result: %f\n", ts_avg);
+ printf("min result: %fms\n", ts_min * 1000.0f);
+ printf("max result: %fms\n", ts_max * 1000.0f);
+ printf("avg result: %fms\n", ts_avg * 1000.0f);
#if 0 && (defined(__APPLE__) || defined(__linux__))
// Display last result
@@ -167,6 +170,9 @@ int main(int argc, char** argv)
G.out_len = G.inp_w * G.inp_h * 3; // RGB888
G.out_buf = calloc(1, G.out_len);
+#ifdef YUVBENCH_ACCELERATE
+ run_backend(yuvbench_accelerate());
+#endif
#ifdef YUVBENCH_BAD
run_backend(yuvbench_bad());
#endif
diff --git a/yuvbench/yuvbench_accelerate.c b/yuvbench/yuvbench_accelerate.c
new file mode 100644
index 0000000..5f2e794
--- /dev/null
+++ b/yuvbench/yuvbench_accelerate.c
@@ -0,0 +1,103 @@
+#include "yuvbench.h"
+
+#include <Accelerate/Accelerate.h>
+
+// Private state of the Accelerate backend; a pointer to it is kept in Ctx.user.
+typedef struct AccelerateCtx
+{
+    // Scratch buffer holding 4 bytes per input pixel; the converter writes the
+    // intermediate ARGB8888 image here before it is packed down to RGB888.
+    void* rgba_buf;
+} AccelerateCtx;
+
+// Allocates the backend state plus its 4-bytes-per-pixel scratch buffer and
+// stores the state in ctx->user.
+//
+// Returns true on success. If an allocation fails, anything already allocated
+// is released, ctx->user is set to NULL and false is returned.
+static bool yuvbench_accelerate_init(Ctx* ctx)
+{
+    // Multiply in size_t rather than in the (possibly narrower) field type, so
+    // the pixel count is not truncated before it reaches calloc.
+    const size_t pixels = (size_t)ctx->inp_w * (size_t)ctx->inp_h;
+
+    AccelerateCtx* accel = calloc(1, sizeof *accel);
+    if (accel == NULL) {
+        ctx->user = NULL;
+        return false;
+    }
+
+    // calloc itself fails if pixels * 4 would overflow.
+    accel->rgba_buf = calloc(pixels, 4);
+    // A zero-sized request is allowed to return NULL, so NULL only signals
+    // failure when a non-empty buffer was requested.
+    if (accel->rgba_buf == NULL && pixels != 0) {
+        free(accel);
+        ctx->user = NULL;
+        return false;
+    }
+
+    ctx->user = accel;
+    return true;
+}
+
+// Frees the state stored in ctx->user by yuvbench_accelerate_init.
+//
+// Does nothing when ctx->user is NULL, and resets ctx->user to NULL afterwards
+// so the freed pointer is not left behind and a repeated call is harmless.
+static void yuvbench_accelerate_deinit(Ctx* ctx)
+{
+    AccelerateCtx* accel = (AccelerateCtx*)ctx->user;
+    if (accel == NULL) {
+        return;
+    }
+    free(accel->rgba_buf);
+    free(accel);
+    ctx->user = NULL;
+}
+
+// Converts the planar Y / Cb / Cr image in ctx->inp_buf into packed RGB888 in
+// ctx->out_buf using vImage, in two steps:
+//   1. 4:2:0 planar YpCbCr -> ARGB8888 into the backend's scratch buffer
+//   2. ARGB8888 -> RGB888 into ctx->out_buf
+//
+// Returns true on success, false as soon as any vImage call reports an error.
+//
+// NOTE(review): assumes the three planes are tightly packed in the order
+// Y, Cb, Cr, with chroma planes of (w / 2) x (h / 2) samples. Confirm against
+// whatever produces inp_buf, in particular for odd widths/heights.
+static bool yuvbench_accelerate_convert(Ctx* ctx)
+{
+    AccelerateCtx* accel = (AccelerateCtx*)ctx->user;
+    const uint32_t w = ctx->inp_w;
+    const uint32_t h = ctx->inp_h;
+    const size_t chroma_w = w / 2;
+    const size_t chroma_h = h / 2;
+
+    // Plane offsets are computed in size_t. The Cr offset is the size of the
+    // Cb plane exactly as described to vImage below: (w / 2) * (h / 2).
+    // Writing it as `w / 2 * h / 2` would parse as ((w / 2) * h) / 2, which
+    // gives a different offset when h is odd.
+    const uint8_t* Y = (const uint8_t*)ctx->inp_buf;
+    const uint8_t* Cb = Y + ((size_t)w * (size_t)h);
+    const uint8_t* Cr = Cb + (chroma_w * chroma_h);
+
+    // BT.709
+    vImage_YpCbCrToARGBMatrix matrix = {
+        .Yp = 1.0f,
+        .Cr_R = 1.5748f,
+        .Cr_G = -0.4681f,
+        .Cb_G = -0.1873f,
+        .Cb_B = 1.8556f,
+    };
+
+    // 8-bit sample range: luma in [16, 235], chroma in [16, 240] centred on 128.
+    vImage_YpCbCrPixelRange pr = {
+        .Yp_bias = 16,
+        .CbCr_bias = 128,
+        .YpRangeMax = 235,
+        .CbCrRangeMax = 240,
+        .YpMax = 235,
+        .YpMin = 16,
+        .CbCrMax = 240,
+        .CbCrMin = 16,
+    };
+
+    // Conversion description filled in by vImage from the matrix and range.
+    vImage_YpCbCrToARGB out = { 0 };
+    if (vImageConvert_YpCbCrToARGB_GenerateConversion(&matrix, &pr, &out, kvImage420Yp8_Cb8_Cr8, kvImageARGB8888, kvImageNoFlags) != kvImageNoError) {
+        return false;
+    }
+
+    // vImage_Buffer.data is a non-const void*, hence the casts on the
+    // read-only source planes.
+    vImage_Buffer ypbuf = {
+        .data = (void*)Y,
+        .width = w,
+        .height = h,
+        .rowBytes = w,
+    };
+    vImage_Buffer cbbuf = {
+        .data = (void*)Cb,
+        .width = chroma_w,
+        .height = chroma_h,
+        .rowBytes = chroma_w,
+    };
+    vImage_Buffer crbuf = {
+        .data = (void*)Cr,
+        .width = chroma_w,
+        .height = chroma_h,
+        .rowBytes = chroma_w,
+    };
+    vImage_Buffer rgbabuf = {
+        .data = accel->rgba_buf,
+        .width = w,
+        .height = h,
+        .rowBytes = (size_t)w * 4,
+    };
+
+    // Identity permutation: keep the channel order vImage produces.
+    const uint8_t permute[4] = { 0, 1, 2, 3 };
+
+    // Step 1: planar 4:2:0 -> ARGB8888, with 0xFF passed as the alpha value.
+    if (vImageConvert_420Yp8_Cb8_Cr8ToARGB8888(&ypbuf, &cbbuf, &crbuf, &rgbabuf, &out, permute, 0xFF, kvImageNoFlags) != kvImageNoError) {
+        return false;
+    }
+
+    // Step 2: pack the 4-byte pixels down to 3-byte RGB in the caller's buffer.
+    vImage_Buffer rgbbuf = {
+        .data = ctx->out_buf,
+        .width = w,
+        .height = h,
+        .rowBytes = (size_t)w * 3,
+    };
+    if (vImageConvert_ARGB8888toRGB888(&rgbabuf, &rgbbuf, kvImageNoFlags) != kvImageNoError) {
+        return false;
+    }
+    return true;
+}
+
+// Returns the Backend descriptor for the Accelerate implementation, wiring up
+// its init, deinit and convert callbacks. Any other Backend members are
+// zero-initialized.
+Backend yuvbench_accelerate(void)
+{
+    const Backend backend = {
+        .init_fn = yuvbench_accelerate_init,
+        .deinit_fn = yuvbench_accelerate_deinit,
+        .convert_fn = yuvbench_accelerate_convert,
+    };
+    return backend;
+}