[libclc] Optimize CLC vector relational builtins (llvm#124537)

frasercrmck · web-flow · commit 347fb208c1e3 · 2025-01-27T13:25:37.000Z
Clang knows how to perform relational operations on OpenCL vectors, so
we don't need to use the Clang builtins. The builtins we were using
didn't support vector types, so we were previously scalarizing.

This commit generates the same LLVM fcmp operations as before, just
without the scalarization.
diff --git a/libclc/clc/include/clc/relational/relational.h b/libclc/clc/include/clc/relational/relational.h
@@ -142,4 +142,30 @@
   _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(RET_TYPE, FUNCTION, ARG0_TYPE,         \
                                         ARG1_TYPE)
 
+#define _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(RET_TYPE, RET_TYPE_VEC, FUNCTION, \
+                                             ARG1_TYPE, ARG2_TYPE)             \
+  _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) {         \
+    return _CLC_RELATIONAL_OP(x, y);                                           \
+  }                                                                            \
+  _CLC_DEF _CLC_OVERLOAD RET_TYPE_VEC##2 FUNCTION(ARG1_TYPE##2 x,              \
+                                                  ARG2_TYPE##2 y) {            \
+    return _CLC_RELATIONAL_OP(x, y);                                           \
+  }                                                                            \
+  _CLC_DEF _CLC_OVERLOAD RET_TYPE_VEC##3 FUNCTION(ARG1_TYPE##3 x,              \
+                                                  ARG2_TYPE##3 y) {            \
+    return _CLC_RELATIONAL_OP(x, y);                                           \
+  }                                                                            \
+  _CLC_DEF _CLC_OVERLOAD RET_TYPE_VEC##4 FUNCTION(ARG1_TYPE##4 x,              \
+                                                  ARG2_TYPE##4 y) {            \
+    return _CLC_RELATIONAL_OP(x, y);                                           \
+  }                                                                            \
+  _CLC_DEF _CLC_OVERLOAD RET_TYPE_VEC##8 FUNCTION(ARG1_TYPE##8 x,              \
+                                                  ARG2_TYPE##8 y) {            \
+    return _CLC_RELATIONAL_OP(x, y);                                           \
+  }                                                                            \
+  _CLC_DEF _CLC_OVERLOAD RET_TYPE_VEC##16 FUNCTION(ARG1_TYPE##16 x,            \
+                                                   ARG2_TYPE##16 y) {          \
+    return _CLC_RELATIONAL_OP(x, y);                                           \
+  }
+
 #endif // __CLC_RELATIONAL_RELATIONAL_H__
diff --git a/libclc/clc/lib/generic/relational/clc_isequal.cl b/libclc/clc/lib/generic/relational/clc_isequal.cl
@@ -1,44 +1,28 @@
 #include <clc/internal/clc.h>
+#include <clc/relational/relational.h>
 
-#define _CLC_DEFINE_ISEQUAL(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE)          \
-  _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) {         \
-    return (x == y);                                                           \
-  }
+#define _CLC_RELATIONAL_OP(X, Y) (X) == (Y)
 
-_CLC_DEFINE_ISEQUAL(int, __clc_isequal, float, float)
-_CLC_DEFINE_ISEQUAL(int2, __clc_isequal, float2, float2)
-_CLC_DEFINE_ISEQUAL(int3, __clc_isequal, float3, float3)
-_CLC_DEFINE_ISEQUAL(int4, __clc_isequal, float4, float4)
-_CLC_DEFINE_ISEQUAL(int8, __clc_isequal, float8, float8)
-_CLC_DEFINE_ISEQUAL(int16, __clc_isequal, float16, float16)
+_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_isequal, float, float)
 
 #ifdef cl_khr_fp64
 
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 
-// The scalar version of __clc_isequal(double) returns an int, but the vector
-// versions return long.
-_CLC_DEFINE_ISEQUAL(int, __clc_isequal, double, double)
-_CLC_DEFINE_ISEQUAL(long2, __clc_isequal, double2, double2)
-_CLC_DEFINE_ISEQUAL(long3, __clc_isequal, double3, double3)
-_CLC_DEFINE_ISEQUAL(long4, __clc_isequal, double4, double4)
-_CLC_DEFINE_ISEQUAL(long8, __clc_isequal, double8, double8)
-_CLC_DEFINE_ISEQUAL(long16, __clc_isequal, double16, double16)
+// The scalar version of __clc_isequal(double, double) returns an int, but the
+// vector versions return long.
+_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_isequal, double, double)
 
 #endif
+
 #ifdef cl_khr_fp16
 
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
 
-// The scalar version of __clc_isequal(half) returns an int, but the vector
-// versions return short.
-_CLC_DEFINE_ISEQUAL(int, __clc_isequal, half, half)
-_CLC_DEFINE_ISEQUAL(short2, __clc_isequal, half2, half2)
-_CLC_DEFINE_ISEQUAL(short3, __clc_isequal, half3, half3)
-_CLC_DEFINE_ISEQUAL(short4, __clc_isequal, half4, half4)
-_CLC_DEFINE_ISEQUAL(short8, __clc_isequal, half8, half8)
-_CLC_DEFINE_ISEQUAL(short16, __clc_isequal, half16, half16)
+// The scalar version of __clc_isequal(half, half) returns an int, but the
+// vector versions return short.
+_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_isequal, half, half)
 
 #endif
 
-#undef _CLC_DEFINE_ISEQUAL
+#undef _CLC_RELATIONAL_OP
diff --git a/libclc/clc/lib/generic/relational/clc_isgreater.cl b/libclc/clc/lib/generic/relational/clc_isgreater.cl
@@ -1,25 +1,17 @@
 #include <clc/internal/clc.h>
 #include <clc/relational/relational.h>
 
-// Note: It would be nice to use __builtin_isgreater with vector inputs, but it
-// seems to only take scalar values as input, which will produce incorrect
-// output for vector input types.
+#define _CLC_RELATIONAL_OP(X, Y) (X) > (Y)
 
-_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_isgreater, __builtin_isgreater, float,
-                              float)
+_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_isgreater, float, float)
 
 #ifdef cl_khr_fp64
 
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 
 // The scalar version of __clc_isgreater(double, double) returns an int, but the
 // vector versions return long.
-
-_CLC_DEF _CLC_OVERLOAD int __clc_isgreater(double x, double y) {
-  return __builtin_isgreater(x, y);
-}
-
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isgreater, double, double)
+_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_isgreater, double, double)
 
 #endif
 
@@ -29,11 +21,8 @@ _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isgreater, double, double)
 
 // The scalar version of __clc_isgreater(half, half) returns an int, but the
 // vector versions return short.
-
-_CLC_DEF _CLC_OVERLOAD int __clc_isgreater(half x, half y) {
-  return __builtin_isgreater(x, y);
-}
-
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isgreater, half, half)
+_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_isgreater, half, half)
 
 #endif
+
+#undef _CLC_RELATIONAL_OP
diff --git a/libclc/clc/lib/generic/relational/clc_isgreaterequal.cl b/libclc/clc/lib/generic/relational/clc_isgreaterequal.cl
@@ -1,39 +1,31 @@
 #include <clc/internal/clc.h>
 #include <clc/relational/relational.h>
 
-// Note: It would be nice to use __builtin_isgreaterequal with vector inputs,
-// but it seems to only take scalar values as input, which will produce
-// incorrect output for vector input types.
+#define _CLC_RELATIONAL_OP(X, Y) (X) >= (Y)
 
-_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_isgreaterequal,
-                              __builtin_isgreaterequal, float, float)
+_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_isgreaterequal, float,
+                                     float)
 
 #ifdef cl_khr_fp64
 
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 
 // The scalar version of __clc_isgreaterequal(double, double) returns an int,
 // but the vector versions return long.
-
-_CLC_DEF _CLC_OVERLOAD int __clc_isgreaterequal(double x, double y) {
-  return __builtin_isgreaterequal(x, y);
-}
-
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isgreaterequal, double,
-                                      double)
+_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_isgreaterequal, double,
+                                     double)
 
 #endif
+
 #ifdef cl_khr_fp16
 
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
 
-// The scalar version of __clc_isgreaterequal(half, half) returns an int, but
+// The scalar version of __clc_isgreaterequal(half, half) returns an int, but
 // the vector versions return short.
-
-_CLC_DEF _CLC_OVERLOAD int __clc_isgreaterequal(half x, half y) {
-  return __builtin_isgreaterequal(x, y);
-}
-
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isgreaterequal, half, half)
+_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_isgreaterequal, half,
+                                     half)
 
 #endif
+
+#undef _CLC_RELATIONAL_OP
diff --git a/libclc/clc/lib/generic/relational/clc_isless.cl b/libclc/clc/lib/generic/relational/clc_isless.cl
@@ -1,37 +1,28 @@
 #include <clc/internal/clc.h>
 #include <clc/relational/relational.h>
 
-// Note: It would be nice to use __builtin_isless with vector inputs, but it
-// seems to only take scalar values as input, which will produce incorrect
-// output for vector input types.
+#define _CLC_RELATIONAL_OP(X, Y) (X) < (Y)
 
-_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_isless, __builtin_isless, float, float)
+_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_isless, float, float)
 
 #ifdef cl_khr_fp64
 
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 
-// The scalar version of __clc_isless(double, double) returns an int, but the
-// vector versions return long.
-
-_CLC_DEF _CLC_OVERLOAD int __clc_isless(double x, double y) {
-  return __builtin_isless(x, y);
-}
-
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isless, double, double)
+// The scalar version of __clc_isless(double, double) returns an int, but
+// the vector versions return long.
+_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_isless, double, double)
 
 #endif
+
 #ifdef cl_khr_fp16
 
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
 
-// The scalar version of __clc_isless(half, half) returns an int, but the vector
-// versions return short.
-
-_CLC_DEF _CLC_OVERLOAD int __clc_isless(half x, half y) {
-  return __builtin_isless(x, y);
-}
-
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isless, half, half)
+// The scalar version of __clc_isless(half, half) returns an int, but the
+// vector versions return short.
+_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_isless, half, half)
 
 #endif
+
+#undef _CLC_RELATIONAL_OP
diff --git a/libclc/clc/lib/generic/relational/clc_islessequal.cl b/libclc/clc/lib/generic/relational/clc_islessequal.cl
@@ -1,25 +1,18 @@
 #include <clc/internal/clc.h>
 #include <clc/relational/relational.h>
 
-// Note: It would be nice to use __builtin_islessequal with vector inputs, but
-// it seems to only take scalar values as input, which will produce incorrect
-// output for vector input types.
+#define _CLC_RELATIONAL_OP(X, Y) (X) <= (Y)
 
-_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_islessequal, __builtin_islessequal,
-                              float, float)
+_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_islessequal, float, float)
 
 #ifdef cl_khr_fp64
 
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 
 // The scalar version of __clc_islessequal(double, double) returns an int, but
 // the vector versions return long.
-
-_CLC_DEF _CLC_OVERLOAD int __clc_islessequal(double x, double y) {
-  return __builtin_islessequal(x, y);
-}
-
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_islessequal, double, double)
+_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_islessequal, double,
+                                     double)
 
 #endif
 
@@ -29,11 +22,8 @@ _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_islessequal, double, double)
 
 // The scalar version of __clc_islessequal(half, half) returns an int, but the
 // vector versions return short.
-
-_CLC_DEF _CLC_OVERLOAD int __clc_islessequal(half x, half y) {
-  return __builtin_islessequal(x, y);
-}
-
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_islessequal, half, half)
+_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_islessequal, half, half)
 
 #endif
+
+#undef _CLC_RELATIONAL_OP
diff --git a/libclc/clc/lib/generic/relational/clc_islessgreater.cl b/libclc/clc/lib/generic/relational/clc_islessgreater.cl
@@ -1,38 +1,31 @@
 #include <clc/internal/clc.h>
 #include <clc/relational/relational.h>
 
-// Note: It would be nice to use __builtin_islessgreater with vector inputs, but
-// it seems to only take scalar values as input, which will produce incorrect
-// output for vector input types.
+#define _CLC_RELATIONAL_OP(X, Y) ((X) < (Y)) || ((X) > (Y))
 
-_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_islessgreater, __builtin_islessgreater,
-                              float, float)
+_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_islessgreater, float,
+                                     float)
 
 #ifdef cl_khr_fp64
 
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 
 // The scalar version of __clc_islessgreater(double, double) returns an int, but
 // the vector versions return long.
-
-_CLC_DEF _CLC_OVERLOAD int __clc_islessgreater(double x, double y) {
-  return __builtin_islessgreater(x, y);
-}
-
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_islessgreater, double, double)
+_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_islessgreater, double,
+                                     double)
 
 #endif
+
 #ifdef cl_khr_fp16
 
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
 
 // The scalar version of __clc_islessgreater(half, half) returns an int, but the
 // vector versions return short.
-
-_CLC_DEF _CLC_OVERLOAD int __clc_islessgreater(half x, half y) {
-  return __builtin_islessgreater(x, y);
-}
-
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_islessgreater, half, half)
+_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_islessgreater, half,
+                                     half)
 
 #endif
+
+#undef _CLC_RELATIONAL_OP
diff --git a/libclc/clc/lib/generic/relational/clc_isnotequal.cl b/libclc/clc/lib/generic/relational/clc_isnotequal.cl
@@ -1,33 +1,28 @@
 #include <clc/internal/clc.h>
 #include <clc/relational/relational.h>
 
-#define _CLC_DEFINE_ISNOTEQUAL(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE)       \
-  _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) {         \
-    return (x != y);                                                           \
-  }
+#define _CLC_RELATIONAL_OP(X, Y) (X) != (Y)
 
-_CLC_DEFINE_ISNOTEQUAL(int, __clc_isnotequal, float, float)
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(int, __clc_isnotequal, float, float)
+_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_isnotequal, float, float)
 
 #ifdef cl_khr_fp64
+
 #pragma OPENCL EXTENSION cl_khr_fp64 : enable
 
 // The scalar version of __clc_isnotequal(double, double) returns an int, but
 // the vector versions return long.
-
-_CLC_DEFINE_ISNOTEQUAL(int, __clc_isnotequal, double, double)
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isnotequal, double, double)
+_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_isnotequal, double, double)
 
 #endif
+
 #ifdef cl_khr_fp16
+
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
 
 // The scalar version of __clc_isnotequal(half, half) returns an int, but the
 // vector versions return short.
-
-_CLC_DEFINE_ISNOTEQUAL(int, __clc_isnotequal, half, half)
-_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isnotequal, half, half)
+_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_isnotequal, half, half)
 
 #endif
 
-#undef _CLC_DEFINE_ISNOTEQUAL
+#undef _CLC_RELATIONAL_OP