Skip to content

Commit 347fb20

Browse files
authored
[libclc] Optimize CLC vector relational builtins (llvm#124537)
Clang knows how to perform relational operations on OpenCL vectors, so we don't need to use the Clang builtins. The builtins we were using didn't support vector types, so we were previously scalarizing. This commit generates the same LLVM fcmp operations as before, just without the scalarization.
1 parent ef54e0b commit 347fb20

File tree

8 files changed

+90
-130
lines changed

8 files changed

+90
-130
lines changed

libclc/clc/include/clc/relational/relational.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,4 +142,30 @@
142142
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(RET_TYPE, FUNCTION, ARG0_TYPE, \
143143
ARG1_TYPE)
144144

145+
#define _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(RET_TYPE, RET_TYPE_VEC, FUNCTION, \
146+
ARG1_TYPE, ARG2_TYPE) \
147+
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
148+
return _CLC_RELATIONAL_OP(x, y); \
149+
} \
150+
_CLC_DEF _CLC_OVERLOAD RET_TYPE_VEC##2 FUNCTION(ARG1_TYPE##2 x, \
151+
ARG2_TYPE##2 y) { \
152+
return _CLC_RELATIONAL_OP(x, y); \
153+
} \
154+
_CLC_DEF _CLC_OVERLOAD RET_TYPE_VEC##3 FUNCTION(ARG1_TYPE##3 x, \
155+
ARG2_TYPE##3 y) { \
156+
return _CLC_RELATIONAL_OP(x, y); \
157+
} \
158+
_CLC_DEF _CLC_OVERLOAD RET_TYPE_VEC##4 FUNCTION(ARG1_TYPE##4 x, \
159+
ARG2_TYPE##4 y) { \
160+
return _CLC_RELATIONAL_OP(x, y); \
161+
} \
162+
_CLC_DEF _CLC_OVERLOAD RET_TYPE_VEC##8 FUNCTION(ARG1_TYPE##8 x, \
163+
ARG2_TYPE##8 y) { \
164+
return _CLC_RELATIONAL_OP(x, y); \
165+
} \
166+
_CLC_DEF _CLC_OVERLOAD RET_TYPE_VEC##16 FUNCTION(ARG1_TYPE##16 x, \
167+
ARG2_TYPE##16 y) { \
168+
return _CLC_RELATIONAL_OP(x, y); \
169+
}
170+
145171
#endif // __CLC_RELATIONAL_RELATIONAL_H__
Lines changed: 11 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,44 +1,28 @@
11
#include <clc/internal/clc.h>
2+
#include <clc/relational/relational.h>
23

3-
#define _CLC_DEFINE_ISEQUAL(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \
4-
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
5-
return (x == y); \
6-
}
4+
// Comparison applied by every __clc_isequal overload that
// _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY generates. The whole expansion is
// parenthesized so that it behaves as a single operand wherever it is used.
#define _CLC_RELATIONAL_OP(X, Y) ((X) == (Y))
75

8-
_CLC_DEFINE_ISEQUAL(int, __clc_isequal, float, float)
9-
_CLC_DEFINE_ISEQUAL(int2, __clc_isequal, float2, float2)
10-
_CLC_DEFINE_ISEQUAL(int3, __clc_isequal, float3, float3)
11-
_CLC_DEFINE_ISEQUAL(int4, __clc_isequal, float4, float4)
12-
_CLC_DEFINE_ISEQUAL(int8, __clc_isequal, float8, float8)
13-
_CLC_DEFINE_ISEQUAL(int16, __clc_isequal, float16, float16)
6+
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_isequal, float, float)
147

158
#ifdef cl_khr_fp64
169

1710
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
1811

19-
// The scalar version of __clc_isequal(double) returns an int, but the vector
20-
// versions return long.
21-
_CLC_DEFINE_ISEQUAL(int, __clc_isequal, double, double)
22-
_CLC_DEFINE_ISEQUAL(long2, __clc_isequal, double2, double2)
23-
_CLC_DEFINE_ISEQUAL(long3, __clc_isequal, double3, double3)
24-
_CLC_DEFINE_ISEQUAL(long4, __clc_isequal, double4, double4)
25-
_CLC_DEFINE_ISEQUAL(long8, __clc_isequal, double8, double8)
26-
_CLC_DEFINE_ISEQUAL(long16, __clc_isequal, double16, double16)
12+
// The scalar version of __clc_isequal(double, double) returns an int, but the
13+
// vector versions return long.
14+
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_isequal, double, double)
2715

2816
#endif
17+
2918
#ifdef cl_khr_fp16
3019

3120
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
3221

33-
// The scalar version of __clc_isequal(half) returns an int, but the vector
34-
// versions return short.
35-
_CLC_DEFINE_ISEQUAL(int, __clc_isequal, half, half)
36-
_CLC_DEFINE_ISEQUAL(short2, __clc_isequal, half2, half2)
37-
_CLC_DEFINE_ISEQUAL(short3, __clc_isequal, half3, half3)
38-
_CLC_DEFINE_ISEQUAL(short4, __clc_isequal, half4, half4)
39-
_CLC_DEFINE_ISEQUAL(short8, __clc_isequal, half8, half8)
40-
_CLC_DEFINE_ISEQUAL(short16, __clc_isequal, half16, half16)
22+
// The scalar version of __clc_isequal(half, half) returns an int, but the
23+
// vector versions return short.
24+
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_isequal, half, half)
4125

4226
#endif
4327

44-
#undef _CLC_DEFINE_ISEQUAL
28+
#undef _CLC_RELATIONAL_OP
Lines changed: 6 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,17 @@
11
#include <clc/internal/clc.h>
22
#include <clc/relational/relational.h>
33

4-
// Note: It would be nice to use __builtin_isgreater with vector inputs, but it
5-
// seems to only take scalar values as input, which will produce incorrect
6-
// output for vector input types.
4+
// Comparison applied by every __clc_isgreater overload that
// _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY generates. The whole expansion is
// parenthesized so that it behaves as a single operand wherever it is used.
#define _CLC_RELATIONAL_OP(X, Y) ((X) > (Y))
75

8-
_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_isgreater, __builtin_isgreater, float,
9-
float)
6+
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_isgreater, float, float)
107

118
#ifdef cl_khr_fp64
129

1310
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
1411

1512
// The scalar version of __clc_isgreater(double, double) returns an int, but the
1613
// vector versions return long.
17-
18-
_CLC_DEF _CLC_OVERLOAD int __clc_isgreater(double x, double y) {
19-
return __builtin_isgreater(x, y);
20-
}
21-
22-
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isgreater, double, double)
14+
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_isgreater, double, double)
2315

2416
#endif
2517

@@ -29,11 +21,8 @@ _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isgreater, double, double)
2921

3022
// The scalar version of __clc_isgreater(half, half) returns an int, but the
3123
// vector versions return short.
32-
33-
_CLC_DEF _CLC_OVERLOAD int __clc_isgreater(half x, half y) {
34-
return __builtin_isgreater(x, y);
35-
}
36-
37-
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isgreater, half, half)
24+
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_isgreater, half, half)
3825

3926
#endif
27+
28+
#undef _CLC_RELATIONAL_OP
Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,31 @@
11
#include <clc/internal/clc.h>
22
#include <clc/relational/relational.h>
33

4-
// Note: It would be nice to use __builtin_isgreaterequal with vector inputs,
5-
// but it seems to only take scalar values as input, which will produce
6-
// incorrect output for vector input types.
4+
// Comparison applied by every __clc_isgreaterequal overload that
// _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY generates. The whole expansion is
// parenthesized so that it behaves as a single operand wherever it is used.
#define _CLC_RELATIONAL_OP(X, Y) ((X) >= (Y))
75

8-
_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_isgreaterequal,
9-
__builtin_isgreaterequal, float, float)
6+
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_isgreaterequal, float,
7+
float)
108

119
#ifdef cl_khr_fp64
1210

1311
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
1412

1513
// The scalar version of __clc_isgreaterequal(double, double) returns an int,
1614
// but the vector versions return long.
17-
18-
_CLC_DEF _CLC_OVERLOAD int __clc_isgreaterequal(double x, double y) {
19-
return __builtin_isgreaterequal(x, y);
20-
}
21-
22-
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isgreaterequal, double,
23-
double)
15+
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_isgreaterequal, double,
16+
double)
2417

2518
#endif
19+
2620
#ifdef cl_khr_fp16
2721

2822
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
2923

30-
// The scalar version of __clc_isgreaterequal(half, half) returns an int, but
24+
// The scalar version of __clc_isgreaterequal(half, half) returns an int, but
3125
// the vector versions return short.
32-
33-
_CLC_DEF _CLC_OVERLOAD int __clc_isgreaterequal(half x, half y) {
34-
return __builtin_isgreaterequal(x, y);
35-
}
36-
37-
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isgreaterequal, half, half)
26+
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_isgreaterequal, half,
27+
half)
3828

3929
#endif
30+
31+
#undef _CLC_RELATIONAL_OP
Lines changed: 11 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,28 @@
11
#include <clc/internal/clc.h>
22
#include <clc/relational/relational.h>
33

4-
// Note: It would be nice to use __builtin_isless with vector inputs, but it
5-
// seems to only take scalar values as input, which will produce incorrect
6-
// output for vector input types.
4+
// Comparison applied by every __clc_isless overload that
// _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY generates. The whole expansion is
// parenthesized so that it behaves as a single operand wherever it is used.
#define _CLC_RELATIONAL_OP(X, Y) ((X) < (Y))
75

8-
_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_isless, __builtin_isless, float, float)
6+
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_isless, float, float)
97

108
#ifdef cl_khr_fp64
119

1210
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
1311

14-
// The scalar version of __clc_isless(double, double) returns an int, but the
15-
// vector versions return long.
16-
17-
_CLC_DEF _CLC_OVERLOAD int __clc_isless(double x, double y) {
18-
return __builtin_isless(x, y);
19-
}
20-
21-
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isless, double, double)
12+
// The scalar version of __clc_isless(double, double) returns an int, but
13+
// the vector versions return long.
14+
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_isless, double, double)
2215

2316
#endif
17+
2418
#ifdef cl_khr_fp16
2519

2620
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
2721

28-
// The scalar version of __clc_isless(half, half) returns an int, but the vector
29-
// versions return short.
30-
31-
_CLC_DEF _CLC_OVERLOAD int __clc_isless(half x, half y) {
32-
return __builtin_isless(x, y);
33-
}
34-
35-
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isless, half, half)
22+
// The scalar version of __clc_isless(half, half) returns an int, but the
23+
// vector versions return short.
24+
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_isless, half, half)
3625

3726
#endif
27+
28+
#undef _CLC_RELATIONAL_OP
Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,18 @@
11
#include <clc/internal/clc.h>
22
#include <clc/relational/relational.h>
33

4-
// Note: It would be nice to use __builtin_islessequal with vector inputs, but
5-
// it seems to only take scalar values as input, which will produce incorrect
6-
// output for vector input types.
4+
// Comparison applied by every __clc_islessequal overload that
// _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY generates. The whole expansion is
// parenthesized so that it behaves as a single operand wherever it is used.
#define _CLC_RELATIONAL_OP(X, Y) ((X) <= (Y))
75

8-
_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_islessequal, __builtin_islessequal,
9-
float, float)
6+
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_islessequal, float, float)
107

118
#ifdef cl_khr_fp64
129

1310
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
1411

1512
// The scalar version of __clc_islessequal(double, double) returns an int, but
1613
// the vector versions return long.
17-
18-
_CLC_DEF _CLC_OVERLOAD int __clc_islessequal(double x, double y) {
19-
return __builtin_islessequal(x, y);
20-
}
21-
22-
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_islessequal, double, double)
14+
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_islessequal, double,
15+
double)
2316

2417
#endif
2518

@@ -29,11 +22,8 @@ _CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_islessequal, double, double)
2922

3023
// The scalar version of __clc_islessequal(half, half) returns an int, but the
3124
// vector versions return short.
32-
33-
_CLC_DEF _CLC_OVERLOAD int __clc_islessequal(half x, half y) {
34-
return __builtin_islessequal(x, y);
35-
}
36-
37-
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_islessequal, half, half)
25+
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_islessequal, half, half)
3826

3927
#endif
28+
29+
#undef _CLC_RELATIONAL_OP
Lines changed: 10 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,31 @@
11
#include <clc/internal/clc.h>
22
#include <clc/relational/relational.h>
33

4-
// Note: It would be nice to use __builtin_islessgreater with vector inputs, but
5-
// it seems to only take scalar values as input, which will produce incorrect
6-
// output for vector input types.
4+
// Comparison applied by every __clc_islessgreater overload that
// _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY generates: true when X is strictly less
// than or strictly greater than Y. The outer parentheses keep the `||` from
// regrouping with operators adjacent to the macro use (e.g. a leading `!` or a
// trailing `&&`).
#define _CLC_RELATIONAL_OP(X, Y) (((X) < (Y)) || ((X) > (Y)))
75

8-
_CLC_DEFINE_RELATIONAL_BINARY(int, __clc_islessgreater, __builtin_islessgreater,
9-
float, float)
6+
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_islessgreater, float,
7+
float)
108

119
#ifdef cl_khr_fp64
1210

1311
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
1412

1513
// The scalar version of __clc_islessgreater(double, double) returns an int, but
1614
// the vector versions return long.
17-
18-
_CLC_DEF _CLC_OVERLOAD int __clc_islessgreater(double x, double y) {
19-
return __builtin_islessgreater(x, y);
20-
}
21-
22-
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_islessgreater, double, double)
15+
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_islessgreater, double,
16+
double)
2317

2418
#endif
19+
2520
#ifdef cl_khr_fp16
2621

2722
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
2823

2924
// The scalar version of __clc_islessgreater(half, half) returns an int, but the
3025
// vector versions return short.
31-
32-
_CLC_DEF _CLC_OVERLOAD int __clc_islessgreater(half x, half y) {
33-
return __builtin_islessgreater(x, y);
34-
}
35-
36-
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_islessgreater, half, half)
26+
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_islessgreater, half,
27+
half)
3728

3829
#endif
30+
31+
#undef _CLC_RELATIONAL_OP
Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,28 @@
11
#include <clc/internal/clc.h>
22
#include <clc/relational/relational.h>
33

4-
#define _CLC_DEFINE_ISNOTEQUAL(RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE) \
5-
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
6-
return (x != y); \
7-
}
4+
// Comparison applied by every __clc_isnotequal overload that
// _CLC_DEFINE_SIMPLE_RELATIONAL_BINARY generates. The whole expansion is
// parenthesized so that it behaves as a single operand wherever it is used.
#define _CLC_RELATIONAL_OP(X, Y) ((X) != (Y))
85

9-
_CLC_DEFINE_ISNOTEQUAL(int, __clc_isnotequal, float, float)
10-
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(int, __clc_isnotequal, float, float)
6+
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, int, __clc_isnotequal, float, float)
117

128
#ifdef cl_khr_fp64
9+
1310
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
1411

1512
// The scalar version of __clc_isnotequal(double, double) returns an int, but
1613
// the vector versions return long.
17-
18-
_CLC_DEFINE_ISNOTEQUAL(int, __clc_isnotequal, double, double)
19-
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(long, __clc_isnotequal, double, double)
14+
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, long, __clc_isnotequal, double, double)
2015

2116
#endif
17+
2218
#ifdef cl_khr_fp16
19+
2320
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
2421

2522
// The scalar version of __clc_isnotequal(half, half) returns an int, but the
2623
// vector versions return short.
27-
28-
_CLC_DEFINE_ISNOTEQUAL(int, __clc_isnotequal, half, half)
29-
_CLC_DEFINE_RELATIONAL_BINARY_VEC_ALL(short, __clc_isnotequal, half, half)
24+
_CLC_DEFINE_SIMPLE_RELATIONAL_BINARY(int, short, __clc_isnotequal, half, half)
3025

3126
#endif
3227

33-
#undef _CLC_DEFINE_ISNOTEQUAL
28+
#undef _CLC_RELATIONAL_OP

0 commit comments

Comments
 (0)