Skip to content

Commit e531750

Browse files
committed
clang: Add -fconvergent-functions flag
The CUDA builtin library is apparently compiled in C++ mode, so it must be possible to assume convergence in a language that is not typically SPMD. The functions in the library should still be assumed convergent. Currently they are not, which is potentially incorrect and only happens to work after the library is linked.
1 parent ce5de93 commit e531750

File tree

6 files changed

+19
-3
lines changed

6 files changed

+19
-3
lines changed

clang/include/clang/Basic/LangOptions.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ LANGOPT(WritableStrings , 1, 0, "writable string support")
122122
LANGOPT(ConstStrings , 1, 0, "const-qualified string support")
123123
ENUM_LANGOPT(LaxVectorConversions, LaxVectorConversionKind, 2,
124124
LaxVectorConversionKind::All, "lax vector conversions")
125+
// Off by default: ParseLangArgs turns this on for OpenCL, CUDA device
// compilation, or when -fconvergent-functions is passed.
LANGOPT(ConvergentFunctions, 1, 0, "Assume convergent functions")
125126
LANGOPT(AltiVec , 1, 0, "AltiVec-style vector initializers")
126127
LANGOPT(ZVector , 1, 0, "System z vector extensions")
127128
LANGOPT(Exceptions , 1, 0, "exception handling")

clang/include/clang/Basic/LangOptions.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,7 @@ class LangOptions : public LangOptionsBase {
312312
}
313313

314314
bool assumeFunctionsAreConvergent() const {
315-
return (CUDA && CUDAIsDevice) || OpenCL;
315+
return ConvergentFunctions;
316316
}
317317

318318
/// Return the OpenCL C or C++ version as a VersionTuple.

clang/include/clang/Driver/Options.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -546,6 +546,9 @@ def cxx_isystem : JoinedOrSeparate<["-"], "cxx-isystem">, Group<clang_i_Group>,
546546
MetaVarName<"<directory>">;
547547
def c : Flag<["-"], "c">, Flags<[DriverOption]>, Group<Action_Group>,
548548
HelpText<"Only run preprocess, compile, and assemble steps">;
549+
// Valueless switch: declared as Flag (not Joined) so that only the exact
// spelling -fconvergent-functions is accepted, with no trailing text.
def fconvergent_functions : Flag<["-"], "fconvergent-functions">, Group<f_Group>, Flags<[CC1Option]>,
550+
HelpText<"Assume functions may be convergent">;
551+
549552
def cuda_device_only : Flag<["--"], "cuda-device-only">,
550553
HelpText<"Compile CUDA code for device only">;
551554
def cuda_host_only : Flag<["--"], "cuda-host-only">,

clang/lib/Frontend/CompilerInvocation.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2776,6 +2776,9 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
27762776
Opts.BlocksRuntimeOptional = Args.hasArg(OPT_fblocks_runtime_optional);
27772777
Opts.Coroutines = Opts.CPlusPlus2a || Args.hasArg(OPT_fcoroutines_ts);
27782778

2779+
Opts.ConvergentFunctions = Opts.OpenCL || (Opts.CUDA && Opts.CUDAIsDevice) ||
2780+
Args.hasArg(OPT_fconvergent_functions);
2781+
27792782
Opts.DoubleSquareBracketAttributes =
27802783
Args.hasFlag(OPT_fdouble_square_bracket_attributes,
27812784
OPT_fno_double_square_bracket_attributes,
clang/test/CodeGen/convergent-functions.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
// RUN: %clang_cc1 -triple i386-pc-win32 -emit-llvm -fconvergent-functions -o - < %s | FileCheck -check-prefix=CONVFUNC %s
2+
// RUN: %clang_cc1 -triple i386-pc-win32 -emit-llvm -o - < %s | FileCheck -check-prefix=NOCONVFUNC %s
3+
4+
// Test that the -fconvergent-functions flag works
5+
6+
// CONVFUNC: attributes #0 = { convergent {{.*}} }
7+
// NOCONVFUNC-NOT: convergent
8+
void func() { }

clang/test/CodeGenCUDA/propagate-metadata.cu

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
// Build the bitcode library. This is not built in CUDA mode, otherwise it
1313
// might have incompatible attributes. This mirrors how libdevice is built.
14-
// RUN: %clang_cc1 -x c++ -emit-llvm-bc -ftrapping-math -DLIB \
14+
// RUN: %clang_cc1 -x c++ -fconvergent-functions -emit-llvm-bc -ftrapping-math -DLIB \
1515
// RUN: %s -o %t.bc -triple nvptx-unknown-unknown
1616

1717
// RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-builtin-bitcode %t.bc -o - \
@@ -53,7 +53,8 @@ __global__ void kernel() { lib_fn(); }
5353

5454
// Check the attribute list.
5555
// CHECK: attributes [[attr]] = {
56-
// CHECK: "no-trapping-math"="true"
56+
// CHECK-SAME: convergent
57+
// CHECK-SAME: "no-trapping-math"="true"
5758

5859
// FTZ-SAME: "nvptx-f32ftz"="true"
5960
// NOFTZ-NOT: "nvptx-f32ftz"="true"

0 commit comments

Comments
 (0)