clang: Add -fconvergent-functions flag

arsenm · arsenm · commit e531750c6cf9 · 2019-11-19T23:20:15.000+05:30
The CUDA builtin library is apparently compiled in C++ mode, so the
assumption of convergent needs to be made in a typically non-SPMD
language. The functions in the library should still be assumed
convergent. Currently they are not, which is potentially incorrect and
this happens to work after the library is linked.
diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def
@@ -122,6 +122,7 @@ LANGOPT(WritableStrings   , 1, 0, "writable string support")
 LANGOPT(ConstStrings      , 1, 0, "const-qualified string support")
 ENUM_LANGOPT(LaxVectorConversions, LaxVectorConversionKind, 2,
              LaxVectorConversionKind::All, "lax vector conversions")
+LANGOPT(ConvergentFunctions, 1, 1, "Assume convergent functions")
 LANGOPT(AltiVec           , 1, 0, "AltiVec-style vector initializers")
 LANGOPT(ZVector           , 1, 0, "System z vector extensions")
 LANGOPT(Exceptions        , 1, 0, "exception handling")
diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h
@@ -312,7 +312,7 @@ class LangOptions : public LangOptionsBase {
   }
 
   bool assumeFunctionsAreConvergent() const {
-    return (CUDA && CUDAIsDevice) || OpenCL;
+    return ConvergentFunctions;
   }
 
   /// Return the OpenCL C or C++ version as a VersionTuple.
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
@@ -546,6 +546,9 @@ def cxx_isystem : JoinedOrSeparate<["-"], "cxx-isystem">, Group<clang_i_Group>,
   MetaVarName<"<directory>">;
 def c : Flag<["-"], "c">, Flags<[DriverOption]>, Group<Action_Group>,
   HelpText<"Only run preprocess, compile, and assemble steps">;
+def fconvergent_functions : Joined<["-"], "fconvergent-functions">, Group<f_Group>, Flags<[CC1Option]>,
+  HelpText<"Assume functions may be convergent">;
+
 def cuda_device_only : Flag<["--"], "cuda-device-only">,
   HelpText<"Compile CUDA code for device only">;
 def cuda_host_only : Flag<["--"], "cuda-host-only">,
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -2776,6 +2776,9 @@ static void ParseLangArgs(LangOptions &Opts, ArgList &Args, InputKind IK,
   Opts.BlocksRuntimeOptional = Args.hasArg(OPT_fblocks_runtime_optional);
   Opts.Coroutines = Opts.CPlusPlus2a || Args.hasArg(OPT_fcoroutines_ts);
 
+  Opts.ConvergentFunctions = Opts.OpenCL || (Opts.CUDA && Opts.CUDAIsDevice) ||
+    Args.hasArg(OPT_fconvergent_functions);
+
   Opts.DoubleSquareBracketAttributes =
       Args.hasFlag(OPT_fdouble_square_bracket_attributes,
                    OPT_fno_double_square_bracket_attributes,
diff --git a/clang/test/CodeGen/convergent-functions.cpp b/clang/test/CodeGen/convergent-functions.cpp
@@ -0,0 +1,8 @@
+// RUN: %clang_cc1 -triple i386-pc-win32 -emit-llvm -fconvergent-functions -o - < %s | FileCheck -check-prefix=CONVFUNC %s
+// RUN: %clang_cc1 -triple i386-pc-win32 -emit-llvm -o - < %s | FileCheck -check-prefix=NOCONVFUNC %s
+
+// Test that the -fconvergent-functions flag works
+
+// CONVFUNC: attributes #0 = { convergent {{.*}} }
+// NOCONVFUNC-NOT: convergent
+void func() { }
diff --git a/clang/test/CodeGenCUDA/propagate-metadata.cu b/clang/test/CodeGenCUDA/propagate-metadata.cu
@@ -11,7 +11,7 @@
 
 // Build the bitcode library.  This is not built in CUDA mode, otherwise it
 // might have incompatible attributes.  This mirrors how libdevice is built.
-// RUN: %clang_cc1 -x c++ -emit-llvm-bc -ftrapping-math -DLIB \
+// RUN: %clang_cc1 -x c++ -fconvergent-functions -emit-llvm-bc -ftrapping-math -DLIB \
 // RUN:   %s -o %t.bc -triple nvptx-unknown-unknown
 
 // RUN: %clang_cc1 -x cuda %s -emit-llvm -mlink-builtin-bitcode %t.bc -o - \
@@ -53,7 +53,8 @@ __global__ void kernel() { lib_fn(); }
 
 // Check the attribute list.
 // CHECK: attributes [[attr]] = {
-// CHECK: "no-trapping-math"="true"
+// CHECK-SAME: convergent
+// CHECK-SAME: "no-trapping-math"="true"
 
 // FTZ-SAME: "nvptx-f32ftz"="true"
 // NOFTZ-NOT: "nvptx-f32ftz"="true"

Original file line number	Diff line number	Diff line change
`@@ -312,7 +312,7 @@ class LangOptions : public LangOptionsBase {`
`312`	`312`	`}`
`313`	`313`
`314`	`314`	`bool assumeFunctionsAreConvergent() const {`
`315`		`- return (CUDA && CUDAIsDevice) \|\| OpenCL;`
	`315`	`+ return ConvergentFunctions;`
`316`	`316`	`}`
`317`	`317`
`318`	`318`	`/// Return the OpenCL C or C++ version as a VersionTuple.`