Skip to content

Commit 5942868

Browse files
author
Mészáros Gergely
authored
[clang][AMDGPU][CUDA] Handle __builtin_printf for device printf (#68515)
Previously `__builtin_printf` would result to emitting call to `printf`, even though directly calling `printf` was translated. Ref: #68478
1 parent ee06678 commit 5942868

File tree

4 files changed

+47
-1
lines changed

4 files changed

+47
-1
lines changed

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5710,6 +5710,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
57105710
Value *HalfVal = Builder.CreateLoad(Address);
57115711
return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
57125712
}
5713+
case Builtin::BI__builtin_printf:
57135714
case Builtin::BIprintf:
57145715
if (getTarget().getTriple().isNVPTX() ||
57155716
getTarget().getTriple().isAMDGCN()) {

clang/lib/CodeGen/CGGPUBuiltin.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,8 @@ RValue EmitDevicePrintfCallExpr(const CallExpr *E, CodeGenFunction *CGF,
136136
llvm::Function *Decl, bool WithSizeArg) {
137137
CodeGenModule &CGM = CGF->CGM;
138138
CGBuilderTy &Builder = CGF->Builder;
139-
assert(E->getBuiltinCallee() == Builtin::BIprintf);
139+
assert(E->getBuiltinCallee() == Builtin::BIprintf ||
140+
E->getBuiltinCallee() == Builtin::BI__builtin_printf);
140141
assert(E->getNumArgs() >= 1); // printf always has at least one arg.
141142

142143
// Uses the same format as nvptx for the argument packing, but also passes
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// REQUIRES: x86-registered-target
2+
// REQUIRES: nvptx-registered-target
3+
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -emit-llvm -disable-llvm-optzns -fno-builtin-printf -fcuda-is-device \
4+
// RUN: -o - %s | FileCheck %s
5+
6+
#define __device__ __attribute__((device))
7+
8+
extern "C" __device__ int printf(const char *format, ...);
9+
10+
// CHECK-LABEL: @_Z4foo1v()
11+
__device__ int foo1() {
12+
// CHECK: call i32 @vprintf
13+
// CHECK-NOT: call i32 (ptr, ...) @printf
14+
return __builtin_printf("Hello World\n");
15+
}
16+
17+
// CHECK-LABEL: @_Z4foo2v()
18+
__device__ int foo2() {
19+
// CHECK: call i32 (ptr, ...) @printf
20+
return printf("Hello World\n");
21+
}
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
// REQUIRES: amdgpu-registered-target
2+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -disable-llvm-optzns -mprintf-kind=hostcall -fno-builtin-printf -fcuda-is-device \
3+
// RUN: -o - %s | FileCheck --check-prefixes=CHECK,HOSTCALL %s
4+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -emit-llvm -disable-llvm-optzns -mprintf-kind=buffered -fno-builtin-printf -fcuda-is-device \
5+
// RUN: -o - %s | FileCheck --check-prefixes=CHECK,BUFFERED %s
6+
7+
#define __device__ __attribute__((device))
8+
9+
extern "C" __device__ int printf(const char *format, ...);
10+
11+
// CHECK-LABEL: @_Z4foo1v()
12+
__device__ int foo1() {
13+
// HOSTCALL: call i64 @__ockl_printf_begin
14+
// BUFFERED: call ptr addrspace(1) @__printf_alloc
15+
// CHECK-NOT: call i32 (ptr, ...) @printf
16+
return __builtin_printf("Hello World\n");
17+
}
18+
19+
// CHECK-LABEL: @_Z4foo2v()
20+
__device__ int foo2() {
21+
// CHECK: call i32 (ptr, ...) @printf
22+
return printf("Hello World\n");
23+
}

0 commit comments

Comments
 (0)