Skip to content

Commit c00afa6

Browse files
author
Raúl Peñacoba Veigas
committed
Fix aggregate value passing in task outlines
Closes llvm#173
1 parent 22f9ca9 commit c00afa6

File tree

2 files changed

+84
-4
lines changed

2 files changed

+84
-4
lines changed

clang/lib/CodeGen/CGOmpSsRuntime.cpp

Lines changed: 17 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2908,10 +2908,23 @@ RValue CGOmpSsRuntime::emitTaskFunction(CodeGenFunction &CGF,
29082908
Expr *ParmRef = emitTaskCallArg(CGF, "call_arg", ParQ, Loc, *ArgI);
29092909

29102910
if (!ParQ->isReferenceType()) {
2911-
ParmRef =
2912-
ImplicitCastExpr::Create(Ctx, ParmRef->getType(), CK_LValueToRValue,
2913-
ParmRef, /*BasePath=*/nullptr,
2914-
VK_PRValue, FPOptionsOverride());
2911+
switch (CGF.getEvaluationKind(ParQ)) {
2912+
case TEK_Complex:
2913+
case TEK_Scalar: {
2914+
ParmRef =
2915+
ImplicitCastExpr::Create(Ctx, ParmRef->getType(), CK_LValueToRValue,
2916+
ParmRef, /*BasePath=*/nullptr,
2917+
VK_PRValue, FPOptionsOverride());
2918+
break;
2919+
}
2920+
case TEK_Aggregate: {
2921+
ParmRef =
2922+
ImplicitCastExpr::Create(Ctx, ParmRef->getType(), CK_NoOp,
2923+
ParmRef, /*BasePath=*/nullptr,
2924+
VK_PRValue, FPOptionsOverride());
2925+
break;
2926+
}
2927+
}
29152928
FirstprivateCopies.push_back(ParmRef);
29162929

29172930
LValue ParmLV = CGF.EmitLValue(ParmRef);
Lines changed: 67 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,67 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
2+
// RUN: %clang_cc1 -triple x86_64-gnu-linux -verify -fompss-2 -disable-llvm-passes -ferror-limit 100 %s -S -emit-llvm -o - | FileCheck %s --check-prefixes=LIN64
3+
// RUN: %clang_cc1 -triple ppc64 -verify -fompss-2 -disable-llvm-passes -ferror-limit 100 %s -S -emit-llvm -o - | FileCheck %s --check-prefixes=PPC64
4+
// RUN: %clang_cc1 -triple aarch64 -verify -fompss-2 -disable-llvm-passes -ferror-limit 100 %s -S -emit-llvm -o - | FileCheck %s --check-prefixes=AARCH64
5+
// expected-no-diagnostics
6+
struct half {
7+
int x;
8+
};
9+
#pragma oss task
10+
void foo(half a);
11+
12+
int main(){
13+
half a;
14+
foo(a);
15+
#pragma oss taskwait
16+
}
17+
// LIN64-LABEL: define {{[^@]+}}@main
18+
// LIN64-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG5:![0-9]+]] {
19+
// LIN64-NEXT: entry:
20+
// LIN64-NEXT: [[A:%.*]] = alloca [[STRUCT_HALF:%.*]], align 4
21+
// LIN64-NEXT: [[CALL_ARG:%.*]] = alloca [[STRUCT_HALF]], align 4
22+
// LIN64-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[CALL_ARG]], ptr align 4 [[A]], i64 4, i1 false), !dbg [[DBG9:![0-9]+]]
23+
// LIN64-NEXT: [[TMP0:%.*]] = call token @llvm.directive.region.entry() [ "DIR.OSS"([5 x i8] c"TASK\00"), "QUAL.OSS.FIRSTPRIVATE"(ptr [[CALL_ARG]], [[STRUCT_HALF]] undef), "QUAL.OSS.DECL.SOURCE"([8 x i8] c"foo:9:9\00") ], !dbg [[DBG10:![0-9]+]]
24+
// LIN64-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_HALF]], align 4
25+
// LIN64-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[AGG_TMP]], ptr align 4 [[CALL_ARG]], i64 4, i1 false), !dbg [[DBG9]]
26+
// LIN64-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_HALF]], ptr [[AGG_TMP]], i32 0, i32 0, !dbg [[DBG10]]
27+
// LIN64-NEXT: [[TMP1:%.*]] = load i32, ptr [[COERCE_DIVE]], align 4, !dbg [[DBG10]]
28+
// LIN64-NEXT: call void @_Z3foo4half(i32 [[TMP1]]), !dbg [[DBG10]]
29+
// LIN64-NEXT: call void @llvm.directive.region.exit(token [[TMP0]]), !dbg [[DBG10]]
30+
// LIN64-NEXT: [[TMP2:%.*]] = call i1 @llvm.directive.marker() [ "DIR.OSS"([9 x i8] c"TASKWAIT\00") ], !dbg [[DBG11:![0-9]+]]
31+
// LIN64-NEXT: ret i32 0, !dbg [[DBG12:![0-9]+]]
32+
//
33+
//
34+
// PPC64-LABEL: define {{[^@]+}}@main
35+
// PPC64-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG5:![0-9]+]] {
36+
// PPC64-NEXT: entry:
37+
// PPC64-NEXT: [[A:%.*]] = alloca [[STRUCT_HALF:%.*]], align 4
38+
// PPC64-NEXT: [[CALL_ARG:%.*]] = alloca [[STRUCT_HALF]], align 4
39+
// PPC64-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[CALL_ARG]], ptr align 4 [[A]], i64 4, i1 false), !dbg [[DBG9:![0-9]+]]
40+
// PPC64-NEXT: [[TMP0:%.*]] = call token @llvm.directive.region.entry() [ "DIR.OSS"([5 x i8] c"TASK\00"), "QUAL.OSS.FIRSTPRIVATE"(ptr [[CALL_ARG]], [[STRUCT_HALF]] undef), "QUAL.OSS.DECL.SOURCE"([8 x i8] c"foo:9:9\00") ], !dbg [[DBG10:![0-9]+]]
41+
// PPC64-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_HALF]], align 4
42+
// PPC64-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[AGG_TMP]], ptr align 4 [[CALL_ARG]], i64 4, i1 false), !dbg [[DBG9]]
43+
// PPC64-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_HALF]], ptr [[AGG_TMP]], i32 0, i32 0, !dbg [[DBG10]]
44+
// PPC64-NEXT: [[TMP1:%.*]] = load i32, ptr [[COERCE_DIVE]], align 4, !dbg [[DBG10]]
45+
// PPC64-NEXT: call void @_Z3foo4half(i32 [[TMP1]]), !dbg [[DBG10]]
46+
// PPC64-NEXT: call void @llvm.directive.region.exit(token [[TMP0]]), !dbg [[DBG10]]
47+
// PPC64-NEXT: [[TMP2:%.*]] = call i1 @llvm.directive.marker() [ "DIR.OSS"([9 x i8] c"TASKWAIT\00") ], !dbg [[DBG11:![0-9]+]]
48+
// PPC64-NEXT: ret i32 0, !dbg [[DBG12:![0-9]+]]
49+
//
50+
//
51+
// AARCH64-LABEL: define {{[^@]+}}@main
52+
// AARCH64-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG5:![0-9]+]] {
53+
// AARCH64-NEXT: entry:
54+
// AARCH64-NEXT: [[A:%.*]] = alloca [[STRUCT_HALF:%.*]], align 4
55+
// AARCH64-NEXT: [[CALL_ARG:%.*]] = alloca [[STRUCT_HALF]], align 4
56+
// AARCH64-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[CALL_ARG]], ptr align 4 [[A]], i64 4, i1 false), !dbg [[DBG9:![0-9]+]]
57+
// AARCH64-NEXT: [[TMP0:%.*]] = call token @llvm.directive.region.entry() [ "DIR.OSS"([5 x i8] c"TASK\00"), "QUAL.OSS.FIRSTPRIVATE"(ptr [[CALL_ARG]], [[STRUCT_HALF]] undef), "QUAL.OSS.DECL.SOURCE"([8 x i8] c"foo:9:9\00") ], !dbg [[DBG10:![0-9]+]]
58+
// AARCH64-NEXT: [[AGG_TMP:%.*]] = alloca [[STRUCT_HALF]], align 4
59+
// AARCH64-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[AGG_TMP]], ptr align 4 [[CALL_ARG]], i64 4, i1 false), !dbg [[DBG9]]
60+
// AARCH64-NEXT: [[COERCE_DIVE:%.*]] = getelementptr inbounds [[STRUCT_HALF]], ptr [[AGG_TMP]], i32 0, i32 0, !dbg [[DBG10]]
61+
// AARCH64-NEXT: [[TMP1:%.*]] = load i32, ptr [[COERCE_DIVE]], align 4, !dbg [[DBG10]]
62+
// AARCH64-NEXT: [[COERCE_VAL_II:%.*]] = zext i32 [[TMP1]] to i64, !dbg [[DBG10]]
63+
// AARCH64-NEXT: call void @_Z3foo4half(i64 [[COERCE_VAL_II]]), !dbg [[DBG10]]
64+
// AARCH64-NEXT: call void @llvm.directive.region.exit(token [[TMP0]]), !dbg [[DBG10]]
65+
// AARCH64-NEXT: [[TMP2:%.*]] = call i1 @llvm.directive.marker() [ "DIR.OSS"([9 x i8] c"TASKWAIT\00") ], !dbg [[DBG11:![0-9]+]]
66+
// AARCH64-NEXT: ret i32 0, !dbg [[DBG12:![0-9]+]]
67+
//

0 commit comments

Comments
 (0)