Skip to content

Commit 10b12fd

Browse files
author
Raúl Peñacoba Veigas
committed
Fix VLA unnecessary allocation. See more
- Emit the element type in the VLA DSA to fix the allocation size in lowering
- Skip shared VLAs in the additional space allocation

Fixes llvm#180
1 parent cb47a7f commit 10b12fd

File tree

9 files changed

+940
-807
lines changed

9 files changed

+940
-807
lines changed

clang/lib/CodeGen/CGOmpSsRuntime.cpp

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1188,13 +1188,9 @@ void CGOmpSsRuntime::EmitDSAShared(
11881188
CaptureMapStack.back().try_emplace(VD, LV.getAddress(CGF));
11891189
DSAValue = LV.getPointer(CGF);
11901190
DSABundleList.push_back(DSAValue);
1191-
DSABundleList.push_back(llvm::UndefValue::get(CGF.ConvertType(DRE->getType())));
1192-
TaskInfo.emplace_back(getBundleStr(OSSB_shared), DSABundleList);
11931191
} else {
11941192
DSAValue = CGF.EmitDeclRefLValue(DRE).getPointer(CGF);
11951193
DSABundleList.push_back(DSAValue);
1196-
DSABundleList.push_back(llvm::UndefValue::get(CGF.ConvertType(DRE->getType())));
1197-
TaskInfo.emplace_back(getBundleStr(OSSB_shared), DSABundleList);
11981194
}
11991195
QualType Q = VD->getType();
12001196
// int (**p)[sizex][sizey] -> we need to capture sizex sizey only
@@ -1203,8 +1199,15 @@ void CGOmpSsRuntime::EmitDSAShared(
12031199
while (Q->isPointerType()) {
12041200
Q = Q->getPointeeType();
12051201
}
1206-
if (Q->isVariableArrayType())
1202+
if (Q->isVariableArrayType()) {
12071203
GatherVLADims(CGF, DSAValue, Q, DimsWithValue, CapturedList, IsPtr);
1204+
QualType BaseElementTy = CGF.getContext().getBaseElementType(DRE->getType());
1205+
DSABundleList.push_back(llvm::UndefValue::get(CGF.ConvertType(BaseElementTy)));
1206+
} else {
1207+
DSABundleList.push_back(llvm::UndefValue::get(CGF.ConvertType(DRE->getType())));
1208+
}
1209+
1210+
TaskInfo.emplace_back(getBundleStr(OSSB_shared), DSABundleList);
12081211

12091212
if (!DimsWithValue.empty())
12101213
TaskInfo.emplace_back(getBundleStr(OSSB_vladims), DimsWithValue);
@@ -1237,13 +1240,9 @@ void CGOmpSsRuntime::EmitDSAPrivate(
12371240
CaptureMapStack.back().try_emplace(VD, LV.getAddress(CGF));
12381241
DSAValue = LV.getPointer(CGF);
12391242
DSABundleList.push_back(DSAValue);
1240-
DSABundleList.push_back(llvm::UndefValue::get(CGF.ConvertType(DRE->getType())));
1241-
TaskInfo.emplace_back(getBundleStr(OSSB_private), DSABundleList);
12421243
} else {
12431244
DSAValue = CGF.EmitDeclRefLValue(DRE).getPointer(CGF);
12441245
DSABundleList.push_back(DSAValue);
1245-
DSABundleList.push_back(llvm::UndefValue::get(CGF.ConvertType(DRE->getType())));
1246-
TaskInfo.emplace_back(getBundleStr(OSSB_private), DSABundleList);
12471246
}
12481247
QualType Q = VD->getType();
12491248
// int (**p)[sizex][sizey] -> we need to capture sizex sizey only
@@ -1252,8 +1251,15 @@ void CGOmpSsRuntime::EmitDSAPrivate(
12521251
while (Q->isPointerType()) {
12531252
Q = Q->getPointeeType();
12541253
}
1255-
if (Q->isVariableArrayType())
1254+
if (Q->isVariableArrayType()) {
12561255
GatherVLADims(CGF, DSAValue, Q, DimsWithValue, CapturedList, IsPtr);
1256+
QualType BaseElementTy = CGF.getContext().getBaseElementType(DRE->getType());
1257+
DSABundleList.push_back(llvm::UndefValue::get(CGF.ConvertType(BaseElementTy)));
1258+
} else {
1259+
DSABundleList.push_back(llvm::UndefValue::get(CGF.ConvertType(DRE->getType())));
1260+
}
1261+
1262+
TaskInfo.emplace_back(getBundleStr(OSSB_private), DSABundleList);
12571263

12581264
if (!DimsWithValue.empty())
12591265
TaskInfo.emplace_back(getBundleStr(OSSB_vladims), DimsWithValue);
@@ -1291,13 +1297,9 @@ void CGOmpSsRuntime::EmitDSAFirstprivate(
12911297
CaptureMapStack.back().try_emplace(VD, LV.getAddress(CGF));
12921298
DSAValue = LV.getPointer(CGF);
12931299
DSABundleList.push_back(DSAValue);
1294-
DSABundleList.push_back(llvm::UndefValue::get(CGF.ConvertType(DRE->getType())));
1295-
TaskInfo.emplace_back(getBundleStr(OSSB_firstprivate), DSABundleList);
12961300
} else {
12971301
DSAValue = CGF.EmitDeclRefLValue(DRE).getPointer(CGF);
12981302
DSABundleList.push_back(DSAValue);
1299-
DSABundleList.push_back(llvm::UndefValue::get(CGF.ConvertType(DRE->getType())));
1300-
TaskInfo.emplace_back(getBundleStr(OSSB_firstprivate), DSABundleList);
13011303
}
13021304
QualType Q = VD->getType();
13031305
// int (**p)[sizex][sizey] -> we need to capture sizex sizey only
@@ -1306,8 +1308,15 @@ void CGOmpSsRuntime::EmitDSAFirstprivate(
13061308
while (Q->isPointerType()) {
13071309
Q = Q->getPointeeType();
13081310
}
1309-
if (Q->isVariableArrayType())
1311+
if (Q->isVariableArrayType()) {
13101312
GatherVLADims(CGF, DSAValue, Q, DimsWithValue, CapturedList, IsPtr);
1313+
QualType BaseElementTy = CGF.getContext().getBaseElementType(DRE->getType());
1314+
DSABundleList.push_back(llvm::UndefValue::get(CGF.ConvertType(BaseElementTy)));
1315+
} else {
1316+
DSABundleList.push_back(llvm::UndefValue::get(CGF.ConvertType(DRE->getType())));
1317+
}
1318+
1319+
TaskInfo.emplace_back(getBundleStr(OSSB_firstprivate), DSABundleList);
13111320

13121321
if (!DimsWithValue.empty())
13131322
TaskInfo.emplace_back(getBundleStr(OSSB_vladims), DimsWithValue);

clang/test/OmpSs/IR/task_depend_array_section.c

Lines changed: 378 additions & 378 deletions
Large diffs are not rendered by default.

clang/test/OmpSs/IR/task_vla.c

Lines changed: 76 additions & 76 deletions
Large diffs are not rendered by default.

llvm/lib/Analysis/OmpSsRegionAnalysis.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,10 @@ void DirectiveEnvironment::verifyVLADimsInfo() {
488488
for (const auto &VLAWithDimsMap : VLADimsInfo) {
489489
if (!valueInDSABundles(VLAWithDimsMap.first))
490490
llvm_unreachable("VLA dims OperandBundle must have an associated DSA");
491+
if (!(getDSAType(VLAWithDimsMap.first)->isSingleValueType()
492+
|| getDSAType(VLAWithDimsMap.first)->isStructTy()))
493+
llvm_unreachable("VLA type is not scalar");
494+
491495
// VLA Dims that are not Captured is an error
492496
for (auto *V : VLAWithDimsMap.second) {
493497
if (!valueInCapturedBundle(V))

llvm/lib/Transforms/OmpSs/OmpSsTransform.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1480,6 +1480,9 @@ struct OmpSsDirective {
14801480
Value *computeTaskArgsVLAsExtraSizeOf(IRBuilder<> &IRB) {
14811481
Value *Sum = ConstantInt::get(Int64Ty, 0);
14821482
for (const auto &VLAWithDimsMap : VLADimsInfo) {
1483+
// Skip shareds because they don't need space in task_args
1484+
if (DSAInfo.Shared.count(VLAWithDimsMap.first))
1485+
continue;
14831486
Type *Ty = DirEnv.getDSAType(VLAWithDimsMap.first);
14841487
unsigned SizeB = DL.getTypeAllocSize(Ty);
14851488
Value *ArraySize = ConstantInt::get(Int64Ty, SizeB);
@@ -1636,6 +1639,9 @@ struct OmpSsDirective {
16361639
// Greater alignment goes first
16371640
void computeVLAsAlignOrder(SmallVectorImpl<VLAAlign> &VLAAlignsInfo) {
16381641
for (const auto &VLAWithDimsMap : VLADimsInfo) {
1642+
// Skip shareds because they don't need space in task_args
1643+
if (DSAInfo.Shared.count(VLAWithDimsMap.first))
1644+
continue;
16391645
auto *V = VLAWithDimsMap.first;
16401646
Type *Ty = DirEnv.getDSAType(V);
16411647

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --include-generated-funcs --version 2
2+
; RUN: opt %s -passes='default<O3>,ompss-2' -S | FileCheck %s
3+
; ModuleID = 'check_vla_size_in_task_args.ll'
4+
source_filename = "check_vla_size_in_task_args.ll"
5+
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
6+
target triple = "x86_64-unknown-linux-gnu"
7+
8+
; void foo() {
9+
; int n = 1;
10+
; int vla[n][20];
11+
; #pragma oss task firstprivate(vla)
12+
; {}
13+
; }
14+
15+
; Function Attrs: noinline nounwind
16+
define dso_local void @foo() #0 !dbg !5 {
17+
entry:
18+
%n = alloca i32, align 4
19+
%saved_stack = alloca ptr, align 8
20+
%__vla_expr0 = alloca i64, align 8
21+
store i32 1, ptr %n, align 4, !dbg !9
22+
%0 = load i32, ptr %n, align 4, !dbg !10
23+
%1 = zext i32 %0 to i64, !dbg !11
24+
%2 = call ptr @llvm.stacksave(), !dbg !11
25+
store ptr %2, ptr %saved_stack, align 8, !dbg !11
26+
%vla = alloca [20 x i32], i64 %1, align 16, !dbg !11
27+
store i64 %1, ptr %__vla_expr0, align 8, !dbg !11
28+
%3 = call token @llvm.directive.region.entry() [ "DIR.OSS"([5 x i8] c"TASK\00"), "QUAL.OSS.FIRSTPRIVATE"(ptr %vla, i32 undef), "QUAL.OSS.VLA.DIMS"(ptr %vla, i64 %1, i64 20), "QUAL.OSS.CAPTURED"(i64 %1, i64 20) ], !dbg !12
29+
call void @llvm.directive.region.exit(token %3), !dbg !13
30+
%4 = load ptr, ptr %saved_stack, align 8, !dbg !14
31+
call void @llvm.stackrestore(ptr %4), !dbg !14
32+
ret void, !dbg !14
33+
}
34+
35+
; Function Attrs: nocallback nofree nosync nounwind willreturn
36+
declare ptr @llvm.stacksave() #1
37+
38+
; Function Attrs: nounwind
39+
declare token @llvm.directive.region.entry() #2
40+
41+
; Function Attrs: nounwind
42+
declare void @llvm.directive.region.exit(token) #2
43+
44+
; Function Attrs: nocallback nofree nosync nounwind willreturn
45+
declare void @llvm.stackrestore(ptr) #1
46+
47+
attributes #0 = { noinline nounwind "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
48+
attributes #1 = { nocallback nofree nosync nounwind willreturn }
49+
attributes #2 = { nounwind }
50+
51+
!llvm.dbg.cu = !{!0}
52+
!llvm.module.flags = !{!2, !3}
53+
!llvm.ident = !{!4}
54+
55+
!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 17.0.0 ([email protected]:llvm-ompss/llvm-mono.git 189d6f1b886a40340f7dd586e6beeca3482d3f5b)", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, splitDebugInlining: false, nameTableKind: None)
56+
!1 = !DIFile(filename: "<stdin>", directory: "")
57+
!2 = !{i32 2, !"Debug Info Version", i32 3}
58+
!3 = !{i32 1, !"wchar_size", i32 4}
59+
!4 = !{!"clang version 17.0.0 ([email protected]:llvm-ompss/llvm-mono.git 189d6f1b886a40340f7dd586e6beeca3482d3f5b)"}
60+
!5 = distinct !DISubprogram(name: "foo", scope: !6, file: !6, line: 1, type: !7, scopeLine: 1, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !8)
61+
!6 = !DIFile(filename: "check_vla_size_in_task_args.ll", directory: "")
62+
!7 = !DISubroutineType(types: !8)
63+
!8 = !{}
64+
!9 = !DILocation(line: 2, column: 7, scope: !5)
65+
!10 = !DILocation(line: 3, column: 11, scope: !5)
66+
!11 = !DILocation(line: 3, column: 3, scope: !5)
67+
!12 = !DILocation(line: 4, column: 11, scope: !5)
68+
!13 = !DILocation(line: 5, column: 4, scope: !5)
69+
!14 = !DILocation(line: 6, column: 1, scope: !5)
70+
; CHECK-LABEL: define dso_local void @foo
71+
; CHECK-SAME: () local_unnamed_addr #[[ATTR0:[0-9]+]] !dbg [[DBG5:![0-9]+]] {
72+
; CHECK-NEXT: entry:
73+
; CHECK-NEXT: [[VLA:%.*]] = alloca [20 x i32], align 16, !dbg [[DBG9:![0-9]+]]
74+
; CHECK-NEXT: [[TMP0:%.*]] = alloca ptr, align 8, !dbg [[DBG10:![0-9]+]]
75+
; CHECK-NEXT: [[TMP1:%.*]] = alloca ptr, align 8, !dbg [[DBG10]]
76+
; CHECK-NEXT: [[NUM_DEPS:%.*]] = alloca i64, align 8, !dbg [[DBG10]]
77+
; CHECK-NEXT: br label [[FINAL_COND:%.*]], !dbg [[DBG10]]
78+
; CHECK: codeRepl:
79+
; CHECK-NEXT: store i64 0, ptr [[NUM_DEPS]], align 8, !dbg [[DBG10]]
80+
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[NUM_DEPS]], align 8, !dbg [[DBG10]]
81+
; CHECK-NEXT: call void @nanos6_create_task(ptr @task_info_var_foo, ptr @task_invocation_info_foo, ptr null, i64 112, ptr [[TMP0]], ptr [[TMP1]], i64 0, i64 [[TMP2]]), !dbg [[DBG10]]
82+
; CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TMP0]], align 8, !dbg [[DBG10]]
83+
; CHECK-NEXT: [[ARGS_END:%.*]] = getelementptr i8, ptr [[TMP3]], i64 32, !dbg [[DBG10]]
84+
; CHECK-NEXT: [[GEP_VLA:%.*]] = getelementptr [[NANOS6_TASK_ARGS_FOO:%.*]], ptr [[TMP3]], i32 0, i32 0, !dbg [[DBG10]]
85+
; CHECK-NEXT: store ptr [[ARGS_END]], ptr [[GEP_VLA]], align 4, !dbg [[DBG10]]
86+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[ARGS_END]], i64 80, !dbg [[DBG10]]
87+
; CHECK-NEXT: [[GEP_VLA1:%.*]] = getelementptr [[NANOS6_TASK_ARGS_FOO]], ptr [[TMP3]], i32 0, i32 0, !dbg [[DBG10]]
88+
; CHECK-NEXT: [[TMP5:%.*]] = load ptr, ptr [[GEP_VLA1]], align 8, !dbg [[DBG10]]
89+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP5]], ptr align 4 [[VLA]], i64 80, i1 false), !dbg [[DBG10]]
90+
; CHECK-NEXT: [[CAPT_GEP_:%.*]] = getelementptr [[NANOS6_TASK_ARGS_FOO]], ptr [[TMP3]], i32 0, i32 1, !dbg [[DBG10]]
91+
; CHECK-NEXT: store i64 1, ptr [[CAPT_GEP_]], align 8, !dbg [[DBG10]]
92+
; CHECK-NEXT: [[CAPT_GEP_2:%.*]] = getelementptr [[NANOS6_TASK_ARGS_FOO]], ptr [[TMP3]], i32 0, i32 2, !dbg [[DBG10]]
93+
; CHECK-NEXT: store i64 20, ptr [[CAPT_GEP_2]], align 8, !dbg [[DBG10]]
94+
; CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TMP1]], align 8, !dbg [[DBG10]]
95+
; CHECK-NEXT: call void @nanos6_submit_task(ptr [[TMP6]]), !dbg [[DBG10]]
96+
; CHECK-NEXT: br label [[FINAL_END:%.*]], !dbg [[DBG10]]
97+
; CHECK: final.end:
98+
; CHECK-NEXT: ret void, !dbg [[DBG11:![0-9]+]]
99+
; CHECK: final.then:
100+
; CHECK-NEXT: br label [[FINAL_END]], !dbg [[DBG11]]
101+
; CHECK: final.cond:
102+
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @nanos6_in_final(), !dbg [[DBG10]]
103+
; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i32 [[TMP7]], 0, !dbg [[DBG10]]
104+
; CHECK-NEXT: br i1 [[TMP8]], label [[FINAL_THEN:%.*]], label [[CODEREPL:%.*]], !dbg [[DBG10]]
105+
;
106+
;
107+
; CHECK-LABEL: define internal void @nanos6_ol_duplicate_foo
108+
; CHECK-SAME: (ptr [[TASK_ARGS_SRC:%.*]], ptr [[TASK_ARGS_DST:%.*]]) {
109+
; CHECK-NEXT: entry:
110+
; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[TASK_ARGS_DST]], align 8
111+
; CHECK-NEXT: [[ARGS_END:%.*]] = getelementptr i8, ptr [[TMP0]], i64 32
112+
; CHECK-NEXT: [[GEP_DST_VLA:%.*]] = getelementptr [[NANOS6_TASK_ARGS_FOO:%.*]], ptr [[TMP0]], i32 0, i32 0
113+
; CHECK-NEXT: store ptr [[ARGS_END]], ptr [[GEP_DST_VLA]], align 4
114+
; CHECK-NEXT: [[GEP_DST_:%.*]] = getelementptr [[NANOS6_TASK_ARGS_FOO]], ptr [[TMP0]], i32 0, i32 1
115+
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[GEP_DST_]], align 8
116+
; CHECK-NEXT: [[TMP2:%.*]] = mul nuw i64 4, [[TMP1]]
117+
; CHECK-NEXT: [[GEP_DST_1:%.*]] = getelementptr [[NANOS6_TASK_ARGS_FOO]], ptr [[TMP0]], i32 0, i32 2
118+
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr [[GEP_DST_1]], align 8
119+
; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP2]], [[TMP3]]
120+
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[ARGS_END]], i64 [[TMP4]]
121+
; CHECK-NEXT: [[GEP_SRC_:%.*]] = getelementptr [[NANOS6_TASK_ARGS_FOO]], ptr [[TASK_ARGS_SRC]], i32 0, i32 1
122+
; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[GEP_SRC_]], align 8
123+
; CHECK-NEXT: [[TMP7:%.*]] = mul nuw i64 1, [[TMP6]]
124+
; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr [[NANOS6_TASK_ARGS_FOO]], ptr [[TASK_ARGS_SRC]], i32 0, i32 2
125+
; CHECK-NEXT: [[TMP8:%.*]] = load i64, ptr [[GEP_SRC_2]], align 8
126+
; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP7]], [[TMP8]]
127+
; CHECK-NEXT: [[GEP_SRC_VLA:%.*]] = getelementptr [[NANOS6_TASK_ARGS_FOO]], ptr [[TASK_ARGS_SRC]], i32 0, i32 0
128+
; CHECK-NEXT: [[GEP_DST_VLA3:%.*]] = getelementptr [[NANOS6_TASK_ARGS_FOO]], ptr [[TMP0]], i32 0, i32 0
129+
; CHECK-NEXT: [[TMP10:%.*]] = load ptr, ptr [[GEP_SRC_VLA]], align 8
130+
; CHECK-NEXT: [[TMP11:%.*]] = load ptr, ptr [[GEP_DST_VLA3]], align 8
131+
; CHECK-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP9]], 4
132+
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 4 [[TMP11]], ptr align 4 [[TMP10]], i64 [[TMP12]], i1 false)
133+
; CHECK-NEXT: [[CAPT_GEP_SRC_:%.*]] = getelementptr [[NANOS6_TASK_ARGS_FOO]], ptr [[TASK_ARGS_SRC]], i32 0, i32 1
134+
; CHECK-NEXT: [[CAPT_GEP_DST_:%.*]] = getelementptr [[NANOS6_TASK_ARGS_FOO]], ptr [[TMP0]], i32 0, i32 1
135+
; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[CAPT_GEP_SRC_]], align 8
136+
; CHECK-NEXT: store i64 [[TMP13]], ptr [[CAPT_GEP_DST_]], align 8
137+
; CHECK-NEXT: [[CAPT_GEP_SRC_4:%.*]] = getelementptr [[NANOS6_TASK_ARGS_FOO]], ptr [[TASK_ARGS_SRC]], i32 0, i32 2
138+
; CHECK-NEXT: [[CAPT_GEP_DST_5:%.*]] = getelementptr [[NANOS6_TASK_ARGS_FOO]], ptr [[TMP0]], i32 0, i32 2
139+
; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[CAPT_GEP_SRC_4]], align 8
140+
; CHECK-NEXT: store i64 [[TMP14]], ptr [[CAPT_GEP_DST_5]], align 8
141+
; CHECK-NEXT: ret void
142+
;
143+
;
144+
; CHECK-LABEL: define internal void @nanos6_unpacked_task_region_foo
145+
; CHECK-SAME: (ptr [[VLA:%.*]], i64 [[TMP0:%.*]], i64 [[TMP1:%.*]], ptr [[DEVICE_ENV:%.*]], ptr [[ADDRESS_TRANSLATION_TABLE:%.*]]) !dbg [[DBG12:![0-9]+]] {
146+
; CHECK-NEXT: newFuncRoot:
147+
; CHECK-NEXT: br label [[TMP2:%.*]], !dbg [[DBG13:![0-9]+]]
148+
; CHECK: 2:
149+
; CHECK-NEXT: br label [[DOTEXITSTUB:%.*]], !dbg [[DBG14:![0-9]+]]
150+
; CHECK: .exitStub:
151+
; CHECK-NEXT: ret void
152+
;
153+
;
154+
; CHECK-LABEL: define internal void @nanos6_ol_task_region_foo
155+
; CHECK-SAME: (ptr [[TASK_ARGS:%.*]], ptr [[DEVICE_ENV:%.*]], ptr [[ADDRESS_TRANSLATION_TABLE:%.*]]) {
156+
; CHECK-NEXT: entry:
157+
; CHECK-NEXT: [[GEP_VLA:%.*]] = getelementptr [[NANOS6_TASK_ARGS_FOO:%.*]], ptr [[TASK_ARGS]], i32 0, i32 0
158+
; CHECK-NEXT: [[LOAD_GEP_VLA:%.*]] = load ptr, ptr [[GEP_VLA]], align 8
159+
; CHECK-NEXT: [[CAPT_GEP:%.*]] = getelementptr [[NANOS6_TASK_ARGS_FOO]], ptr [[TASK_ARGS]], i32 0, i32 1
160+
; CHECK-NEXT: [[LOAD_CAPT_GEP:%.*]] = load i64, ptr [[CAPT_GEP]], align 8
161+
; CHECK-NEXT: [[CAPT_GEP1:%.*]] = getelementptr [[NANOS6_TASK_ARGS_FOO]], ptr [[TASK_ARGS]], i32 0, i32 2
162+
; CHECK-NEXT: [[LOAD_CAPT_GEP1:%.*]] = load i64, ptr [[CAPT_GEP1]], align 8
163+
; CHECK-NEXT: [[TMP0:%.*]] = icmp ne ptr [[ADDRESS_TRANSLATION_TABLE]], null
164+
; CHECK-NEXT: br i1 [[TMP0]], label [[TMP1:%.*]], label [[TMP2:%.*]]
165+
; CHECK: 1:
166+
; CHECK-NEXT: br label [[TMP2]]
167+
; CHECK: 2:
168+
; CHECK-NEXT: call void @nanos6_unpacked_task_region_foo(ptr [[LOAD_GEP_VLA]], i64 [[LOAD_CAPT_GEP]], i64 [[LOAD_CAPT_GEP1]], ptr [[DEVICE_ENV]], ptr [[ADDRESS_TRANSLATION_TABLE]])
169+
; CHECK-NEXT: ret void
170+
;
171+
;
172+
; CHECK-LABEL: define internal void @nanos6_constructor_register_task_info() {
173+
; CHECK-NEXT: entry:
174+
; CHECK-NEXT: call void @nanos6_register_task_info(ptr @task_info_var_foo)
175+
; CHECK-NEXT: ret void
176+
;

0 commit comments

Comments
 (0)