Skip to content

[Flang]Fix for changed code at the end of AllocaIP. #92430

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jun 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 72 additions & 72 deletions clang/test/OpenMP/irbuilder_nested_parallel_for.c

Large diffs are not rendered by default.

32 changes: 16 additions & 16 deletions clang/test/OpenMP/nested_loop_codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -545,17 +545,17 @@ int inline_decl() {
// CHECK3-NEXT: [[LOADGEP_I:%.*]] = load ptr, ptr [[GEP_I]], align 8
// CHECK3-NEXT: [[GEP_K:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 1
// CHECK3-NEXT: [[LOADGEP_K:%.*]] = load ptr, ptr [[GEP_K]], align 8
// CHECK3-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4
// CHECK3-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8
// CHECK3-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4
// CHECK3-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4
// CHECK3-NEXT: br label [[OMP_PAR_REGION:%.*]]
// CHECK3: omp.par.region:
// CHECK3-NEXT: store i32 0, ptr [[LOADGEP_I]], align 4
Expand Down Expand Up @@ -713,6 +713,10 @@ int inline_decl() {
// CHECK3-NEXT: [[LOADGEP_I:%.*]] = load ptr, ptr [[GEP_I]], align 8
// CHECK3-NEXT: [[GEP_RES:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 1
// CHECK3-NEXT: [[LOADGEP_RES:%.*]] = load ptr, ptr [[GEP_RES]], align 8
// CHECK3-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4
// CHECK3-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4
Expand All @@ -721,10 +725,6 @@ int inline_decl() {
// CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8
// CHECK3-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4
// CHECK3-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4
// CHECK3-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4
// CHECK3-NEXT: br label [[OMP_PAR_REGION:%.*]]
// CHECK3: omp.par.region:
// CHECK3-NEXT: store i32 0, ptr [[LOADGEP_I]], align 4
Expand Down Expand Up @@ -884,17 +884,17 @@ int inline_decl() {
// CHECK4-NEXT: [[LOADGEP_I:%.*]] = load ptr, ptr [[GEP_I]], align 8
// CHECK4-NEXT: [[GEP_K:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 1
// CHECK4-NEXT: [[LOADGEP_K:%.*]] = load ptr, ptr [[GEP_K]], align 8
// CHECK4-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4
// CHECK4-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4
// CHECK4-NEXT: [[TID:%.*]] = load i32, ptr [[TID_ADDR_LOCAL]], align 4
// CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8
// CHECK4-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4
// CHECK4-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4
// CHECK4-NEXT: br label [[OMP_PAR_REGION:%.*]]
// CHECK4: omp.par.region:
// CHECK4-NEXT: store i32 0, ptr [[LOADGEP_I]], align 4, !dbg [[DBG23:![0-9]+]]
Expand Down Expand Up @@ -1062,6 +1062,10 @@ int inline_decl() {
// CHECK4-NEXT: [[LOADGEP_I:%.*]] = load ptr, ptr [[GEP_I]], align 8
// CHECK4-NEXT: [[GEP_RES:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 1
// CHECK4-NEXT: [[LOADGEP_RES:%.*]] = load ptr, ptr [[GEP_RES]], align 8
// CHECK4-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4
// CHECK4-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4
Expand All @@ -1070,10 +1074,6 @@ int inline_decl() {
// CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8
// CHECK4-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4
// CHECK4-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4
// CHECK4-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4
// CHECK4-NEXT: br label [[OMP_PAR_REGION:%.*]]
// CHECK4: omp.par.region:
// CHECK4-NEXT: store i32 0, ptr [[LOADGEP_I]], align 4, !dbg [[DBG87:![0-9]+]]
Expand Down
48 changes: 48 additions & 0 deletions flang/test/Lower/OpenMP/parallel-reduction-mixed.f90
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
!! Make sure that mixture of by-ref and by-val reductions work all the way
!! to LLVM-IR code.
! RUN: %flang_fc1 -emit-llvm -fopenmp -o - %s 2>&1 | FileCheck %s
subroutine proc
implicit none
real(8),allocatable :: F(:)
real(8),allocatable :: A(:)

integer :: I

!$omp parallel private(A) reduction(+:F,I)
allocate(A(10))
!$omp end parallel
end subroutine proc

!CHECK-LABEL: define void @proc_()
!CHECK: call void
!CHECK-SAME: @__kmpc_fork_call(ptr {{.*}}, i32 1, ptr @[[OMP_PAR:.*]], {{.*}})

!CHECK: define internal void @[[OMP_PAR]](ptr {{.*}} %[[TID_ADDR:.*]], ptr noalias
!CHECK: %[[TID_LOCAL:.*]] = alloca i32
!CHECK: %[[TID:.*]] = load i32, ptr %[[TID_ADDR]]
!CHECK: store i32 %[[TID]], ptr %[[TID_LOCAL]]
!CHECK: %[[I_priv:.*]] = alloca i32
!CHECK: %[[F_priv:.*]] = alloca ptr

!CHECK: omp.reduction.init:
!CHECK: store ptr %{{.*}}, ptr %[[F_priv]]
!CHECK: store i32 0, ptr %[[I_priv]]

!CHECK: omp.par.region8:
!CHECK-NEXT: call ptr @malloc
!CHECK-SAME: i64 10

!CHECK: %[[RED_ARR_0:.*]] = getelementptr inbounds [2 x ptr], ptr %red.array, i64 0, i64 0
!CHECK: store ptr %[[F_priv]], ptr %[[RED_ARR_0:.*]]
!CHECK: %[[RED_ARR_1:.*]] = getelementptr inbounds [2 x ptr], ptr %red.array, i64 0, i64 1
!CHECK: store ptr %[[I_priv]], ptr %[[RED_ARR_1]]

!CHECK: omp.par.pre_finalize: ; preds = %reduce.finalize
!CHECK: %{{.*}} = load ptr, ptr %[[F_priv]]
!CHECK: br label %omp.reduction.cleanup

!CHECK: omp.reduction.cleanup:
!CHECK: br i1 %{{.*}}, label %[[OMP_FREE:.*]], label %{{.*}}

!CHECK: [[OMP_FREE]]:
!CHECK: call void @free
10 changes: 6 additions & 4 deletions llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1391,7 +1391,8 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(

// Change the location to the outer alloca insertion point to create and
// initialize the allocas we pass into the parallel region.
Builder.restoreIP(OuterAllocaIP);
InsertPointTy NewOuter(OuterAllocaBlock, OuterAllocaBlock->begin());
Builder.restoreIP(NewOuter);
AllocaInst *TIDAddrAlloca = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
AllocaInst *ZeroAddrAlloca =
Builder.CreateAlloca(Int32, nullptr, "zero.addr");
Expand Down Expand Up @@ -2155,7 +2156,7 @@ OpenMPIRBuilder::createReductions(const LocationDescription &Loc,
// values.
unsigned NumReductions = ReductionInfos.size();
Type *RedArrayTy = ArrayType::get(Builder.getPtrTy(), NumReductions);
Builder.restoreIP(AllocaIP);
Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator());
Value *RedArray = Builder.CreateAlloca(RedArrayTy, nullptr, "red.array");

Builder.SetInsertPoint(InsertBlock, InsertBlock->end());
Expand Down Expand Up @@ -2556,7 +2557,8 @@ OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);

// Allocate space for computed loop bounds as expected by the "init" function.
Builder.restoreIP(AllocaIP);
Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());

Type *I32Type = Type::getInt32Ty(M.getContext());
Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
Expand Down Expand Up @@ -3118,7 +3120,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop(
FunctionCallee DynamicNext = getKmpcForDynamicNextForType(IVTy, M, *this);

// Allocate space for computed loop bounds as expected by the "init" function.
Builder.restoreIP(AllocaIP);
Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
Type *I32Type = Type::getInt32Ty(M.getContext());
Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
Expand Down
37 changes: 25 additions & 12 deletions mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -770,7 +770,7 @@ static void allocByValReductionVars(
DenseMap<Value, llvm::Value *> &reductionVariableMap,
llvm::ArrayRef<bool> isByRefs) {
llvm::IRBuilderBase::InsertPointGuard guard(builder);
builder.restoreIP(allocaIP);
builder.SetInsertPoint(allocaIP.getBlock()->getTerminator());
auto args =
loop.getRegion().getArguments().take_back(loop.getNumReductionVars());

Expand All @@ -780,7 +780,7 @@ static void allocByValReductionVars(
llvm::Value *var = builder.CreateAlloca(
moduleTranslation.convertType(reductionDecls[i].getType()));
moduleTranslation.mapValue(args[i], var);
privateReductionVariables.push_back(var);
privateReductionVariables[i] = var;
reductionVariableMap.try_emplace(loop.getReductionVars()[i], var);
}
}
Expand Down Expand Up @@ -911,7 +911,8 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
llvm::OpenMPIRBuilder::InsertPointTy allocaIP =
findAllocaInsertPoint(builder, moduleTranslation);

SmallVector<llvm::Value *> privateReductionVariables;
SmallVector<llvm::Value *> privateReductionVariables(
wsloopOp.getNumReductionVars());
DenseMap<Value, llvm::Value *> reductionVariableMap;
allocByValReductionVars(wsloopOp, builder, moduleTranslation, allocaIP,
reductionDecls, privateReductionVariables,
Expand Down Expand Up @@ -942,7 +943,7 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
// ptr
builder.CreateStore(phis[0], var);

privateReductionVariables.push_back(var);
privateReductionVariables[i] = var;
moduleTranslation.mapValue(reductionArgs[i], phis[0]);
reductionVariableMap.try_emplace(wsloopOp.getReductionVars()[i], phis[0]);
} else {
Expand Down Expand Up @@ -1140,7 +1141,8 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
// Collect reduction declarations
SmallVector<omp::DeclareReductionOp> reductionDecls;
collectReductionDecls(opInst, reductionDecls);
SmallVector<llvm::Value *> privateReductionVariables;
SmallVector<llvm::Value *> privateReductionVariables(
opInst.getNumReductionVars());

auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) {
// Allocate reduction vars
Expand All @@ -1154,6 +1156,21 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
MutableArrayRef<BlockArgument> reductionArgs =
opInst.getRegion().getArguments().take_back(
opInst.getNumReductionVars());

llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init");
allocaIP =
InsertPointTy(allocaIP.getBlock(),
allocaIP.getBlock()->getTerminator()->getIterator());
SmallVector<llvm::Value *> byRefVars(opInst.getNumReductionVars());
for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
if (isByRef[i]) {
// Allocate reduction variable (which is a pointer to the real reduciton
// variable allocated in the inlined region)
byRefVars[i] = builder.CreateAlloca(
moduleTranslation.convertType(reductionDecls[i].getType()));
}
}

for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) {
SmallVector<llvm::Value *> phis;

Expand All @@ -1166,18 +1183,14 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder,
assert(phis.size() == 1 &&
"expected one value to be yielded from the "
"reduction neutral element declaration region");
builder.restoreIP(allocaIP);
builder.SetInsertPoint(initBlock->getTerminator());

if (isByRef[i]) {
// Allocate reduction variable (which is a pointer to the real reduciton
// variable allocated in the inlined region)
llvm::Value *var = builder.CreateAlloca(
moduleTranslation.convertType(reductionDecls[i].getType()));
// Store the result of the inlined region to the allocated reduction var
// ptr
builder.CreateStore(phis[0], var);
builder.CreateStore(phis[0], byRefVars[i]);

privateReductionVariables.push_back(var);
privateReductionVariables[i] = byRefVars[i];
moduleTranslation.mapValue(reductionArgs[i], phis[0]);
reductionVariableMap.try_emplace(opInst.getReductionVars()[i], phis[0]);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,11 @@

// Private reduction variable and its initialization.
// CHECK: %tid.addr.local = alloca i32
// CHECK: %[[MALLOC_I:.+]] = call ptr @malloc(i64 4)
// CHECK: %[[PRIV_PTR_I:.+]] = alloca ptr
// CHECK: %[[PRIV_PTR_J:.+]] = alloca ptr
// CHECK: %[[MALLOC_I:.+]] = call ptr @malloc(i64 4)
// CHECK: store ptr %[[MALLOC_I]], ptr %[[PRIV_PTR_I]]
// CHECK: %[[MALLOC_J:.+]] = call ptr @malloc(i64 4)
// CHECK: %[[PRIV_PTR_J:.+]] = alloca ptr
// CHECK: store ptr %[[MALLOC_J]], ptr %[[PRIV_PTR_J]]

// Call to the reduction function.
Expand Down
8 changes: 5 additions & 3 deletions mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,15 @@ module {
// CHECK: %[[VAL_17:.*]] = load i32, ptr %[[VAL_18:.*]], align 4
// CHECK: store i32 %[[VAL_17]], ptr %[[VAL_16]], align 4
// CHECK: %[[VAL_19:.*]] = load i32, ptr %[[VAL_16]], align 4
// CHECK: %[[VAL_20:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[VAL_13]], align 8
// CHECK: %[[VAL_21:.*]] = alloca ptr, align 8
// CHECK: %[[VAL_23:.*]] = alloca ptr, align 8
// CHECK: %[[VAL_20:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[VAL_13]], align 8
// CHECK: %[[VAL_24:.*]] = alloca [2 x ptr], align 8
// CHECK: br label %[[INIT_LABEL:.*]]
// CHECK: [[INIT_LABEL]]:
// CHECK: store ptr %[[VAL_13]], ptr %[[VAL_21]], align 8
// CHECK: %[[VAL_22:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[VAL_15]], align 8
// CHECK: %[[VAL_23:.*]] = alloca ptr, align 8
// CHECK: store ptr %[[VAL_15]], ptr %[[VAL_23]], align 8
// CHECK: %[[VAL_24:.*]] = alloca [2 x ptr], align 8
// CHECK: br label %[[VAL_25:.*]]
// CHECK: omp.par.region: ; preds = %[[VAL_26:.*]]
// CHECK: br label %[[VAL_27:.*]]
Expand Down
Loading