Skip to content

Commit f8b719f

Browse files
authored
[OpenMP][flang][do-conc] Map values depending on values defined outside target region. (llvm#148)
Adds support for `do concurrent` mapping when mapped value(s) depend on values defined outside the target region; e.g. the size of the array is dynamic. This needs to be handled by localizing these region outsiders by either cloning them in the region or in case we cannot do that, map them and use the mapped values.
1 parent cb01ff8 commit f8b719f

File tree

4 files changed

+129
-8
lines changed

4 files changed

+129
-8
lines changed

flang/lib/Lower/OpenMP/OpenMP.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1320,9 +1320,11 @@ static void genBodyOfTargetOp(
13201320
while (!valuesDefinedAbove.empty()) {
13211321
for (mlir::Value val : valuesDefinedAbove) {
13221322
mlir::Operation *valOp = val.getDefiningOp();
1323+
assert(valOp != nullptr);
13231324
if (mlir::isMemoryEffectFree(valOp)) {
13241325
mlir::Operation *clonedOp = valOp->clone();
13251326
regionBlock->push_front(clonedOp);
1327+
assert(clonedOp->getNumResults() == 1);
13261328
val.replaceUsesWithIf(
13271329
clonedOp->getResult(0), [regionBlock](mlir::OpOperand &use) {
13281330
return use.getOwner()->getBlock() == regionBlock;

flang/lib/Optimizer/Transforms/DoConcurrentConversion.cpp

Lines changed: 83 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,68 @@ mlir::Value calculateTripCount(fir::FirOpBuilder &builder, mlir::Location loc,
147147

148148
return tripCount;
149149
}
150+
151+
/// Check if cloning the bounds introduced any dependency on the outer region.
152+
/// If so, then either clone them as well if they are MemoryEffectFree, or else
153+
/// copy them to a new temporary and add them to the map and block_argument
154+
/// lists and replace their uses with the new temporary.
155+
///
156+
/// TODO: similar to the above functions, this is copied from OpenMP lowering
157+
/// (in this case, from `genBodyOfTargetOp`). Once we move to a common lib for
158+
/// these utils this will move as well.
159+
void cloneOrMapRegionOutsiders(fir::FirOpBuilder &builder,
160+
mlir::omp::TargetOp targetOp) {
161+
mlir::Region &targetRegion = targetOp.getRegion();
162+
mlir::Block *targetEntryBlock = &targetRegion.getBlocks().front();
163+
llvm::SetVector<mlir::Value> valuesDefinedAbove;
164+
mlir::getUsedValuesDefinedAbove(targetRegion, valuesDefinedAbove);
165+
166+
while (!valuesDefinedAbove.empty()) {
167+
for (mlir::Value val : valuesDefinedAbove) {
168+
mlir::Operation *valOp = val.getDefiningOp();
169+
assert(valOp != nullptr);
170+
if (mlir::isMemoryEffectFree(valOp)) {
171+
mlir::Operation *clonedOp = valOp->clone();
172+
targetEntryBlock->push_front(clonedOp);
173+
assert(clonedOp->getNumResults() == 1);
174+
val.replaceUsesWithIf(
175+
clonedOp->getResult(0), [targetEntryBlock](mlir::OpOperand &use) {
176+
return use.getOwner()->getBlock() == targetEntryBlock;
177+
});
178+
} else {
179+
mlir::OpBuilder::InsertionGuard guard(builder);
180+
builder.setInsertionPointAfter(valOp);
181+
auto copyVal = builder.createTemporary(val.getLoc(), val.getType());
182+
builder.createStoreWithConvert(copyVal.getLoc(), val, copyVal);
183+
184+
llvm::SmallVector<mlir::Value> bounds;
185+
std::stringstream name;
186+
builder.setInsertionPoint(targetOp);
187+
mlir::Value mapOp = createMapInfoOp(
188+
builder, copyVal.getLoc(), copyVal,
189+
/*varPtrPtr=*/mlir::Value{}, name.str(), bounds,
190+
/*members=*/llvm::SmallVector<mlir::Value>{},
191+
/*membersIndex=*/mlir::DenseIntElementsAttr{},
192+
static_cast<
193+
std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
194+
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT),
195+
mlir::omp::VariableCaptureKind::ByCopy, copyVal.getType());
196+
targetOp.getMapVarsMutable().append(mapOp);
197+
mlir::Value clonedValArg =
198+
targetRegion.addArgument(copyVal.getType(), copyVal.getLoc());
199+
builder.setInsertionPointToStart(targetEntryBlock);
200+
auto loadOp =
201+
builder.create<fir::LoadOp>(clonedValArg.getLoc(), clonedValArg);
202+
val.replaceUsesWithIf(
203+
loadOp->getResult(0), [targetEntryBlock](mlir::OpOperand &use) {
204+
return use.getOwner()->getBlock() == targetEntryBlock;
205+
});
206+
}
207+
}
208+
valuesDefinedAbove.clear();
209+
mlir::getUsedValuesDefinedAbove(targetRegion, valuesDefinedAbove);
210+
}
211+
}
150212
} // namespace internal
151213
} // namespace omp
152214
} // namespace lower
@@ -720,14 +782,19 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
720782
llvm::SmallVector<mlir::Value> boundsOps;
721783
genBoundsOps(rewriter, liveIn.getLoc(), declareOp, boundsOps);
722784

785+
// Use the raw address to avoid unboxing `fir.box` values whenever possible.
786+
// Put differently, if we have access to the direct value memory
787+
// reference/address, we use it.
788+
mlir::Value rawAddr = declareOp.getOriginalBase();
723789
return Fortran::lower::omp ::internal::createMapInfoOp(
724-
rewriter, liveIn.getLoc(), declareOp.getBase(), /*varPtrPtr=*/{},
725-
declareOp.getUniqName().str(), boundsOps, /*members=*/{},
790+
rewriter, liveIn.getLoc(), rawAddr,
791+
/*varPtrPtr=*/{}, declareOp.getUniqName().str(), boundsOps,
792+
/*members=*/{},
726793
/*membersIndex=*/mlir::DenseIntElementsAttr{},
727794
static_cast<
728795
std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
729796
mapFlag),
730-
captureKind, liveInType);
797+
captureKind, rawAddr.getType());
731798
}
732799

733800
mlir::omp::TargetOp genTargetOp(mlir::Location loc,
@@ -754,14 +821,24 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
754821
auto miOp = mlir::cast<mlir::omp::MapInfoOp>(mapInfoOp.getDefiningOp());
755822
hlfir::DeclareOp liveInDeclare = genLiveInDeclare(rewriter, arg, miOp);
756823
mlir::Value miOperand = miOp.getVariableOperand(0);
757-
mapper.map(miOperand, liveInDeclare.getBase());
824+
825+
// TODO If `miOperand.getDefiningOp()` is a `fir::BoxAddrOp`, we probably
826+
// need to "unpack" the box by getting the defining op of it's value.
827+
// However, we did not hit this case in reality yet so leaving it as a
828+
// todo for now.
829+
830+
mapper.map(miOperand, liveInDeclare.getOriginalBase());
758831

759832
if (auto origDeclareOp = mlir::dyn_cast_if_present<hlfir::DeclareOp>(
760833
miOperand.getDefiningOp()))
761-
mapper.map(origDeclareOp.getOriginalBase(),
762-
liveInDeclare.getOriginalBase());
834+
mapper.map(origDeclareOp.getBase(), liveInDeclare.getBase());
763835
}
764836

837+
fir::FirOpBuilder firBuilder(
838+
rewriter,
839+
fir::getKindMapping(targetOp->getParentOfType<mlir::ModuleOp>()));
840+
Fortran::lower::omp::internal::cloneOrMapRegionOutsiders(firBuilder,
841+
targetOp);
765842
rewriter.setInsertionPoint(
766843
rewriter.create<mlir::omp::TerminatorOp>(targetOp.getLoc()));
767844

flang/test/Transforms/DoConcurrent/basic_device.f90

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,15 @@ program do_concurrent_basic
2121

2222
! CHECK-NOT: fir.do_loop
2323

24-
! CHECK-DAG: %[[I_MAP_INFO:.*]] = omp.map.info var_ptr(%[[I_ORIG_DECL]]#0
24+
! CHECK-DAG: %[[I_MAP_INFO:.*]] = omp.map.info var_ptr(%[[I_ORIG_DECL]]#1
2525
! CHECK: %[[C0:.*]] = arith.constant 0 : index
2626
! CHECK: %[[UPPER_BOUND:.*]] = arith.subi %[[A_EXTENT]], %[[C0]] : index
2727

2828
! CHECK: %[[A_BOUNDS:.*]] = omp.map.bounds lower_bound(%[[C0]] : index)
2929
! CHECK-SAME: upper_bound(%[[UPPER_BOUND]] : index)
3030
! CHECK-SAME: extent(%[[A_EXTENT]] : index)
3131

32-
! CHECK-DAG: %[[A_MAP_INFO:.*]] = omp.map.info var_ptr(%[[A_ORIG_DECL]]#0 : {{[^(]+}})
32+
! CHECK-DAG: %[[A_MAP_INFO:.*]] = omp.map.info var_ptr(%[[A_ORIG_DECL]]#1 : {{[^(]+}})
3333
! CHECK-SAME: map_clauses(implicit, tofrom) capture(ByRef) bounds(%[[A_BOUNDS]])
3434

3535
! CHECK: %[[TRIP_COUNT:.*]] = arith.muli %{{.*}}, %{{.*}} : i64
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
! Tests `do concurrent` mapping when mapped value(s) depend on values defined
2+
! outside the target region; e.g. the size of the array is dynamic. This needs
3+
! to be handled by localizing these region outsiders by either cloning them in
4+
! the region or in case we cannot do that, map them and use the mapped values.
5+
6+
! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-parallel=device %s -o - \
7+
! RUN: | FileCheck %s
8+
9+
subroutine foo(n)
10+
implicit none
11+
integer :: n
12+
integer :: i
13+
integer, dimension(n) :: a
14+
15+
do concurrent(i=1:10)
16+
a(i) = i
17+
end do
18+
end subroutine
19+
20+
! CHECK-DAG: %[[I_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFfooEi"}
21+
! CHECK-DAG: %[[A_DECL:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFfooEa"}
22+
! CHECK-DAG: %[[N_ALLOC:.*]] = fir.alloca i32
23+
24+
! CHECK-DAG: %[[I_MAP:.*]] = omp.map.info var_ptr(%[[I_DECL]]#1 : {{.*}})
25+
! CHECK-DAG: %[[A_MAP:.*]] = omp.map.info var_ptr(%[[A_DECL]]#1 : {{.*}})
26+
! CHECK-DAG: %[[N_MAP:.*]] = omp.map.info var_ptr(%[[N_ALLOC]] : {{.*}})
27+
28+
! CHECK: omp.target
29+
! CHECK-SAME: map_entries(%[[I_MAP]] -> %[[I_ARG:arg[0-9]*]],
30+
! CHECK-SAME: %[[A_MAP]] -> %[[A_ARG:arg[0-9]*]],
31+
! CHECK-SAME: %[[N_MAP]] -> %[[N_ARG:arg[0-9]*]] : {{.*}})
32+
! CHECK-SAME: {{.*}} {
33+
34+
! CHECK-DAG: %{{.*}} = hlfir.declare %[[I_ARG]]
35+
! CHECK-DAG: %{{.*}} = hlfir.declare %[[A_ARG]]
36+
! CHECK-DAG: %{{.*}} = fir.load %[[N_ARG]]
37+
38+
! CHECK: omp.terminator
39+
! CHECK: }
40+
41+
42+

0 commit comments

Comments
 (0)