diff --git a/clang/test/OpenMP/irbuilder_nested_parallel_for.c b/clang/test/OpenMP/irbuilder_nested_parallel_for.c index d737ab33e9ca4..a73eb963f710f 100644 --- a/clang/test/OpenMP/irbuilder_nested_parallel_for.c +++ b/clang/test/OpenMP/irbuilder_nested_parallel_for.c @@ -88,6 +88,10 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-LABEL: define {{[^@]+}}@_Z14parallel_for_0v..omp_par // CHECK-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK-NEXT: omp.par.entry: +// CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 // CHECK-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 @@ -96,10 +100,6 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK: omp.par.region: // CHECK-NEXT: store i32 0, ptr [[I]], align 4 @@ -286,6 +286,10 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 // CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 +// CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND:%.*]] = 
alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TID_ADDR_LOCAL8:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR2]], align 4 // CHECK-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL8]], align 4 @@ -294,10 +298,6 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK-NEXT: [[AGG_CAPTURED12:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK-NEXT: br label [[OMP_PAR_REGION5:%.*]] // CHECK: omp.par.region5: // CHECK-NEXT: store i32 0, ptr [[I]], align 4 @@ -508,6 +508,14 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 // CHECK-NEXT: [[STRUCTARG214:%.*]] = alloca { ptr, ptr, ptr }, align 8 +// CHECK-NEXT: [[P_LASTITER178:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND179:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND180:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE181:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 // CHECK-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 @@ -516,18 +524,10 @@ void parallel_for_2(float *r, 
int a, double b) { // CHECK-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[I160:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[AGG_CAPTURED161:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK-NEXT: [[AGG_CAPTURED162:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK-NEXT: [[DOTCOUNT_ADDR163:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LASTITER178:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LOWERBOUND179:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_UPPERBOUND180:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_STRIDE181:%.*]] = alloca i32, align 4 // CHECK-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK: omp.par.region: // CHECK-NEXT: store i32 0, ptr [[I]], align 4 @@ -658,6 +658,18 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 // CHECK-NEXT: [[STRUCTARG209:%.*]] = alloca { ptr, ptr, ptr }, align 8 // CHECK-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8 +// CHECK-NEXT: [[P_LASTITER153:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND154:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND155:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE156:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LASTITER93:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND94:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND95:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE96:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LASTITER34:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND35:%.*]] = alloca i32, align 4 +// CHECK-NEXT: 
[[P_UPPERBOUND36:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE37:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TID_ADDR_LOCAL12:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR6]], align 4 // CHECK-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL12]], align 4 @@ -666,26 +678,14 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[AGG_CAPTURED17:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK-NEXT: [[AGG_CAPTURED18:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 // CHECK-NEXT: [[DOTCOUNT_ADDR19:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LASTITER34:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LOWERBOUND35:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_UPPERBOUND36:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_STRIDE37:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[I75:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[AGG_CAPTURED76:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK-NEXT: [[AGG_CAPTURED77:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK-NEXT: [[DOTCOUNT_ADDR78:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LASTITER93:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LOWERBOUND94:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_UPPERBOUND95:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_STRIDE96:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[I135:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[AGG_CAPTURED136:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK-NEXT: [[AGG_CAPTURED137:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK-NEXT: [[DOTCOUNT_ADDR138:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LASTITER153:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LOWERBOUND154:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_UPPERBOUND155:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_STRIDE156:%.*]] = alloca i32, align 4 // CHECK-NEXT: br label [[OMP_PAR_REGION9:%.*]] // CHECK: omp.par.region9: // CHECK-NEXT: store i32 0, ptr [[I16]], align 4 @@ -875,6 +875,10 @@ void 
parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 // CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 +// CHECK-NEXT: [[P_LASTITER128:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND129:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND130:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE131:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TID_ADDR_LOCAL106:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR100]], align 4 // CHECK-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL106]], align 4 @@ -883,10 +887,6 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[AGG_CAPTURED111:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK-NEXT: [[AGG_CAPTURED112:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK-NEXT: [[DOTCOUNT_ADDR113:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LASTITER128:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LOWERBOUND129:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_UPPERBOUND130:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_STRIDE131:%.*]] = alloca i32, align 4 // CHECK-NEXT: br label [[OMP_PAR_REGION103:%.*]] // CHECK: omp.par.region103: // CHECK-NEXT: store i32 0, ptr [[I110]], align 4 @@ -954,6 +954,10 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 // CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 +// CHECK-NEXT: [[P_LASTITER69:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_LOWERBOUND70:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_UPPERBOUND71:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[P_STRIDE72:%.*]] = alloca i32, align 4 // CHECK-NEXT: 
[[TID_ADDR_LOCAL47:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR41]], align 4 // CHECK-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL47]], align 4 @@ -962,10 +966,6 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-NEXT: [[AGG_CAPTURED52:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK-NEXT: [[AGG_CAPTURED53:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK-NEXT: [[DOTCOUNT_ADDR54:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LASTITER69:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_LOWERBOUND70:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_UPPERBOUND71:%.*]] = alloca i32, align 4 -// CHECK-NEXT: [[P_STRIDE72:%.*]] = alloca i32, align 4 // CHECK-NEXT: br label [[OMP_PAR_REGION44:%.*]] // CHECK: omp.par.region44: // CHECK-NEXT: store i32 0, ptr [[I51]], align 4 @@ -1521,6 +1521,10 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-LABEL: define {{[^@]+}}@_Z14parallel_for_0v..omp_par // CHECK-DEBUG-SAME: (ptr noalias [[TID_ADDR:%.*]], ptr noalias [[ZERO_ADDR:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG19:![0-9]+]] { // CHECK-DEBUG-NEXT: omp.par.entry: +// CHECK-DEBUG-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[TMP0:%.*]] = load i32, ptr [[TID_ADDR]], align 4 // CHECK-DEBUG-NEXT: store i32 [[TMP0]], ptr [[TID_ADDR_LOCAL]], align 4 @@ -1529,10 +1533,6 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: 
[[P_LOWERBOUND:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK-DEBUG: omp.par.region: // CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META21:![0-9]+]], metadata !DIExpression()), !dbg [[DBG26:![0-9]+]] @@ -1731,6 +1731,10 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-DEBUG-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 +// CHECK-DEBUG-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL8:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR2]], align 4 // CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL8]], align 4 @@ -1739,10 +1743,6 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED12:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION5:%.*]] // CHECK-DEBUG: omp.par.region5: // CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META95:![0-9]+]], metadata !DIExpression()), !dbg 
[[DBG100:![0-9]+]] @@ -1966,6 +1966,14 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-DEBUG-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 // CHECK-DEBUG-NEXT: [[STRUCTARG214:%.*]] = alloca { ptr, ptr, ptr }, align 8 +// CHECK-DEBUG-NEXT: [[P_LASTITER178:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LOWERBOUND179:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_UPPERBOUND180:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_STRIDE181:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 // CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 @@ -1974,18 +1982,10 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_3:%.*]], align 8 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_4:%.*]], align 4 // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[I160:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED161:%.*]] = alloca [[STRUCT_ANON_15:%.*]], align 8 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED162:%.*]] = alloca [[STRUCT_ANON_16:%.*]], align 4 // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR163:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LASTITER178:%.*]] = alloca i32, 
align 4 -// CHECK-DEBUG-NEXT: [[P_LOWERBOUND179:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_UPPERBOUND180:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_STRIDE181:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK-DEBUG: omp.par.region: // CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I]], metadata [[META158:![0-9]+]], metadata !DIExpression()), !dbg [[DBG162:![0-9]+]] @@ -2118,6 +2118,18 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 // CHECK-DEBUG-NEXT: [[STRUCTARG209:%.*]] = alloca { ptr, ptr, ptr }, align 8 // CHECK-DEBUG-NEXT: [[STRUCTARG:%.*]] = alloca { ptr, ptr, ptr }, align 8 +// CHECK-DEBUG-NEXT: [[P_LASTITER153:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LOWERBOUND154:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_UPPERBOUND155:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_STRIDE156:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LASTITER93:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LOWERBOUND94:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_UPPERBOUND95:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_STRIDE96:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LASTITER34:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LOWERBOUND35:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_UPPERBOUND36:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_STRIDE37:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL12:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR6]], align 4 // CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL12]], align 4 @@ -2126,26 +2138,14 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[AGG_CAPTURED17:%.*]] = alloca [[STRUCT_ANON_5:%.*]], align 8 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED18:%.*]] = alloca [[STRUCT_ANON_6:%.*]], align 4 
// CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR19:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LASTITER34:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LOWERBOUND35:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_UPPERBOUND36:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_STRIDE37:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[I75:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED76:%.*]] = alloca [[STRUCT_ANON_9:%.*]], align 8 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED77:%.*]] = alloca [[STRUCT_ANON_10:%.*]], align 4 // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR78:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LASTITER93:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LOWERBOUND94:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_UPPERBOUND95:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_STRIDE96:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[I135:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED136:%.*]] = alloca [[STRUCT_ANON_13:%.*]], align 8 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED137:%.*]] = alloca [[STRUCT_ANON_14:%.*]], align 4 // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR138:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LASTITER153:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LOWERBOUND154:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_UPPERBOUND155:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_STRIDE156:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION9:%.*]] // CHECK-DEBUG: omp.par.region9: // CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I16]], metadata [[META188:![0-9]+]], metadata !DIExpression()), !dbg [[DBG193:![0-9]+]] @@ -2338,6 +2338,10 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-DEBUG-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, 
ptr [[GEP_R_ADDR]], align 8 +// CHECK-DEBUG-NEXT: [[P_LASTITER128:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LOWERBOUND129:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_UPPERBOUND130:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_STRIDE131:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL106:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR100]], align 4 // CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL106]], align 4 @@ -2346,10 +2350,6 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[AGG_CAPTURED111:%.*]] = alloca [[STRUCT_ANON_11:%.*]], align 8 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED112:%.*]] = alloca [[STRUCT_ANON_12:%.*]], align 4 // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR113:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LASTITER128:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LOWERBOUND129:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_UPPERBOUND130:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_STRIDE131:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION103:%.*]] // CHECK-DEBUG: omp.par.region103: // CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I110]], metadata [[META235:![0-9]+]], metadata !DIExpression()), !dbg [[DBG241:![0-9]+]] @@ -2418,6 +2418,10 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[LOADGEP_B_ADDR:%.*]] = load ptr, ptr [[GEP_B_ADDR]], align 8 // CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { ptr, ptr, ptr }, ptr [[TMP0]], i32 0, i32 2 // CHECK-DEBUG-NEXT: [[LOADGEP_R_ADDR:%.*]] = load ptr, ptr [[GEP_R_ADDR]], align 8 +// CHECK-DEBUG-NEXT: [[P_LASTITER69:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_LOWERBOUND70:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_UPPERBOUND71:%.*]] = alloca i32, align 4 +// CHECK-DEBUG-NEXT: [[P_STRIDE72:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[TID_ADDR_LOCAL47:%.*]] = alloca i32, 
align 4 // CHECK-DEBUG-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR41]], align 4 // CHECK-DEBUG-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL47]], align 4 @@ -2426,10 +2430,6 @@ void parallel_for_2(float *r, int a, double b) { // CHECK-DEBUG-NEXT: [[AGG_CAPTURED52:%.*]] = alloca [[STRUCT_ANON_7:%.*]], align 8 // CHECK-DEBUG-NEXT: [[AGG_CAPTURED53:%.*]] = alloca [[STRUCT_ANON_8:%.*]], align 4 // CHECK-DEBUG-NEXT: [[DOTCOUNT_ADDR54:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LASTITER69:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_LOWERBOUND70:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_UPPERBOUND71:%.*]] = alloca i32, align 4 -// CHECK-DEBUG-NEXT: [[P_STRIDE72:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: br label [[OMP_PAR_REGION44:%.*]] // CHECK-DEBUG: omp.par.region44: // CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata ptr [[I51]], metadata [[META251:![0-9]+]], metadata !DIExpression()), !dbg [[DBG257:![0-9]+]] diff --git a/clang/test/OpenMP/nested_loop_codegen.cpp b/clang/test/OpenMP/nested_loop_codegen.cpp index 0eb76bc2e1c69..797f40114fcb2 100644 --- a/clang/test/OpenMP/nested_loop_codegen.cpp +++ b/clang/test/OpenMP/nested_loop_codegen.cpp @@ -545,6 +545,10 @@ int inline_decl() { // CHECK3-NEXT: [[LOADGEP_I:%.*]] = load ptr, ptr [[GEP_I]], align 8 // CHECK3-NEXT: [[GEP_K:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 // CHECK3-NEXT: [[LOADGEP_K:%.*]] = load ptr, ptr [[GEP_K]], align 8 +// CHECK3-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 @@ -552,10 +556,6 @@ int inline_decl() { // CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca 
[[STRUCT_ANON:%.*]], align 8 // CHECK3-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK3-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK3: omp.par.region: // CHECK3-NEXT: store i32 0, ptr [[LOADGEP_I]], align 4 @@ -713,6 +713,10 @@ int inline_decl() { // CHECK3-NEXT: [[LOADGEP_I:%.*]] = load ptr, ptr [[GEP_I]], align 8 // CHECK3-NEXT: [[GEP_RES:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 // CHECK3-NEXT: [[LOADGEP_RES:%.*]] = load ptr, ptr [[GEP_RES]], align 8 +// CHECK3-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 // CHECK3-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 @@ -721,10 +725,6 @@ int inline_decl() { // CHECK3-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON_1:%.*]], align 8 // CHECK3-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK3-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK3: omp.par.region: // CHECK3-NEXT: store i32 0, ptr [[LOADGEP_I]], align 4 @@ -884,6 +884,10 @@ int inline_decl() { // CHECK4-NEXT: [[LOADGEP_I:%.*]] = load ptr, ptr [[GEP_I]], align 8 // CHECK4-NEXT: [[GEP_K:%.*]] = 
getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 // CHECK4-NEXT: [[LOADGEP_K:%.*]] = load ptr, ptr [[GEP_K]], align 8 +// CHECK4-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 @@ -891,10 +895,6 @@ int inline_decl() { // CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca [[STRUCT_ANON:%.*]], align 8 // CHECK4-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_0:%.*]], align 4 // CHECK4-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK4: omp.par.region: // CHECK4-NEXT: store i32 0, ptr [[LOADGEP_I]], align 4, !dbg [[DBG23:![0-9]+]] @@ -1062,6 +1062,10 @@ int inline_decl() { // CHECK4-NEXT: [[LOADGEP_I:%.*]] = load ptr, ptr [[GEP_I]], align 8 // CHECK4-NEXT: [[GEP_RES:%.*]] = getelementptr { ptr, ptr }, ptr [[TMP0]], i32 0, i32 1 // CHECK4-NEXT: [[LOADGEP_RES:%.*]] = load ptr, ptr [[GEP_RES]], align 8 +// CHECK4-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4 // CHECK4-NEXT: [[TMP1:%.*]] = load i32, ptr [[TID_ADDR]], align 4 // CHECK4-NEXT: store i32 [[TMP1]], ptr [[TID_ADDR_LOCAL]], align 4 @@ -1070,10 +1074,6 @@ int inline_decl() { // CHECK4-NEXT: [[AGG_CAPTURED:%.*]] = alloca 
[[STRUCT_ANON_1:%.*]], align 8 // CHECK4-NEXT: [[AGG_CAPTURED1:%.*]] = alloca [[STRUCT_ANON_2:%.*]], align 4 // CHECK4-NEXT: [[DOTCOUNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[P_LASTITER:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[P_LOWERBOUND:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[P_UPPERBOUND:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[P_STRIDE:%.*]] = alloca i32, align 4 // CHECK4-NEXT: br label [[OMP_PAR_REGION:%.*]] // CHECK4: omp.par.region: // CHECK4-NEXT: store i32 0, ptr [[LOADGEP_I]], align 4, !dbg [[DBG87:![0-9]+]] diff --git a/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 b/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 new file mode 100644 index 0000000000000..ea04d3d1dfa69 --- /dev/null +++ b/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 @@ -0,0 +1,48 @@ +!! Make sure that mixture of by-ref and by-val reductions work all the way +!! to LLVM-IR code. +! RUN: %flang_fc1 -emit-llvm -fopenmp -o - %s 2>&1 | FileCheck %s +subroutine proc + implicit none + real(8),allocatable :: F(:) + real(8),allocatable :: A(:) + + integer :: I + +!$omp parallel private(A) reduction(+:F,I) + allocate(A(10)) +!$omp end parallel +end subroutine proc + +!CHECK-LABEL: define void @proc_() +!CHECK: call void +!CHECK-SAME: @__kmpc_fork_call(ptr {{.*}}, i32 1, ptr @[[OMP_PAR:.*]], {{.*}}) + +!CHECK: define internal void @[[OMP_PAR]](ptr {{.*}} %[[TID_ADDR:.*]], ptr noalias +!CHECK: %[[TID_LOCAL:.*]] = alloca i32 +!CHECK: %[[TID:.*]] = load i32, ptr %[[TID_ADDR]] +!CHECK: store i32 %[[TID]], ptr %[[TID_LOCAL]] +!CHECK: %[[I_priv:.*]] = alloca i32 +!CHECK: %[[F_priv:.*]] = alloca ptr + +!CHECK: omp.reduction.init: +!CHECK: store ptr %{{.*}}, ptr %[[F_priv]] +!CHECK: store i32 0, ptr %[[I_priv]] + +!CHECK: omp.par.region8: +!CHECK-NEXT: call ptr @malloc +!CHECK-SAME: i64 10 + +!CHECK: %[[RED_ARR_0:.*]] = getelementptr inbounds [2 x ptr], ptr %red.array, i64 0, i64 0 +!CHECK: store ptr %[[F_priv]], ptr %[[RED_ARR_0:.*]] +!CHECK: 
%[[RED_ARR_1:.*]] = getelementptr inbounds [2 x ptr], ptr %red.array, i64 0, i64 1 +!CHECK: store ptr %[[I_priv]], ptr %[[RED_ARR_1]] + +!CHECK: omp.par.pre_finalize: ; preds = %reduce.finalize +!CHECK: %{{.*}} = load ptr, ptr %[[F_priv]] +!CHECK: br label %omp.reduction.cleanup + +!CHECK: omp.reduction.cleanup: +!CHECK: br i1 %{{.*}}, label %[[OMP_FREE:.*]], label %{{.*}} + +!CHECK: [[OMP_FREE]]: +!CHECK: call void @free diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 2c4b45255d059..db734d41232bd 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -1391,7 +1391,8 @@ IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel( // Change the location to the outer alloca insertion point to create and // initialize the allocas we pass into the parallel region. - Builder.restoreIP(OuterAllocaIP); + InsertPointTy NewOuter(OuterAllocaBlock, OuterAllocaBlock->begin()); + Builder.restoreIP(NewOuter); AllocaInst *TIDAddrAlloca = Builder.CreateAlloca(Int32, nullptr, "tid.addr"); AllocaInst *ZeroAddrAlloca = Builder.CreateAlloca(Int32, nullptr, "zero.addr"); @@ -2155,7 +2156,7 @@ OpenMPIRBuilder::createReductions(const LocationDescription &Loc, // values. unsigned NumReductions = ReductionInfos.size(); Type *RedArrayTy = ArrayType::get(Builder.getPtrTy(), NumReductions); - Builder.restoreIP(AllocaIP); + Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator()); Value *RedArray = Builder.CreateAlloca(RedArrayTy, nullptr, "red.array"); Builder.SetInsertPoint(InsertBlock, InsertBlock->end()); @@ -2556,7 +2557,8 @@ OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini); // Allocate space for computed loop bounds as expected by the "init" function. 
- Builder.restoreIP(AllocaIP); + Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca()); + Type *I32Type = Type::getInt32Ty(M.getContext()); Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter"); Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound"); @@ -3118,7 +3120,7 @@ OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyDynamicWorkshareLoop( FunctionCallee DynamicNext = getKmpcForDynamicNextForType(IVTy, M, *this); // Allocate space for computed loop bounds as expected by the "init" function. - Builder.restoreIP(AllocaIP); + Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca()); Type *I32Type = Type::getInt32Ty(M.getContext()); Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter"); Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound"); diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 6ec4c120c11ea..8927d11f02c5b 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -770,7 +770,7 @@ static void allocByValReductionVars( DenseMap &reductionVariableMap, llvm::ArrayRef isByRefs) { llvm::IRBuilderBase::InsertPointGuard guard(builder); - builder.restoreIP(allocaIP); + builder.SetInsertPoint(allocaIP.getBlock()->getTerminator()); auto args = loop.getRegion().getArguments().take_back(loop.getNumReductionVars()); @@ -780,7 +780,7 @@ static void allocByValReductionVars( llvm::Value *var = builder.CreateAlloca( moduleTranslation.convertType(reductionDecls[i].getType())); moduleTranslation.mapValue(args[i], var); - privateReductionVariables.push_back(var); + privateReductionVariables[i] = var; reductionVariableMap.try_emplace(loop.getReductionVars()[i], var); } } @@ -911,7 +911,8 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, 
llvm::OpenMPIRBuilder::InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation); - SmallVector privateReductionVariables; + SmallVector privateReductionVariables( + wsloopOp.getNumReductionVars()); DenseMap reductionVariableMap; allocByValReductionVars(wsloopOp, builder, moduleTranslation, allocaIP, reductionDecls, privateReductionVariables, @@ -942,7 +943,7 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, // ptr builder.CreateStore(phis[0], var); - privateReductionVariables.push_back(var); + privateReductionVariables[i] = var; moduleTranslation.mapValue(reductionArgs[i], phis[0]); reductionVariableMap.try_emplace(wsloopOp.getReductionVars()[i], phis[0]); } else { @@ -1140,7 +1141,8 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, // Collect reduction declarations SmallVector reductionDecls; collectReductionDecls(opInst, reductionDecls); - SmallVector privateReductionVariables; + SmallVector privateReductionVariables( + opInst.getNumReductionVars()); auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { // Allocate reduction vars @@ -1154,6 +1156,21 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, MutableArrayRef reductionArgs = opInst.getRegion().getArguments().take_back( opInst.getNumReductionVars()); + + llvm::BasicBlock *initBlock = splitBB(builder, true, "omp.reduction.init"); + allocaIP = + InsertPointTy(allocaIP.getBlock(), + allocaIP.getBlock()->getTerminator()->getIterator()); + SmallVector byRefVars(opInst.getNumReductionVars()); + for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) { + if (isByRef[i]) { + // Allocate reduction variable (which is a pointer to the real reduction + // variable allocated in the inlined region) + byRefVars[i] = builder.CreateAlloca( + moduleTranslation.convertType(reductionDecls[i].getType())); + } + } + for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) { SmallVector phis; @@ -1166,18 +1183,14 @@
convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, assert(phis.size() == 1 && "expected one value to be yielded from the " "reduction neutral element declaration region"); - builder.restoreIP(allocaIP); + builder.SetInsertPoint(initBlock->getTerminator()); if (isByRef[i]) { - // Allocate reduction variable (which is a pointer to the real reduciton - // variable allocated in the inlined region) - llvm::Value *var = builder.CreateAlloca( - moduleTranslation.convertType(reductionDecls[i].getType())); // Store the result of the inlined region to the allocated reduction var // ptr - builder.CreateStore(phis[0], var); + builder.CreateStore(phis[0], byRefVars[i]); - privateReductionVariables.push_back(var); + privateReductionVariables[i] = byRefVars[i]; moduleTranslation.mapValue(reductionArgs[i], phis[0]); reductionVariableMap.try_emplace(opInst.getReductionVars()[i], phis[0]); } else { diff --git a/mlir/test/Target/LLVMIR/openmp-parallel-reduction-cleanup.mlir b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-cleanup.mlir index 84a487cb8c98f..8afa89f1d8368 100644 --- a/mlir/test/Target/LLVMIR/openmp-parallel-reduction-cleanup.mlir +++ b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-cleanup.mlir @@ -55,11 +55,11 @@ // Private reduction variable and its initialization. // CHECK: %tid.addr.local = alloca i32 -// CHECK: %[[MALLOC_I:.+]] = call ptr @malloc(i64 4) // CHECK: %[[PRIV_PTR_I:.+]] = alloca ptr +// CHECK: %[[PRIV_PTR_J:.+]] = alloca ptr +// CHECK: %[[MALLOC_I:.+]] = call ptr @malloc(i64 4) // CHECK: store ptr %[[MALLOC_I]], ptr %[[PRIV_PTR_I]] // CHECK: %[[MALLOC_J:.+]] = call ptr @malloc(i64 4) -// CHECK: %[[PRIV_PTR_J:.+]] = alloca ptr // CHECK: store ptr %[[MALLOC_J]], ptr %[[PRIV_PTR_J]] // Call to the reduction function. 
diff --git a/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir index f4b77cbf413d4..361905f7cddeb 100644 --- a/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir +++ b/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir @@ -59,13 +59,15 @@ module { // CHECK: %[[VAL_17:.*]] = load i32, ptr %[[VAL_18:.*]], align 4 // CHECK: store i32 %[[VAL_17]], ptr %[[VAL_16]], align 4 // CHECK: %[[VAL_19:.*]] = load i32, ptr %[[VAL_16]], align 4 -// CHECK: %[[VAL_20:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[VAL_13]], align 8 // CHECK: %[[VAL_21:.*]] = alloca ptr, align 8 +// CHECK: %[[VAL_23:.*]] = alloca ptr, align 8 +// CHECK: %[[VAL_20:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[VAL_13]], align 8 +// CHECK: %[[VAL_24:.*]] = alloca [2 x ptr], align 8 +// CHECK: br label %[[INIT_LABEL:.*]] +// CHECK: [[INIT_LABEL]]: // CHECK: store ptr %[[VAL_13]], ptr %[[VAL_21]], align 8 // CHECK: %[[VAL_22:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[VAL_15]], align 8 -// CHECK: %[[VAL_23:.*]] = alloca ptr, align 8 // CHECK: store ptr %[[VAL_15]], ptr %[[VAL_23]], align 8 -// CHECK: %[[VAL_24:.*]] = alloca [2 x ptr], align 8 // CHECK: br label %[[VAL_25:.*]] // CHECK: omp.par.region: ; preds = %[[VAL_26:.*]] // CHECK: br label %[[VAL_27:.*]]