@@ -875,3 +875,124 @@ if.end:
875
875
%exitcond.not = icmp eq i64 %indvars.iv.next , %wide.trip.count
876
876
br i1 %exitcond.not , label %for.cond.cleanup , label %for.body
877
877
}
878
+
879
; External callee used by the inner loop of @outer_latch_heuristic. Only the
; signature is visible here (i64, ptr, ptr, i64) -> i64; no body is defined in
; this chunk.
+ declare i64 @payload (i64 , ptr , ptr , i64 )
880
+
881
; Test function @outer_latch_heuristic - a two-level loop nest whose outer
; latch block contains a select plus two select-like increments (an add of a
; zext'd compare and an add of a sign-bit shift).
;
; Input IR, as written at the bottom of this function
;   outer.loop - carries %k.020.us, %j and %i (all starting at 0) and loads one
;                pointer each from %src[%i] and %src[%j].
;   inner.loop - calls @payload repeatedly, feeding the previous call's result
;                (0 on the first trip) back in as the first argument, while
;                %lsr.iv counts down from %dim; control leaves for %latch once
;                the decremented counter equals 0.
;   latch      - %cmp2.us is true when the last @payload result is > -1.
;                %i advances by the result's sign bit (lshr by 63) and %j by
;                zext(%cmp2.us), so exactly one of them grows by 1 per outer
;                iteration. The select yields the %src[%j] address when
;                %cmp2.us is true, otherwise the %src[%i] address; the pointer
;                loaded through it is stored to %dst[%k.020.us].
;
; Expected output, in two autogenerated assertion groups
;   CHECKOO group - latch branches on a frozen copy of the compare to
;                   select.true.sink / select.false.sink, each holding a single
;                   "add nsw" of 1, and select.end merges the i, j and address
;                   values with phis.
;   CHECKII group - latch keeps both adds and the select unchanged.
; NOTE(review): the RUN lines that define these two prefixes, and whatever
; option distinguishes the two configurations, are outside this chunk - confirm
; against the top of the file.
+ define void @outer_latch_heuristic (ptr %dst , ptr %src , i64 %p , i64 %dim ) {
882
+ ; CHECKOO-LABEL: @outer_latch_heuristic(
883
+ ; CHECKOO-NEXT: entry:
884
+ ; CHECKOO-NEXT: br label [[OUTER_LOOP:%.*]]
885
+ ; CHECKOO: outer.loop:
886
+ ; CHECKOO-NEXT: [[K_020_US:%.*]] = phi i64 [ [[INC7_US:%.*]], [[SELECT_END:%.*]] ], [ 0, [[ENTRY:%.*]] ]
887
+ ; CHECKOO-NEXT: [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[SELECT_END]] ], [ 0, [[ENTRY]] ]
888
+ ; CHECKOO-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[SELECT_END]] ], [ 0, [[ENTRY]] ]
889
+ ; CHECKOO-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds ptr, ptr [[SRC:%.*]], i64 [[I]]
890
+ ; CHECKOO-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX_US]], align 8
891
+ ; CHECKOO-NEXT: [[ARRAYIDX1_US:%.*]] = getelementptr inbounds ptr, ptr [[SRC]], i64 [[J]]
892
+ ; CHECKOO-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARRAYIDX1_US]], align 8
893
+ ; CHECKOO-NEXT: br label [[INNER_LOOP:%.*]]
894
+ ; CHECKOO: inner.loop:
895
+ ; CHECKOO-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[DIM:%.*]], [[OUTER_LOOP]] ], [ [[LSR_IV_NEXT:%.*]], [[INNER_LOOP]] ]
896
+ ; CHECKOO-NEXT: [[DIFF_04_I_US:%.*]] = phi i64 [ [[CALL_I_US:%.*]], [[INNER_LOOP]] ], [ 0, [[OUTER_LOOP]] ]
897
+ ; CHECKOO-NEXT: [[CALL_I_US]] = tail call i64 @payload(i64 [[DIFF_04_I_US]], ptr [[TMP0]], ptr [[TMP1]], i64 [[P:%.*]])
898
+ ; CHECKOO-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
899
+ ; CHECKOO-NEXT: [[EXITCOND_NOT_I_US:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
900
+ ; CHECKOO-NEXT: br i1 [[EXITCOND_NOT_I_US]], label [[LATCH:%.*]], label [[INNER_LOOP]]
901
+ ; CHECKOO: latch:
902
+ ; CHECKOO-NEXT: [[CMP2_US:%.*]] = icmp sgt i64 [[CALL_I_US]], -1
903
+ ; CHECKOO-NEXT: [[DIFF_0_LCSSA_I_LOBIT_US:%.*]] = lshr i64 [[CALL_I_US]], 63
904
+ ; CHECKOO-NEXT: [[CMP2_US_FROZEN:%.*]] = freeze i1 [[CMP2_US]]
905
+ ; CHECKOO-NEXT: br i1 [[CMP2_US_FROZEN]], label [[SELECT_TRUE_SINK:%.*]], label [[SELECT_FALSE_SINK:%.*]]
906
+ ; CHECKOO: select.true.sink:
907
+ ; CHECKOO-NEXT: [[TMP2:%.*]] = add nsw i64 [[J]], 1
908
+ ; CHECKOO-NEXT: br label [[SELECT_END]]
909
+ ; CHECKOO: select.false.sink:
910
+ ; CHECKOO-NEXT: [[TMP3:%.*]] = add nsw i64 1, [[I]]
911
+ ; CHECKOO-NEXT: br label [[SELECT_END]]
912
+ ; CHECKOO: select.end:
913
+ ; CHECKOO-NEXT: [[I_NEXT]] = phi i64 [ [[I]], [[SELECT_TRUE_SINK]] ], [ [[TMP3]], [[SELECT_FALSE_SINK]] ]
914
+ ; CHECKOO-NEXT: [[J_NEXT]] = phi i64 [ [[TMP2]], [[SELECT_TRUE_SINK]] ], [ [[J]], [[SELECT_FALSE_SINK]] ]
915
+ ; CHECKOO-NEXT: [[COND_IN_US:%.*]] = phi ptr [ [[ARRAYIDX1_US]], [[SELECT_TRUE_SINK]] ], [ [[ARRAYIDX_US]], [[SELECT_FALSE_SINK]] ]
916
+ ; CHECKOO-NEXT: [[INC4_US:%.*]] = zext i1 [[CMP2_US]] to i64
917
+ ; CHECKOO-NEXT: [[COND_US:%.*]] = load ptr, ptr [[COND_IN_US]], align 8
918
+ ; CHECKOO-NEXT: [[ARRAYIDX6_US:%.*]] = getelementptr inbounds ptr, ptr [[DST:%.*]], i64 [[K_020_US]]
919
+ ; CHECKOO-NEXT: store ptr [[COND_US]], ptr [[ARRAYIDX6_US]], align 8
920
+ ; CHECKOO-NEXT: [[INC7_US]] = add i64 [[K_020_US]], 1
921
+ ; CHECKOO-NEXT: [[EXITCOND23_NOT:%.*]] = icmp eq i64 [[K_020_US]], 1000
922
+ ; CHECKOO-NEXT: br i1 [[EXITCOND23_NOT]], label [[EXIT:%.*]], label [[OUTER_LOOP]]
923
+ ; CHECKOO: exit:
924
+ ; CHECKOO-NEXT: ret void
925
+ ;
926
+ ; CHECKII-LABEL: @outer_latch_heuristic(
927
+ ; CHECKII-NEXT: entry:
928
+ ; CHECKII-NEXT: br label [[OUTER_LOOP:%.*]]
929
+ ; CHECKII: outer.loop:
930
+ ; CHECKII-NEXT: [[K_020_US:%.*]] = phi i64 [ [[INC7_US:%.*]], [[LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ]
931
+ ; CHECKII-NEXT: [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[LATCH]] ], [ 0, [[ENTRY]] ]
932
+ ; CHECKII-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[LATCH]] ], [ 0, [[ENTRY]] ]
933
+ ; CHECKII-NEXT: [[ARRAYIDX_US:%.*]] = getelementptr inbounds ptr, ptr [[SRC:%.*]], i64 [[I]]
934
+ ; CHECKII-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX_US]], align 8
935
+ ; CHECKII-NEXT: [[ARRAYIDX1_US:%.*]] = getelementptr inbounds ptr, ptr [[SRC]], i64 [[J]]
936
+ ; CHECKII-NEXT: [[TMP1:%.*]] = load ptr, ptr [[ARRAYIDX1_US]], align 8
937
+ ; CHECKII-NEXT: br label [[INNER_LOOP:%.*]]
938
+ ; CHECKII: inner.loop:
939
+ ; CHECKII-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[DIM:%.*]], [[OUTER_LOOP]] ], [ [[LSR_IV_NEXT:%.*]], [[INNER_LOOP]] ]
940
+ ; CHECKII-NEXT: [[DIFF_04_I_US:%.*]] = phi i64 [ [[CALL_I_US:%.*]], [[INNER_LOOP]] ], [ 0, [[OUTER_LOOP]] ]
941
+ ; CHECKII-NEXT: [[CALL_I_US]] = tail call i64 @payload(i64 [[DIFF_04_I_US]], ptr [[TMP0]], ptr [[TMP1]], i64 [[P:%.*]])
942
+ ; CHECKII-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
943
+ ; CHECKII-NEXT: [[EXITCOND_NOT_I_US:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
944
+ ; CHECKII-NEXT: br i1 [[EXITCOND_NOT_I_US]], label [[LATCH]], label [[INNER_LOOP]]
945
+ ; CHECKII: latch:
946
+ ; CHECKII-NEXT: [[CMP2_US:%.*]] = icmp sgt i64 [[CALL_I_US]], -1
947
+ ; CHECKII-NEXT: [[DIFF_0_LCSSA_I_LOBIT_US:%.*]] = lshr i64 [[CALL_I_US]], 63
948
+ ; CHECKII-NEXT: [[I_NEXT]] = add nsw i64 [[DIFF_0_LCSSA_I_LOBIT_US]], [[I]]
949
+ ; CHECKII-NEXT: [[INC4_US:%.*]] = zext i1 [[CMP2_US]] to i64
950
+ ; CHECKII-NEXT: [[J_NEXT]] = add nsw i64 [[J]], [[INC4_US]]
951
+ ; CHECKII-NEXT: [[COND_IN_US:%.*]] = select i1 [[CMP2_US]], ptr [[ARRAYIDX1_US]], ptr [[ARRAYIDX_US]]
952
+ ; CHECKII-NEXT: [[COND_US:%.*]] = load ptr, ptr [[COND_IN_US]], align 8
953
+ ; CHECKII-NEXT: [[ARRAYIDX6_US:%.*]] = getelementptr inbounds ptr, ptr [[DST:%.*]], i64 [[K_020_US]]
954
+ ; CHECKII-NEXT: store ptr [[COND_US]], ptr [[ARRAYIDX6_US]], align 8
955
+ ; CHECKII-NEXT: [[INC7_US]] = add i64 [[K_020_US]], 1
956
+ ; CHECKII-NEXT: [[EXITCOND23_NOT:%.*]] = icmp eq i64 [[K_020_US]], 1000
957
+ ; CHECKII-NEXT: br i1 [[EXITCOND23_NOT]], label [[EXIT:%.*]], label [[OUTER_LOOP]]
958
+ ; CHECKII: exit:
959
+ ; CHECKII-NEXT: ret void
960
+ ;
961
+ entry:
962
+ br label %outer.loop
963
+
964
; Outer loop header. The three phis start at 0 on entry and take their updated
; values from %latch; the two loads fetch the pointers passed to @payload.
+ outer.loop:
965
+ %k.020.us = phi i64 [ %inc7.us , %latch ], [ 0 , %entry ]
966
+ %j = phi i64 [ %j.next , %latch ], [ 0 , %entry ]
967
+ %i = phi i64 [ %i.next , %latch ], [ 0 , %entry ]
968
+ %arrayidx.us = getelementptr inbounds ptr , ptr %src , i64 %i
969
+ %4 = load ptr , ptr %arrayidx.us , align 8
970
+ %arrayidx1.us = getelementptr inbounds ptr , ptr %src , i64 %j
971
+ %5 = load ptr , ptr %arrayidx1.us , align 8
972
+ br label %inner.loop
973
+
974
; Inner loop. The exit test comes after the call, so @payload runs at least
; once per outer iteration; its result is fed back in through %diff.04.i.us.
+ inner.loop:
975
+ %lsr.iv = phi i64 [ %dim , %outer.loop ], [ %lsr.iv.next , %inner.loop ]
976
+ %diff.04.i.us = phi i64 [ %call.i.us , %inner.loop ], [ 0 , %outer.loop ]
977
+ %call.i.us = tail call i64 @payload (i64 %diff.04.i.us , ptr %4 , ptr %5 , i64 %p )
978
+ %lsr.iv.next = add i64 %lsr.iv , -1
979
+ %exitcond.not.i.us = icmp eq i64 %lsr.iv.next , 0
980
+ br i1 %exitcond.not.i.us , label %latch , label %inner.loop
981
+
982
; Outer latch. This is the block whose select and increments are rewritten into
; branches in the first assertion group and kept as-is in the second.
; %diff.0.lcssa.i.lobit.us is 1 exactly when %cmp2.us is false, so %i.next and
; %j.next never both advance in the same iteration.
+ latch:
983
+ %cmp2.us = icmp sgt i64 %call.i.us , -1
984
+ %diff.0.lcssa.i.lobit.us = lshr i64 %call.i.us , 63
985
+ %i.next = add nsw i64 %diff.0.lcssa.i.lobit.us , %i
986
+ %inc4.us = zext i1 %cmp2.us to i64
987
+ %j.next = add nsw i64 %j , %inc4.us
988
+ %cond.in.us = select i1 %cmp2.us , ptr %arrayidx1.us , ptr %arrayidx.us
989
+ %cond.us = load ptr , ptr %cond.in.us , align 8
990
+ %arrayidx6.us = getelementptr inbounds ptr , ptr %dst , i64 %k.020.us
991
+ store ptr %cond.us , ptr %arrayidx6.us , align 8
992
+ %inc7.us = add i64 %k.020.us , 1
993
; The exit compare uses the pre-increment %k.020.us, so the outer body executes
; for k = 0 through 1000 inclusive before leaving the loop.
+ %exitcond23.not = icmp eq i64 %k.020.us , 1000
994
+ br i1 %exitcond23.not , label %exit , label %outer.loop
995
+
996
+ exit:
997
+ ret void
998
+ }
0 commit comments