; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=loop-vectorize -S -prefer-predicate-over-epilogue=scalar-epilogue < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

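; Store of a loop-varying value to a loop-invariant (uniform) address: the
; CHECK lines expect the vector body to extract the last lane of the wide
; load (lane vscale * 4 - 1) and store it to %dst with a single scalar store,
; rather than emitting a scatter.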
define void @inv_store_i16(ptr noalias %dst, ptr noalias readonly %src, i64 %N) #0 {
; CHECK-LABEL: @inv_store_i16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, ptr [[SRC:%.*]], i64 [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, ptr [[TMP7]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i16>, ptr [[TMP8]], align 2
; CHECK-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    [[TMP10:%.*]] = mul i32 [[TMP9]], 4
; CHECK-NEXT:    [[TMP11:%.*]] = sub i32 [[TMP10]], 1
; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <vscale x 4 x i16> [[WIDE_LOAD]], i32 [[TMP11]]
; CHECK-NEXT:    store i16 [[TMP12]], ptr [[DST:%.*]], align 2
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_INC24:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY14:%.*]]
; CHECK:       for.body14:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY14]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[SRC]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[LD:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT:    store i16 [[LD]], ptr [[DST]], align 2
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_INC24]], label [[FOR_BODY14]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       for.inc24:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body14

for.body14:                                       ; preds = %for.body14, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body14 ]
  %arrayidx = getelementptr inbounds i16, ptr %src, i64 %indvars.iv
  %ld = load i16, ptr %arrayidx
  store i16 %ld, ptr %dst, align 2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond.not = icmp eq i64 %indvars.iv.next, %N
  br i1 %exitcond.not, label %for.inc24, label %for.body14, !llvm.loop !0

for.inc24:                                        ; preds = %for.body14
  ret void
}


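; Conditional store to a loop-invariant address: the store only executes when
; the loaded value is positive, so the CHECK lines expect the vector body to
; emit a masked scatter of the wide load to a splat of the %dst pointer, with
; the compare result as the mask.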
define void @cond_inv_store_i32(ptr noalias %dst, ptr noalias readonly %src, i64 %N) #0 {
; CHECK-LABEL: @cond_inv_store_i32(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 4
; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[DST:%.*]], i64 0
; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i64 [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP8]], align 4
; CHECK-NEXT:    [[TMP9:%.*]] = icmp sgt <vscale x 4 x i32> [[WIDE_LOAD]], zeroinitializer
; CHECK-NEXT:    call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[WIDE_LOAD]], <vscale x 4 x ptr> [[BROADCAST_SPLAT]], i32 4, <vscale x 4 x i1> [[TMP9]])
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[I_09:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[I_09]]
; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[TMP11]], 0
; CHECK-NEXT:    br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; CHECK:       if.then:
; CHECK-NEXT:    store i32 [[TMP11]], ptr [[DST]], align 4
; CHECK-NEXT:    br label [[FOR_INC]]
; CHECK:       for.inc:
; CHECK-NEXT:    [[INC]] = add nuw nsw i64 [[I_09]], 1
; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.inc
  %i.09 = phi i64 [ %inc, %for.inc ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, ptr %src, i64 %i.09
  %0 = load i32, ptr %arrayidx, align 4
  %cmp1 = icmp sgt i32 %0, 0
  br i1 %cmp1, label %if.then, label %for.inc

if.then:                                          ; preds = %for.body
  store i32 %0, ptr %dst, align 4
  br label %for.inc

for.inc:                                          ; preds = %for.body, %if.then
  %inc = add nuw nsw i64 %i.09, 1
  %exitcond.not = icmp eq i64 %inc, %N
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:                                          ; preds = %for.inc
  ret void
}

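; Loop metadata !0 forces scalable vectorization (vectorize.width = 4 with
; scalable.enable = true) and an interleave count of 1; attribute group #0
; enables SVE and constrains vscale to the range [1, 16].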
attributes #0 = { "target-features"="+neon,+sve" vscale_range(1, 16) }

!0 = distinct !{!0, !1, !2, !3, !4, !5}
!1 = !{!"llvm.loop.mustprogress"}
!2 = !{!"llvm.loop.vectorize.width", i32 4}
!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
!4 = !{!"llvm.loop.vectorize.enable", i1 true}
!5 = !{!"llvm.loop.interleave.count", i32 1}