xref: /llvm-project/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll (revision a611d67601528cb18ae26794a1482cff59ca5254)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+f -target-abi=lp64f \
; RUN:     | FileCheck %s
4
; sink_splat_mul: multiplies 1024 consecutive i32 elements at %a by the scalar
; %x in place, four lanes per iteration. The <4 x i32> splat of %x is built in
; the entry block, outside the loop; the expected assembly performs the
; multiply with vmul.vx taking the scalar register a1 directly.
define void @sink_splat_mul(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_mul:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB0_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmul.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a2, .LBB0_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Broadcast %x into every lane (insert into lane 0, then an all-zero shuffle mask).
  %x.lane0 = insertelement <4 x i32> poison, i32 %x, i32 0
  %x.splat = shufflevector <4 x i32> %x.lane0, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:
  ; %i is the element index; it advances by 4 until it reaches 1024.
  %i = phi i64 [ 0, %entry ], [ %i.next, %vector.body ]
  %elt.ptr = getelementptr inbounds i32, ptr %a, i64 %i
  %vec = load <4 x i32>, ptr %elt.ptr, align 4
  %prod = mul <4 x i32> %vec, %x.splat
  store <4 x i32> %prod, ptr %elt.ptr, align 4
  %i.next = add nuw i64 %i, 4
  %done = icmp eq i64 %i.next, 1024
  br i1 %done, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:
  ret void
}
38
; sink_splat_add: adds the scalar %x to 1024 consecutive i32 elements at %a in
; place, four lanes per iteration. The <4 x i32> splat of %x is built in the
; entry block, outside the loop; the expected assembly performs the add with
; vadd.vx taking the scalar register a1 directly.
define void @sink_splat_add(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_add:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB1_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vadd.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a2, .LBB1_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Broadcast %x into every lane (insert into lane 0, then an all-zero shuffle mask).
  %x.lane0 = insertelement <4 x i32> poison, i32 %x, i32 0
  %x.splat = shufflevector <4 x i32> %x.lane0, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:
  ; %i is the element index; it advances by 4 until it reaches 1024.
  %i = phi i64 [ 0, %entry ], [ %i.next, %vector.body ]
  %elt.ptr = getelementptr inbounds i32, ptr %a, i64 %i
  %vec = load <4 x i32>, ptr %elt.ptr, align 4
  %sum = add <4 x i32> %vec, %x.splat
  store <4 x i32> %sum, ptr %elt.ptr, align 4
  %i.next = add nuw i64 %i, 4
  %done = icmp eq i64 %i.next, 1024
  br i1 %done, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:
  ret void
}
72
; sink_splat_sub: subtracts the scalar %x from 1024 consecutive i32 elements at
; %a in place (a[i] - x), four lanes per iteration. The <4 x i32> splat of %x
; is built in the entry block, outside the loop; the expected assembly performs
; the subtraction with vsub.vx taking the scalar register a1 directly.
define void @sink_splat_sub(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_sub:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB2_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a2, .LBB2_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Broadcast %x into every lane (insert into lane 0, then an all-zero shuffle mask).
  %x.lane0 = insertelement <4 x i32> poison, i32 %x, i32 0
  %x.splat = shufflevector <4 x i32> %x.lane0, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:
  ; %i is the element index; it advances by 4 until it reaches 1024.
  %i = phi i64 [ 0, %entry ], [ %i.next, %vector.body ]
  %elt.ptr = getelementptr inbounds i32, ptr %a, i64 %i
  %vec = load <4 x i32>, ptr %elt.ptr, align 4
  ; The splat is the right-hand operand: memory value minus %x.
  %diff = sub <4 x i32> %vec, %x.splat
  store <4 x i32> %diff, ptr %elt.ptr, align 4
  %i.next = add nuw i64 %i, 4
  %done = icmp eq i64 %i.next, 1024
  br i1 %done, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:
  ret void
}
106
; sink_splat_rsub: replaces each of 1024 consecutive i32 elements at %a with
; x - a[i], four lanes per iteration. The <4 x i32> splat of %x is built in the
; entry block and is the LEFT operand of the subtraction; the expected assembly
; uses the reverse-subtract form vrsub.vx with the scalar register a1.
define void @sink_splat_rsub(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_rsub:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB3_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vrsub.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a2, .LBB3_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Broadcast %x into every lane (insert into lane 0, then an all-zero shuffle mask).
  %x.lane0 = insertelement <4 x i32> poison, i32 %x, i32 0
  %x.splat = shufflevector <4 x i32> %x.lane0, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:
  ; %i is the element index; it advances by 4 until it reaches 1024.
  %i = phi i64 [ 0, %entry ], [ %i.next, %vector.body ]
  %elt.ptr = getelementptr inbounds i32, ptr %a, i64 %i
  %vec = load <4 x i32>, ptr %elt.ptr, align 4
  ; Operand order is reversed relative to sink_splat_sub: %x minus memory value.
  %diff = sub <4 x i32> %x.splat, %vec
  store <4 x i32> %diff, ptr %elt.ptr, align 4
  %i.next = add nuw i64 %i, 4
  %done = icmp eq i64 %i.next, 1024
  br i1 %done, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:
  ret void
}
140
; sink_splat_and: bitwise-ANDs 1024 consecutive i32 elements at %a with the
; scalar %x in place, four lanes per iteration. The <4 x i32> splat of %x is
; built in the entry block, outside the loop; the expected assembly performs
; the AND with vand.vx taking the scalar register a1 directly.
define void @sink_splat_and(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_and:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB4_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a2, .LBB4_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Broadcast %x into every lane (insert into lane 0, then an all-zero shuffle mask).
  %x.lane0 = insertelement <4 x i32> poison, i32 %x, i32 0
  %x.splat = shufflevector <4 x i32> %x.lane0, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:
  ; %i is the element index; it advances by 4 until it reaches 1024.
  %i = phi i64 [ 0, %entry ], [ %i.next, %vector.body ]
  %elt.ptr = getelementptr inbounds i32, ptr %a, i64 %i
  %vec = load <4 x i32>, ptr %elt.ptr, align 4
  %masked = and <4 x i32> %vec, %x.splat
  store <4 x i32> %masked, ptr %elt.ptr, align 4
  %i.next = add nuw i64 %i, 4
  %done = icmp eq i64 %i.next, 1024
  br i1 %done, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:
  ret void
}
174
; sink_splat_or: bitwise-ORs 1024 consecutive i32 elements at %a with the
; scalar %x in place, four lanes per iteration. The <4 x i32> splat of %x is
; built in the entry block, outside the loop; the expected assembly performs
; the OR with vor.vx taking the scalar register a1 directly.
define void @sink_splat_or(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_or:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB5_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vor.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a2, .LBB5_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Broadcast %x into every lane (insert into lane 0, then an all-zero shuffle mask).
  %x.lane0 = insertelement <4 x i32> poison, i32 %x, i32 0
  %x.splat = shufflevector <4 x i32> %x.lane0, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:
  ; %i is the element index; it advances by 4 until it reaches 1024.
  %i = phi i64 [ 0, %entry ], [ %i.next, %vector.body ]
  %elt.ptr = getelementptr inbounds i32, ptr %a, i64 %i
  %vec = load <4 x i32>, ptr %elt.ptr, align 4
  %merged = or <4 x i32> %vec, %x.splat
  store <4 x i32> %merged, ptr %elt.ptr, align 4
  %i.next = add nuw i64 %i, 4
  %done = icmp eq i64 %i.next, 1024
  br i1 %done, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:
  ret void
}
208
; sink_splat_xor: bitwise-XORs 1024 consecutive i32 elements at %a with the
; scalar %x in place, four lanes per iteration. The <4 x i32> splat of %x is
; built in the entry block, outside the loop; the expected assembly performs
; the XOR with vxor.vx taking the scalar register a1 directly.
define void @sink_splat_xor(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_xor:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB6_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vxor.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a2, .LBB6_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Broadcast %x into every lane (insert into lane 0, then an all-zero shuffle mask).
  %x.lane0 = insertelement <4 x i32> poison, i32 %x, i32 0
  %x.splat = shufflevector <4 x i32> %x.lane0, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:
  ; %i is the element index; it advances by 4 until it reaches 1024.
  %i = phi i64 [ 0, %entry ], [ %i.next, %vector.body ]
  %elt.ptr = getelementptr inbounds i32, ptr %a, i64 %i
  %vec = load <4 x i32>, ptr %elt.ptr, align 4
  %flipped = xor <4 x i32> %vec, %x.splat
  store <4 x i32> %flipped, ptr %elt.ptr, align 4
  %i.next = add nuw i64 %i, 4
  %done = icmp eq i64 %i.next, 1024
  br i1 %done, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:
  ret void
}
242
; sink_splat_mul_scalable: multiplies 1024 consecutive i32 elements at %a by
; the scalar %x in place, using <vscale x 4 x i32> vectors plus a scalar
; remainder loop.
;   entry          - if vscale*4 exceeds 1024, go straight to the scalar loop.
;   vector.ph      - computes the remainder (1024 urem vscale*4), the vector
;                    trip count, and the splat of %x (outside the loop).
;   vector.body    - whole-vector multiply; expected assembly uses vmul.vx with a1.
;   middle.block   - skips the scalar loop when the remainder is zero.
;   for.body       - scalar loop over the remaining elements up to index 1024.
define void @sink_splat_mul_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_mul_scalable:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    csrr a5, vlenb
; CHECK-NEXT:    srli a3, a5, 1
; CHECK-NEXT:    li a2, 1024
; CHECK-NEXT:    bgeu a2, a3, .LBB7_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a2, 0
; CHECK-NEXT:    j .LBB7_5
; CHECK-NEXT:  .LBB7_2: # %vector.ph
; CHECK-NEXT:    addi a2, a3, -1
; CHECK-NEXT:    andi a4, a2, 1024
; CHECK-NEXT:    xori a2, a4, 1024
; CHECK-NEXT:    slli a5, a5, 1
; CHECK-NEXT:    mv a6, a0
; CHECK-NEXT:    mv a7, a2
; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT:  .LBB7_3: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vl2re32.v v8, (a6)
; CHECK-NEXT:    sub a7, a7, a3
; CHECK-NEXT:    vmul.vx v8, v8, a1
; CHECK-NEXT:    vs2r.v v8, (a6)
; CHECK-NEXT:    add a6, a6, a5
; CHECK-NEXT:    bnez a7, .LBB7_3
; CHECK-NEXT:  # %bb.4: # %middle.block
; CHECK-NEXT:    beqz a4, .LBB7_7
; CHECK-NEXT:  .LBB7_5: # %for.body.preheader
; CHECK-NEXT:    slli a2, a2, 2
; CHECK-NEXT:    lui a3, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    add a0, a0, a3
; CHECK-NEXT:  .LBB7_6: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    lw a3, 0(a2)
; CHECK-NEXT:    mul a3, a3, a1
; CHECK-NEXT:    sw a3, 0(a2)
; CHECK-NEXT:    addi a2, a2, 4
; CHECK-NEXT:    bne a2, a0, .LBB7_6
; CHECK-NEXT:  .LBB7_7: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Elements per vector iteration = vscale * 4.
  %vscale.entry = call i64 @llvm.vscale.i64()
  %vf.entry = shl i64 %vscale.entry, 2
  %too.short = icmp ugt i64 %vf.entry, 1024
  br i1 %too.short, label %for.body.preheader, label %vector.ph

vector.ph:
  %vscale.ph = call i64 @llvm.vscale.i64()
  %vf.ph = shl i64 %vscale.ph, 2
  %tail.len = urem i64 1024, %vf.ph
  %vec.end = sub nsw i64 1024, %tail.len
  ; Broadcast %x into every lane; built once, before the loop.
  %x.lane0 = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
  %x.splat = shufflevector <vscale x 4 x i32> %x.lane0, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
  %vscale.step = call i64 @llvm.vscale.i64()
  %step = shl i64 %vscale.step, 2
  br label %vector.body

vector.body:
  %i = phi i64 [ 0, %vector.ph ], [ %i.next, %vector.body ]
  %vec.ptr = getelementptr inbounds i32, ptr %a, i64 %i
  %vec = load <vscale x 4 x i32>, ptr %vec.ptr, align 4
  %vec.res = mul <vscale x 4 x i32> %vec, %x.splat
  store <vscale x 4 x i32> %vec.res, ptr %vec.ptr, align 4
  %i.next = add nuw i64 %i, %step
  %vec.done = icmp eq i64 %i.next, %vec.end
  br i1 %vec.done, label %middle.block, label %vector.body

middle.block:
  %no.tail = icmp eq i64 %tail.len, 0
  br i1 %no.tail, label %for.cond.cleanup, label %for.body.preheader

for.body.preheader:
  ; Scalar loop starts at 0 when the vector loop was skipped, else where it stopped.
  %tail.start = phi i64 [ 0, %entry ], [ %vec.end, %middle.block ]
  br label %for.body

for.cond.cleanup:
  ret void

for.body:
  %j = phi i64 [ %j.next, %for.body ], [ %tail.start, %for.body.preheader ]
  %elt.ptr = getelementptr inbounds i32, ptr %a, i64 %j
  %elt = load i32, ptr %elt.ptr, align 4
  %elt.res = mul i32 %elt, %x
  store i32 %elt.res, ptr %elt.ptr, align 4
  %j.next = add nuw nsw i64 %j, 1
  %tail.done = icmp eq i64 %j.next, 1024
  br i1 %tail.done, label %for.cond.cleanup, label %for.body
}
333
; sink_splat_add_scalable: adds the scalar %x to 1024 consecutive i32 elements
; at %a in place, using <vscale x 4 x i32> vectors plus a scalar remainder loop.
;   entry          - if vscale*4 exceeds 1024, go straight to the scalar loop.
;   vector.ph      - computes the remainder (1024 urem vscale*4), the vector
;                    trip count, and the splat of %x (outside the loop).
;   vector.body    - whole-vector add; expected assembly uses vadd.vx with a1.
;   middle.block   - skips the scalar loop when the remainder is zero.
;   for.body       - scalar loop over the remaining elements up to index 1024.
define void @sink_splat_add_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_add_scalable:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    csrr a5, vlenb
; CHECK-NEXT:    srli a3, a5, 1
; CHECK-NEXT:    li a2, 1024
; CHECK-NEXT:    bgeu a2, a3, .LBB8_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a2, 0
; CHECK-NEXT:    j .LBB8_5
; CHECK-NEXT:  .LBB8_2: # %vector.ph
; CHECK-NEXT:    addi a2, a3, -1
; CHECK-NEXT:    andi a4, a2, 1024
; CHECK-NEXT:    xori a2, a4, 1024
; CHECK-NEXT:    slli a5, a5, 1
; CHECK-NEXT:    mv a6, a0
; CHECK-NEXT:    mv a7, a2
; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT:  .LBB8_3: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vl2re32.v v8, (a6)
; CHECK-NEXT:    sub a7, a7, a3
; CHECK-NEXT:    vadd.vx v8, v8, a1
; CHECK-NEXT:    vs2r.v v8, (a6)
; CHECK-NEXT:    add a6, a6, a5
; CHECK-NEXT:    bnez a7, .LBB8_3
; CHECK-NEXT:  # %bb.4: # %middle.block
; CHECK-NEXT:    beqz a4, .LBB8_7
; CHECK-NEXT:  .LBB8_5: # %for.body.preheader
; CHECK-NEXT:    slli a2, a2, 2
; CHECK-NEXT:    lui a3, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    add a0, a0, a3
; CHECK-NEXT:  .LBB8_6: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    lw a3, 0(a2)
; CHECK-NEXT:    add a3, a3, a1
; CHECK-NEXT:    sw a3, 0(a2)
; CHECK-NEXT:    addi a2, a2, 4
; CHECK-NEXT:    bne a2, a0, .LBB8_6
; CHECK-NEXT:  .LBB8_7: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Elements per vector iteration = vscale * 4.
  %vscale.entry = call i64 @llvm.vscale.i64()
  %vf.entry = shl i64 %vscale.entry, 2
  %too.short = icmp ugt i64 %vf.entry, 1024
  br i1 %too.short, label %for.body.preheader, label %vector.ph

vector.ph:
  %vscale.ph = call i64 @llvm.vscale.i64()
  %vf.ph = shl i64 %vscale.ph, 2
  %tail.len = urem i64 1024, %vf.ph
  %vec.end = sub nsw i64 1024, %tail.len
  ; Broadcast %x into every lane; built once, before the loop.
  %x.lane0 = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
  %x.splat = shufflevector <vscale x 4 x i32> %x.lane0, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
  %vscale.step = call i64 @llvm.vscale.i64()
  %step = shl i64 %vscale.step, 2
  br label %vector.body

vector.body:
  %i = phi i64 [ 0, %vector.ph ], [ %i.next, %vector.body ]
  %vec.ptr = getelementptr inbounds i32, ptr %a, i64 %i
  %vec = load <vscale x 4 x i32>, ptr %vec.ptr, align 4
  %vec.res = add <vscale x 4 x i32> %vec, %x.splat
  store <vscale x 4 x i32> %vec.res, ptr %vec.ptr, align 4
  %i.next = add nuw i64 %i, %step
  %vec.done = icmp eq i64 %i.next, %vec.end
  br i1 %vec.done, label %middle.block, label %vector.body

middle.block:
  %no.tail = icmp eq i64 %tail.len, 0
  br i1 %no.tail, label %for.cond.cleanup, label %for.body.preheader

for.body.preheader:
  ; Scalar loop starts at 0 when the vector loop was skipped, else where it stopped.
  %tail.start = phi i64 [ 0, %entry ], [ %vec.end, %middle.block ]
  br label %for.body

for.cond.cleanup:
  ret void

for.body:
  %j = phi i64 [ %j.next, %for.body ], [ %tail.start, %for.body.preheader ]
  %elt.ptr = getelementptr inbounds i32, ptr %a, i64 %j
  %elt = load i32, ptr %elt.ptr, align 4
  %elt.res = add i32 %elt, %x
  store i32 %elt.res, ptr %elt.ptr, align 4
  %j.next = add nuw nsw i64 %j, 1
  %tail.done = icmp eq i64 %j.next, 1024
  br i1 %tail.done, label %for.cond.cleanup, label %for.body
}
424
; sink_splat_sub_scalable: subtracts the scalar %x from 1024 consecutive i32
; elements at %a in place (a[i] - x), using <vscale x 4 x i32> vectors plus a
; scalar remainder loop.
;   entry          - if vscale*4 exceeds 1024, go straight to the scalar loop.
;   vector.ph      - computes the remainder (1024 urem vscale*4), the vector
;                    trip count, and the splat of %x (outside the loop).
;   vector.body    - whole-vector subtract; expected assembly uses vsub.vx with a1.
;   middle.block   - skips the scalar loop when the remainder is zero.
;   for.body       - scalar loop over the remaining elements up to index 1024.
;
; The scalar remainder loop previously computed a[i] + x while the vector body
; computed a[i] - x; it now subtracts so both loops perform the same operation.
; NOTE(review): the expected scalar-loop instruction below (subw a3, a3, a1) was
; updated by hand, by analogy with the subw expected in
; sink_splat_rsub_scalable. Regenerate the assertions with
; utils/update_llc_test_checks.py to confirm.
define void @sink_splat_sub_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_sub_scalable:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    csrr a5, vlenb
; CHECK-NEXT:    srli a3, a5, 1
; CHECK-NEXT:    li a2, 1024
; CHECK-NEXT:    bgeu a2, a3, .LBB9_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a2, 0
; CHECK-NEXT:    j .LBB9_5
; CHECK-NEXT:  .LBB9_2: # %vector.ph
; CHECK-NEXT:    addi a2, a3, -1
; CHECK-NEXT:    andi a4, a2, 1024
; CHECK-NEXT:    xori a2, a4, 1024
; CHECK-NEXT:    slli a5, a5, 1
; CHECK-NEXT:    mv a6, a0
; CHECK-NEXT:    mv a7, a2
; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT:  .LBB9_3: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vl2re32.v v8, (a6)
; CHECK-NEXT:    sub a7, a7, a3
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    vs2r.v v8, (a6)
; CHECK-NEXT:    add a6, a6, a5
; CHECK-NEXT:    bnez a7, .LBB9_3
; CHECK-NEXT:  # %bb.4: # %middle.block
; CHECK-NEXT:    beqz a4, .LBB9_7
; CHECK-NEXT:  .LBB9_5: # %for.body.preheader
; CHECK-NEXT:    slli a2, a2, 2
; CHECK-NEXT:    lui a3, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    add a0, a0, a3
; CHECK-NEXT:  .LBB9_6: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    lw a3, 0(a2)
; CHECK-NEXT:    subw a3, a3, a1
; CHECK-NEXT:    sw a3, 0(a2)
; CHECK-NEXT:    addi a2, a2, 4
; CHECK-NEXT:    bne a2, a0, .LBB9_6
; CHECK-NEXT:  .LBB9_7: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Elements per vector iteration = vscale * 4.
  %0 = call i64 @llvm.vscale.i64()
  %1 = shl i64 %0, 2
  %min.iters.check = icmp ugt i64 %1, 1024
  br i1 %min.iters.check, label %for.body.preheader, label %vector.ph

vector.ph:                                        ; preds = %entry
  %2 = call i64 @llvm.vscale.i64()
  %3 = shl i64 %2, 2
  %n.mod.vf = urem i64 1024, %3
  %n.vec = sub nsw i64 1024, %n.mod.vf
  ; Broadcast %x into every lane; built once, before the loop.
  %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
  %4 = call i64 @llvm.vscale.i64()
  %5 = shl i64 %4, 2
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %6 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
  %7 = sub <vscale x 4 x i32> %wide.load, %broadcast.splat
  store <vscale x 4 x i32> %7, ptr %6, align 4
  %index.next = add nuw i64 %index, %5
  %8 = icmp eq i64 %index.next, %n.vec
  br i1 %8, label %middle.block, label %vector.body

middle.block:                                     ; preds = %vector.body
  %cmp.n = icmp eq i64 %n.mod.vf, 0
  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader

for.body.preheader:                               ; preds = %entry, %middle.block
  ; Scalar loop starts at 0 when the vector loop was skipped, else where it stopped.
  %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body, %middle.block
  ret void

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
  %9 = load i32, ptr %arrayidx, align 4
  ; Same operation and operand order as the vector body: memory value minus %x.
  %sub = sub i32 %9, %x
  store i32 %sub, ptr %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %cmp.not = icmp eq i64 %indvars.iv.next, 1024
  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
}
515
; sink_splat_rsub_scalable: replaces each of 1024 consecutive i32 elements at
; %a with x - a[i], using <vscale x 4 x i32> vectors plus a scalar remainder
; loop. The splat of %x is the LEFT operand of the subtraction.
;   entry          - if vscale*4 exceeds 1024, go straight to the scalar loop.
;   vector.ph      - computes the remainder (1024 urem vscale*4), the vector
;                    trip count, and the splat of %x (outside the loop).
;   vector.body    - whole-vector reverse subtract; expected assembly uses
;                    vrsub.vx with a1.
;   middle.block   - skips the scalar loop when the remainder is zero.
;   for.body       - scalar loop over the remaining elements up to index 1024.
define void @sink_splat_rsub_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_rsub_scalable:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    csrr a5, vlenb
; CHECK-NEXT:    srli a3, a5, 1
; CHECK-NEXT:    li a2, 1024
; CHECK-NEXT:    bgeu a2, a3, .LBB10_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a2, 0
; CHECK-NEXT:    j .LBB10_5
; CHECK-NEXT:  .LBB10_2: # %vector.ph
; CHECK-NEXT:    addi a2, a3, -1
; CHECK-NEXT:    andi a4, a2, 1024
; CHECK-NEXT:    xori a2, a4, 1024
; CHECK-NEXT:    slli a5, a5, 1
; CHECK-NEXT:    mv a6, a0
; CHECK-NEXT:    mv a7, a2
; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT:  .LBB10_3: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vl2re32.v v8, (a6)
; CHECK-NEXT:    sub a7, a7, a3
; CHECK-NEXT:    vrsub.vx v8, v8, a1
; CHECK-NEXT:    vs2r.v v8, (a6)
; CHECK-NEXT:    add a6, a6, a5
; CHECK-NEXT:    bnez a7, .LBB10_3
; CHECK-NEXT:  # %bb.4: # %middle.block
; CHECK-NEXT:    beqz a4, .LBB10_7
; CHECK-NEXT:  .LBB10_5: # %for.body.preheader
; CHECK-NEXT:    slli a2, a2, 2
; CHECK-NEXT:    lui a3, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    add a0, a0, a3
; CHECK-NEXT:  .LBB10_6: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    lw a3, 0(a2)
; CHECK-NEXT:    subw a3, a1, a3
; CHECK-NEXT:    sw a3, 0(a2)
; CHECK-NEXT:    addi a2, a2, 4
; CHECK-NEXT:    bne a2, a0, .LBB10_6
; CHECK-NEXT:  .LBB10_7: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Elements per vector iteration = vscale * 4.
  %vscale.entry = call i64 @llvm.vscale.i64()
  %vf.entry = shl i64 %vscale.entry, 2
  %too.short = icmp ugt i64 %vf.entry, 1024
  br i1 %too.short, label %for.body.preheader, label %vector.ph

vector.ph:
  %vscale.ph = call i64 @llvm.vscale.i64()
  %vf.ph = shl i64 %vscale.ph, 2
  %tail.len = urem i64 1024, %vf.ph
  %vec.end = sub nsw i64 1024, %tail.len
  ; Broadcast %x into every lane; built once, before the loop.
  %x.lane0 = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
  %x.splat = shufflevector <vscale x 4 x i32> %x.lane0, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
  %vscale.step = call i64 @llvm.vscale.i64()
  %step = shl i64 %vscale.step, 2
  br label %vector.body

vector.body:
  %i = phi i64 [ 0, %vector.ph ], [ %i.next, %vector.body ]
  %vec.ptr = getelementptr inbounds i32, ptr %a, i64 %i
  %vec = load <vscale x 4 x i32>, ptr %vec.ptr, align 4
  ; Splat on the left: %x minus memory value.
  %vec.res = sub <vscale x 4 x i32> %x.splat, %vec
  store <vscale x 4 x i32> %vec.res, ptr %vec.ptr, align 4
  %i.next = add nuw i64 %i, %step
  %vec.done = icmp eq i64 %i.next, %vec.end
  br i1 %vec.done, label %middle.block, label %vector.body

middle.block:
  %no.tail = icmp eq i64 %tail.len, 0
  br i1 %no.tail, label %for.cond.cleanup, label %for.body.preheader

for.body.preheader:
  ; Scalar loop starts at 0 when the vector loop was skipped, else where it stopped.
  %tail.start = phi i64 [ 0, %entry ], [ %vec.end, %middle.block ]
  br label %for.body

for.cond.cleanup:
  ret void

for.body:
  %j = phi i64 [ %j.next, %for.body ], [ %tail.start, %for.body.preheader ]
  %elt.ptr = getelementptr inbounds i32, ptr %a, i64 %j
  %elt = load i32, ptr %elt.ptr, align 4
  %elt.res = sub i32 %x, %elt
  store i32 %elt.res, ptr %elt.ptr, align 4
  %j.next = add nuw nsw i64 %j, 1
  %tail.done = icmp eq i64 %j.next, 1024
  br i1 %tail.done, label %for.cond.cleanup, label %for.body
}
606
; sink_splat_and_scalable: bitwise-ANDs 1024 consecutive i32 elements at %a
; with the scalar %x in place, using <vscale x 4 x i32> vectors plus a scalar
; remainder loop.
;   entry          - if vscale*4 exceeds 1024, go straight to the scalar loop.
;   vector.ph      - computes the remainder (1024 urem vscale*4), the vector
;                    trip count, and the splat of %x (outside the loop).
;   vector.body    - whole-vector AND; expected assembly uses vand.vx with a1.
;   middle.block   - skips the scalar loop when the remainder is zero.
;   for.body       - scalar loop over the remaining elements up to index 1024.
define void @sink_splat_and_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_and_scalable:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    csrr a5, vlenb
; CHECK-NEXT:    srli a3, a5, 1
; CHECK-NEXT:    li a2, 1024
; CHECK-NEXT:    bgeu a2, a3, .LBB11_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a2, 0
; CHECK-NEXT:    j .LBB11_5
; CHECK-NEXT:  .LBB11_2: # %vector.ph
; CHECK-NEXT:    addi a2, a3, -1
; CHECK-NEXT:    andi a4, a2, 1024
; CHECK-NEXT:    xori a2, a4, 1024
; CHECK-NEXT:    slli a5, a5, 1
; CHECK-NEXT:    mv a6, a0
; CHECK-NEXT:    mv a7, a2
; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT:  .LBB11_3: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vl2re32.v v8, (a6)
; CHECK-NEXT:    sub a7, a7, a3
; CHECK-NEXT:    vand.vx v8, v8, a1
; CHECK-NEXT:    vs2r.v v8, (a6)
; CHECK-NEXT:    add a6, a6, a5
; CHECK-NEXT:    bnez a7, .LBB11_3
; CHECK-NEXT:  # %bb.4: # %middle.block
; CHECK-NEXT:    beqz a4, .LBB11_7
; CHECK-NEXT:  .LBB11_5: # %for.body.preheader
; CHECK-NEXT:    slli a2, a2, 2
; CHECK-NEXT:    lui a3, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    add a0, a0, a3
; CHECK-NEXT:  .LBB11_6: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    lw a3, 0(a2)
; CHECK-NEXT:    and a3, a3, a1
; CHECK-NEXT:    sw a3, 0(a2)
; CHECK-NEXT:    addi a2, a2, 4
; CHECK-NEXT:    bne a2, a0, .LBB11_6
; CHECK-NEXT:  .LBB11_7: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Elements per vector iteration = vscale * 4.
  %vscale.entry = call i64 @llvm.vscale.i64()
  %vf.entry = shl i64 %vscale.entry, 2
  %too.short = icmp ugt i64 %vf.entry, 1024
  br i1 %too.short, label %for.body.preheader, label %vector.ph

vector.ph:
  %vscale.ph = call i64 @llvm.vscale.i64()
  %vf.ph = shl i64 %vscale.ph, 2
  %tail.len = urem i64 1024, %vf.ph
  %vec.end = sub nsw i64 1024, %tail.len
  ; Broadcast %x into every lane; built once, before the loop.
  %x.lane0 = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
  %x.splat = shufflevector <vscale x 4 x i32> %x.lane0, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
  %vscale.step = call i64 @llvm.vscale.i64()
  %step = shl i64 %vscale.step, 2
  br label %vector.body

vector.body:
  %i = phi i64 [ 0, %vector.ph ], [ %i.next, %vector.body ]
  %vec.ptr = getelementptr inbounds i32, ptr %a, i64 %i
  %vec = load <vscale x 4 x i32>, ptr %vec.ptr, align 4
  %vec.res = and <vscale x 4 x i32> %vec, %x.splat
  store <vscale x 4 x i32> %vec.res, ptr %vec.ptr, align 4
  %i.next = add nuw i64 %i, %step
  %vec.done = icmp eq i64 %i.next, %vec.end
  br i1 %vec.done, label %middle.block, label %vector.body

middle.block:
  %no.tail = icmp eq i64 %tail.len, 0
  br i1 %no.tail, label %for.cond.cleanup, label %for.body.preheader

for.body.preheader:
  ; Scalar loop starts at 0 when the vector loop was skipped, else where it stopped.
  %tail.start = phi i64 [ 0, %entry ], [ %vec.end, %middle.block ]
  br label %for.body

for.cond.cleanup:
  ret void

for.body:
  %j = phi i64 [ %j.next, %for.body ], [ %tail.start, %for.body.preheader ]
  %elt.ptr = getelementptr inbounds i32, ptr %a, i64 %j
  %elt = load i32, ptr %elt.ptr, align 4
  %elt.res = and i32 %elt, %x
  store i32 %elt.res, ptr %elt.ptr, align 4
  %j.next = add nuw nsw i64 %j, 1
  %tail.done = icmp eq i64 %j.next, 1024
  br i1 %tail.done, label %for.cond.cleanup, label %for.body
}
697
; sink_splat_or_scalable: bitwise-ORs 1024 consecutive i32 elements at %a with
; the scalar %x in place, using <vscale x 4 x i32> vectors plus a scalar
; remainder loop.
;   entry          - if vscale*4 exceeds 1024, go straight to the scalar loop.
;   vector.ph      - computes the remainder (1024 urem vscale*4), the vector
;                    trip count, and the splat of %x (outside the loop).
;   vector.body    - whole-vector OR; expected assembly uses vor.vx with a1.
;   middle.block   - skips the scalar loop when the remainder is zero.
;   for.body       - scalar loop over the remaining elements up to index 1024.
define void @sink_splat_or_scalable(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_or_scalable:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    csrr a5, vlenb
; CHECK-NEXT:    srli a3, a5, 1
; CHECK-NEXT:    li a2, 1024
; CHECK-NEXT:    bgeu a2, a3, .LBB12_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a2, 0
; CHECK-NEXT:    j .LBB12_5
; CHECK-NEXT:  .LBB12_2: # %vector.ph
; CHECK-NEXT:    addi a2, a3, -1
; CHECK-NEXT:    andi a4, a2, 1024
; CHECK-NEXT:    xori a2, a4, 1024
; CHECK-NEXT:    slli a5, a5, 1
; CHECK-NEXT:    mv a6, a0
; CHECK-NEXT:    mv a7, a2
; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, ma
; CHECK-NEXT:  .LBB12_3: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vl2re32.v v8, (a6)
; CHECK-NEXT:    sub a7, a7, a3
; CHECK-NEXT:    vor.vx v8, v8, a1
; CHECK-NEXT:    vs2r.v v8, (a6)
; CHECK-NEXT:    add a6, a6, a5
; CHECK-NEXT:    bnez a7, .LBB12_3
; CHECK-NEXT:  # %bb.4: # %middle.block
; CHECK-NEXT:    beqz a4, .LBB12_7
; CHECK-NEXT:  .LBB12_5: # %for.body.preheader
; CHECK-NEXT:    slli a2, a2, 2
; CHECK-NEXT:    lui a3, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    add a0, a0, a3
; CHECK-NEXT:  .LBB12_6: # %for.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    lw a3, 0(a2)
; CHECK-NEXT:    or a3, a3, a1
; CHECK-NEXT:    sw a3, 0(a2)
; CHECK-NEXT:    addi a2, a2, 4
; CHECK-NEXT:    bne a2, a0, .LBB12_6
; CHECK-NEXT:  .LBB12_7: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Elements per vector iteration = vscale * 4.
  %vscale.entry = call i64 @llvm.vscale.i64()
  %vf.entry = shl i64 %vscale.entry, 2
  %too.short = icmp ugt i64 %vf.entry, 1024
  br i1 %too.short, label %for.body.preheader, label %vector.ph

vector.ph:
  %vscale.ph = call i64 @llvm.vscale.i64()
  %vf.ph = shl i64 %vscale.ph, 2
  %tail.len = urem i64 1024, %vf.ph
  %vec.end = sub nsw i64 1024, %tail.len
  ; Broadcast %x into every lane; built once, before the loop.
  %x.lane0 = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
  %x.splat = shufflevector <vscale x 4 x i32> %x.lane0, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
  %vscale.step = call i64 @llvm.vscale.i64()
  %step = shl i64 %vscale.step, 2
  br label %vector.body

vector.body:
  %i = phi i64 [ 0, %vector.ph ], [ %i.next, %vector.body ]
  %vec.ptr = getelementptr inbounds i32, ptr %a, i64 %i
  %vec = load <vscale x 4 x i32>, ptr %vec.ptr, align 4
  %vec.res = or <vscale x 4 x i32> %vec, %x.splat
  store <vscale x 4 x i32> %vec.res, ptr %vec.ptr, align 4
  %i.next = add nuw i64 %i, %step
  %vec.done = icmp eq i64 %i.next, %vec.end
  br i1 %vec.done, label %middle.block, label %vector.body

middle.block:
  %no.tail = icmp eq i64 %tail.len, 0
  br i1 %no.tail, label %for.cond.cleanup, label %for.body.preheader

for.body.preheader:
  ; Scalar loop starts at 0 when the vector loop was skipped, else where it stopped.
  %tail.start = phi i64 [ 0, %entry ], [ %vec.end, %middle.block ]
  br label %for.body

for.cond.cleanup:
  ret void

for.body:
  %j = phi i64 [ %j.next, %for.body ], [ %tail.start, %for.body.preheader ]
  %elt.ptr = getelementptr inbounds i32, ptr %a, i64 %j
  %elt = load i32, ptr %elt.ptr, align 4
  %elt.res = or i32 %elt, %x
  store i32 %elt.res, ptr %elt.ptr, align 4
  %j.next = add nuw nsw i64 %j, 1
  %tail.done = icmp eq i64 %j.next, 1024
  br i1 %tail.done, label %for.cond.cleanup, label %for.body
}
788
; sink_splat_xor_scalable: xor the first 1024 i32 elements at %a with %x, using
; a <vscale x 4 x i32> vector loop followed by a scalar remainder loop.
; The splat of %x is defined in %vector.ph, outside the loop that consumes it.
; The expected assembly uses the vector-scalar form vxor.vx with a1 inside the
; vector loop and contains no separate vector splat instruction.
789define void @sink_splat_xor_scalable(ptr nocapture %a, i32 signext %x) {
790; CHECK-LABEL: sink_splat_xor_scalable:
791; CHECK:       # %bb.0: # %entry
792; CHECK-NEXT:    csrr a5, vlenb
793; CHECK-NEXT:    srli a3, a5, 1
794; CHECK-NEXT:    li a2, 1024
795; CHECK-NEXT:    bgeu a2, a3, .LBB13_2
796; CHECK-NEXT:  # %bb.1:
797; CHECK-NEXT:    li a2, 0
798; CHECK-NEXT:    j .LBB13_5
799; CHECK-NEXT:  .LBB13_2: # %vector.ph
800; CHECK-NEXT:    addi a2, a3, -1
801; CHECK-NEXT:    andi a4, a2, 1024
802; CHECK-NEXT:    xori a2, a4, 1024
803; CHECK-NEXT:    slli a5, a5, 1
804; CHECK-NEXT:    mv a6, a0
805; CHECK-NEXT:    mv a7, a2
806; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, ma
807; CHECK-NEXT:  .LBB13_3: # %vector.body
808; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
809; CHECK-NEXT:    vl2re32.v v8, (a6)
810; CHECK-NEXT:    sub a7, a7, a3
811; CHECK-NEXT:    vxor.vx v8, v8, a1
812; CHECK-NEXT:    vs2r.v v8, (a6)
813; CHECK-NEXT:    add a6, a6, a5
814; CHECK-NEXT:    bnez a7, .LBB13_3
815; CHECK-NEXT:  # %bb.4: # %middle.block
816; CHECK-NEXT:    beqz a4, .LBB13_7
817; CHECK-NEXT:  .LBB13_5: # %for.body.preheader
818; CHECK-NEXT:    slli a2, a2, 2
819; CHECK-NEXT:    lui a3, 1
820; CHECK-NEXT:    add a2, a0, a2
821; CHECK-NEXT:    add a0, a0, a3
822; CHECK-NEXT:  .LBB13_6: # %for.body
823; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
824; CHECK-NEXT:    lw a3, 0(a2)
825; CHECK-NEXT:    xor a3, a3, a1
826; CHECK-NEXT:    sw a3, 0(a2)
827; CHECK-NEXT:    addi a2, a2, 4
828; CHECK-NEXT:    bne a2, a0, .LBB13_6
829; CHECK-NEXT:  .LBB13_7: # %for.cond.cleanup
830; CHECK-NEXT:    ret
831entry:
; The vector step is vscale * 4 elements; when it exceeds 1024 the vector loop
; is bypassed and the scalar loop starts at index 0.
832  %0 = call i64 @llvm.vscale.i64()
833  %1 = shl i64 %0, 2
834  %min.iters.check = icmp ugt i64 %1, 1024
835  br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
836
837vector.ph:                                        ; preds = %entry
; %n.vec is 1024 minus the remainder of 1024 by the vector step, i.e. the
; number of elements handled by the vector loop.
838  %2 = call i64 @llvm.vscale.i64()
839  %3 = shl i64 %2, 2
840  %n.mod.vf = urem i64 1024, %3
841  %n.vec = sub nsw i64 1024, %n.mod.vf
; Loop-invariant splat of %x, defined outside %vector.body.
842  %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
843  %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
844  %4 = call i64 @llvm.vscale.i64()
845  %5 = shl i64 %4, 2
846  br label %vector.body
847
848vector.body:                                      ; preds = %vector.body, %vector.ph
; Load, xor with the splat, and store back in place; %index advances by %5.
849  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
850  %6 = getelementptr inbounds i32, ptr %a, i64 %index
851  %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
852  %7 = xor <vscale x 4 x i32> %wide.load, %broadcast.splat
853  store <vscale x 4 x i32> %7, ptr %6, align 4
854  %index.next = add nuw i64 %index, %5
855  %8 = icmp eq i64 %index.next, %n.vec
856  br i1 %8, label %middle.block, label %vector.body
857
858middle.block:                                     ; preds = %vector.body
; A zero remainder means the vector loop covered all 1024 elements.
859  %cmp.n = icmp eq i64 %n.mod.vf, 0
860  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
861
862for.body.preheader:                               ; preds = %entry, %middle.block
863  %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
864  br label %for.body
865
866for.cond.cleanup:                                 ; preds = %for.body, %middle.block
867  ret void
868
869for.body:                                         ; preds = %for.body.preheader, %for.body
; Scalar loop over indices from %indvars.iv.ph up to (not including) 1024.
870  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
871  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
872  %9 = load i32, ptr %arrayidx, align 4
873  %xor = xor i32 %9, %x
874  store i32 %xor, ptr %arrayidx, align 4
875  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
876  %cmp.not = icmp eq i64 %indvars.iv.next, 1024
877  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
878}
879
; sink_splat_shl: shift the first 1024 i32 elements at %a left by %x, four
; elements per iteration. The splat of %x is built in %entry, outside
; %vector.body; the expected assembly uses the vector-scalar form vsll.vx with
; a1 inside the loop and no separate vector splat instruction.
880define void @sink_splat_shl(ptr nocapture %a, i32 signext %x) {
881; CHECK-LABEL: sink_splat_shl:
882; CHECK:       # %bb.0: # %entry
883; CHECK-NEXT:    lui a2, 1
884; CHECK-NEXT:    add a2, a0, a2
885; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
886; CHECK-NEXT:  .LBB14_1: # %vector.body
887; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
888; CHECK-NEXT:    vle32.v v8, (a0)
889; CHECK-NEXT:    vsll.vx v8, v8, a1
890; CHECK-NEXT:    vse32.v v8, (a0)
891; CHECK-NEXT:    addi a0, a0, 16
892; CHECK-NEXT:    bne a0, a2, .LBB14_1
893; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
894; CHECK-NEXT:    ret
895entry:
; Loop-invariant splat of %x, defined outside the loop that uses it.
896  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
897  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
898  br label %vector.body
899
900vector.body:                                      ; preds = %vector.body, %entry
; In-place update of 4 elements per iteration until %index reaches 1024.
901  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
902  %0 = getelementptr inbounds i32, ptr %a, i64 %index
903  %wide.load = load <4 x i32>, ptr %0, align 4
904  %1 = shl <4 x i32> %wide.load, %broadcast.splat
905  store <4 x i32> %1, ptr %0, align 4
906  %index.next = add nuw i64 %index, 4
907  %2 = icmp eq i64 %index.next, 1024
908  br i1 %2, label %for.cond.cleanup, label %vector.body
909
910for.cond.cleanup:                                 ; preds = %vector.body
911  ret void
912}
913
; sink_splat_lshr: logically shift the first 1024 i32 elements at %a right by
; %x, four elements per iteration. The splat of %x is built in %entry, outside
; %vector.body; the expected assembly uses the vector-scalar form vsrl.vx with
; a1 inside the loop and no separate vector splat instruction.
914define void @sink_splat_lshr(ptr nocapture %a, i32 signext %x) {
915; CHECK-LABEL: sink_splat_lshr:
916; CHECK:       # %bb.0: # %entry
917; CHECK-NEXT:    lui a2, 1
918; CHECK-NEXT:    add a2, a0, a2
919; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
920; CHECK-NEXT:  .LBB15_1: # %vector.body
921; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
922; CHECK-NEXT:    vle32.v v8, (a0)
923; CHECK-NEXT:    vsrl.vx v8, v8, a1
924; CHECK-NEXT:    vse32.v v8, (a0)
925; CHECK-NEXT:    addi a0, a0, 16
926; CHECK-NEXT:    bne a0, a2, .LBB15_1
927; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
928; CHECK-NEXT:    ret
929entry:
; Loop-invariant splat of %x, defined outside the loop that uses it.
930  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
931  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
932  br label %vector.body
933
934vector.body:                                      ; preds = %vector.body, %entry
; In-place update of 4 elements per iteration until %index reaches 1024.
935  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
936  %0 = getelementptr inbounds i32, ptr %a, i64 %index
937  %wide.load = load <4 x i32>, ptr %0, align 4
938  %1 = lshr <4 x i32> %wide.load, %broadcast.splat
939  store <4 x i32> %1, ptr %0, align 4
940  %index.next = add nuw i64 %index, 4
941  %2 = icmp eq i64 %index.next, 1024
942  br i1 %2, label %for.cond.cleanup, label %vector.body
943
944for.cond.cleanup:                                 ; preds = %vector.body
945  ret void
946}
947
; sink_splat_ashr: arithmetically shift the first 1024 i32 elements at %a right
; by %x, four elements per iteration. The splat of %x is built in %entry,
; outside %vector.body; the expected assembly uses the vector-scalar form
; vsra.vx with a1 inside the loop and no separate vector splat instruction.
948define void @sink_splat_ashr(ptr nocapture %a, i32 signext %x) {
949; CHECK-LABEL: sink_splat_ashr:
950; CHECK:       # %bb.0: # %entry
951; CHECK-NEXT:    lui a2, 1
952; CHECK-NEXT:    add a2, a0, a2
953; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
954; CHECK-NEXT:  .LBB16_1: # %vector.body
955; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
956; CHECK-NEXT:    vle32.v v8, (a0)
957; CHECK-NEXT:    vsra.vx v8, v8, a1
958; CHECK-NEXT:    vse32.v v8, (a0)
959; CHECK-NEXT:    addi a0, a0, 16
960; CHECK-NEXT:    bne a0, a2, .LBB16_1
961; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
962; CHECK-NEXT:    ret
963entry:
; Loop-invariant splat of %x, defined outside the loop that uses it.
964  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
965  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
966  br label %vector.body
967
968vector.body:                                      ; preds = %vector.body, %entry
; In-place update of 4 elements per iteration until %index reaches 1024.
969  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
970  %0 = getelementptr inbounds i32, ptr %a, i64 %index
971  %wide.load = load <4 x i32>, ptr %0, align 4
972  %1 = ashr <4 x i32> %wide.load, %broadcast.splat
973  store <4 x i32> %1, ptr %0, align 4
974  %index.next = add nuw i64 %index, 4
975  %2 = icmp eq i64 %index.next, 1024
976  br i1 %2, label %for.cond.cleanup, label %vector.body
977
978for.cond.cleanup:                                 ; preds = %vector.body
979  ret void
980}
981
; sink_splat_shl_scalable: shift the first 1024 i32 elements at %a left by %x,
; using a <vscale x 4 x i32> vector loop followed by a scalar remainder loop.
; The splat of %x is defined in %vector.ph, outside the loop that consumes it.
; The expected assembly uses the vector-scalar form vsll.vx with a1 inside the
; vector loop and sllw in the scalar loop.
982define void @sink_splat_shl_scalable(ptr nocapture %a, i32 signext %x) {
983; CHECK-LABEL: sink_splat_shl_scalable:
984; CHECK:       # %bb.0: # %entry
985; CHECK-NEXT:    csrr a5, vlenb
986; CHECK-NEXT:    srli a3, a5, 1
987; CHECK-NEXT:    li a2, 1024
988; CHECK-NEXT:    bgeu a2, a3, .LBB17_2
989; CHECK-NEXT:  # %bb.1:
990; CHECK-NEXT:    li a2, 0
991; CHECK-NEXT:    j .LBB17_5
992; CHECK-NEXT:  .LBB17_2: # %vector.ph
993; CHECK-NEXT:    addi a2, a3, -1
994; CHECK-NEXT:    andi a4, a2, 1024
995; CHECK-NEXT:    xori a2, a4, 1024
996; CHECK-NEXT:    slli a5, a5, 1
997; CHECK-NEXT:    mv a6, a0
998; CHECK-NEXT:    mv a7, a2
999; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, ma
1000; CHECK-NEXT:  .LBB17_3: # %vector.body
1001; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
1002; CHECK-NEXT:    vl2re32.v v8, (a6)
1003; CHECK-NEXT:    sub a7, a7, a3
1004; CHECK-NEXT:    vsll.vx v8, v8, a1
1005; CHECK-NEXT:    vs2r.v v8, (a6)
1006; CHECK-NEXT:    add a6, a6, a5
1007; CHECK-NEXT:    bnez a7, .LBB17_3
1008; CHECK-NEXT:  # %bb.4: # %middle.block
1009; CHECK-NEXT:    beqz a4, .LBB17_7
1010; CHECK-NEXT:  .LBB17_5: # %for.body.preheader
1011; CHECK-NEXT:    slli a2, a2, 2
1012; CHECK-NEXT:    lui a3, 1
1013; CHECK-NEXT:    add a2, a0, a2
1014; CHECK-NEXT:    add a0, a0, a3
1015; CHECK-NEXT:  .LBB17_6: # %for.body
1016; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
1017; CHECK-NEXT:    lw a3, 0(a2)
1018; CHECK-NEXT:    sllw a3, a3, a1
1019; CHECK-NEXT:    sw a3, 0(a2)
1020; CHECK-NEXT:    addi a2, a2, 4
1021; CHECK-NEXT:    bne a2, a0, .LBB17_6
1022; CHECK-NEXT:  .LBB17_7: # %for.cond.cleanup
1023; CHECK-NEXT:    ret
1024entry:
; The vector step is vscale * 4 elements; when it exceeds 1024 the vector loop
; is bypassed and the scalar loop starts at index 0.
1025  %0 = call i64 @llvm.vscale.i64()
1026  %1 = shl i64 %0, 2
1027  %min.iters.check = icmp ugt i64 %1, 1024
1028  br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
1029
1030vector.ph:                                        ; preds = %entry
; %n.vec is 1024 minus the remainder of 1024 by the vector step, i.e. the
; number of elements handled by the vector loop.
1031  %2 = call i64 @llvm.vscale.i64()
1032  %3 = shl i64 %2, 2
1033  %n.mod.vf = urem i64 1024, %3
1034  %n.vec = sub nsw i64 1024, %n.mod.vf
; Loop-invariant splat of %x, defined outside %vector.body.
1035  %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
1036  %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
1037  %4 = call i64 @llvm.vscale.i64()
1038  %5 = shl i64 %4, 2
1039  br label %vector.body
1040
1041vector.body:                                      ; preds = %vector.body, %vector.ph
; Load, shift by the splat, and store back in place; %index advances by %5.
1042  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1043  %6 = getelementptr inbounds i32, ptr %a, i64 %index
1044  %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
1045  %7 = shl <vscale x 4 x i32> %wide.load, %broadcast.splat
1046  store <vscale x 4 x i32> %7, ptr %6, align 4
1047  %index.next = add nuw i64 %index, %5
1048  %8 = icmp eq i64 %index.next, %n.vec
1049  br i1 %8, label %middle.block, label %vector.body
1050
1051middle.block:                                     ; preds = %vector.body
; A zero remainder means the vector loop covered all 1024 elements.
1052  %cmp.n = icmp eq i64 %n.mod.vf, 0
1053  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
1054
1055for.body.preheader:                               ; preds = %entry, %middle.block
1056  %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
1057  br label %for.body
1058
1059for.cond.cleanup:                                 ; preds = %for.body, %middle.block
1060  ret void
1061
1062for.body:                                         ; preds = %for.body.preheader, %for.body
; Scalar loop over indices from %indvars.iv.ph up to (not including) 1024.
1063  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
1064  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
1065  %9 = load i32, ptr %arrayidx, align 4
1066  %shl = shl i32 %9, %x
1067  store i32 %shl, ptr %arrayidx, align 4
1068  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1069  %cmp.not = icmp eq i64 %indvars.iv.next, 1024
1070  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
1071}
1072
; sink_splat_lshr_scalable: logically shift the first 1024 i32 elements at %a
; right by %x, using a <vscale x 4 x i32> vector loop followed by a scalar
; remainder loop. The splat of %x is defined in %vector.ph, outside the loop
; that consumes it. The expected assembly uses the vector-scalar form vsrl.vx
; with a1 inside the vector loop and srlw in the scalar loop.
1073define void @sink_splat_lshr_scalable(ptr nocapture %a, i32 signext %x) {
1074; CHECK-LABEL: sink_splat_lshr_scalable:
1075; CHECK:       # %bb.0: # %entry
1076; CHECK-NEXT:    csrr a5, vlenb
1077; CHECK-NEXT:    srli a3, a5, 1
1078; CHECK-NEXT:    li a2, 1024
1079; CHECK-NEXT:    bgeu a2, a3, .LBB18_2
1080; CHECK-NEXT:  # %bb.1:
1081; CHECK-NEXT:    li a2, 0
1082; CHECK-NEXT:    j .LBB18_5
1083; CHECK-NEXT:  .LBB18_2: # %vector.ph
1084; CHECK-NEXT:    addi a2, a3, -1
1085; CHECK-NEXT:    andi a4, a2, 1024
1086; CHECK-NEXT:    xori a2, a4, 1024
1087; CHECK-NEXT:    slli a5, a5, 1
1088; CHECK-NEXT:    mv a6, a0
1089; CHECK-NEXT:    mv a7, a2
1090; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, ma
1091; CHECK-NEXT:  .LBB18_3: # %vector.body
1092; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
1093; CHECK-NEXT:    vl2re32.v v8, (a6)
1094; CHECK-NEXT:    sub a7, a7, a3
1095; CHECK-NEXT:    vsrl.vx v8, v8, a1
1096; CHECK-NEXT:    vs2r.v v8, (a6)
1097; CHECK-NEXT:    add a6, a6, a5
1098; CHECK-NEXT:    bnez a7, .LBB18_3
1099; CHECK-NEXT:  # %bb.4: # %middle.block
1100; CHECK-NEXT:    beqz a4, .LBB18_7
1101; CHECK-NEXT:  .LBB18_5: # %for.body.preheader
1102; CHECK-NEXT:    slli a2, a2, 2
1103; CHECK-NEXT:    lui a3, 1
1104; CHECK-NEXT:    add a2, a0, a2
1105; CHECK-NEXT:    add a0, a0, a3
1106; CHECK-NEXT:  .LBB18_6: # %for.body
1107; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
1108; CHECK-NEXT:    lw a3, 0(a2)
1109; CHECK-NEXT:    srlw a3, a3, a1
1110; CHECK-NEXT:    sw a3, 0(a2)
1111; CHECK-NEXT:    addi a2, a2, 4
1112; CHECK-NEXT:    bne a2, a0, .LBB18_6
1113; CHECK-NEXT:  .LBB18_7: # %for.cond.cleanup
1114; CHECK-NEXT:    ret
1115entry:
; The vector step is vscale * 4 elements; when it exceeds 1024 the vector loop
; is bypassed and the scalar loop starts at index 0.
1116  %0 = call i64 @llvm.vscale.i64()
1117  %1 = shl i64 %0, 2
1118  %min.iters.check = icmp ugt i64 %1, 1024
1119  br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
1120
1121vector.ph:                                        ; preds = %entry
; %n.vec is 1024 minus the remainder of 1024 by the vector step, i.e. the
; number of elements handled by the vector loop.
1122  %2 = call i64 @llvm.vscale.i64()
1123  %3 = shl i64 %2, 2
1124  %n.mod.vf = urem i64 1024, %3
1125  %n.vec = sub nsw i64 1024, %n.mod.vf
; Loop-invariant splat of %x, defined outside %vector.body.
1126  %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
1127  %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
1128  %4 = call i64 @llvm.vscale.i64()
1129  %5 = shl i64 %4, 2
1130  br label %vector.body
1131
1132vector.body:                                      ; preds = %vector.body, %vector.ph
; Load, shift by the splat, and store back in place; %index advances by %5.
1133  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1134  %6 = getelementptr inbounds i32, ptr %a, i64 %index
1135  %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
1136  %7 = lshr <vscale x 4 x i32> %wide.load, %broadcast.splat
1137  store <vscale x 4 x i32> %7, ptr %6, align 4
1138  %index.next = add nuw i64 %index, %5
1139  %8 = icmp eq i64 %index.next, %n.vec
1140  br i1 %8, label %middle.block, label %vector.body
1141
1142middle.block:                                     ; preds = %vector.body
; A zero remainder means the vector loop covered all 1024 elements.
1143  %cmp.n = icmp eq i64 %n.mod.vf, 0
1144  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
1145
1146for.body.preheader:                               ; preds = %entry, %middle.block
1147  %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
1148  br label %for.body
1149
1150for.cond.cleanup:                                 ; preds = %for.body, %middle.block
1151  ret void
1152
1153for.body:                                         ; preds = %for.body.preheader, %for.body
; Scalar loop over indices from %indvars.iv.ph up to (not including) 1024.
1154  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
1155  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
1156  %9 = load i32, ptr %arrayidx, align 4
1157  %lshr = lshr i32 %9, %x
1158  store i32 %lshr, ptr %arrayidx, align 4
1159  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1160  %cmp.not = icmp eq i64 %indvars.iv.next, 1024
1161  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
1162}
1163
; sink_splat_ashr_scalable: arithmetically shift the first 1024 i32 elements at
; %a right by the constant 2, using a <vscale x 4 x i32> vector loop followed
; by a scalar remainder loop. This function takes no %x argument: the vector
; shift amount is the constant `splat (i32 2)` and the scalar shift amount is
; the literal 2. The expected assembly uses the immediate form vsra.vi in the
; vector loop.
; NOTE(review): the scalar loop is expected as srli after lw; presumably this
; is equivalent to an arithmetic shift for the 32 bits that sw writes back,
; because lw sign-extends the loaded word -- confirm before relying on it.
1164define void @sink_splat_ashr_scalable(ptr nocapture %a) {
1165; CHECK-LABEL: sink_splat_ashr_scalable:
1166; CHECK:       # %bb.0: # %entry
1167; CHECK-NEXT:    csrr a4, vlenb
1168; CHECK-NEXT:    srli a2, a4, 1
1169; CHECK-NEXT:    li a1, 1024
1170; CHECK-NEXT:    bgeu a1, a2, .LBB19_2
1171; CHECK-NEXT:  # %bb.1:
1172; CHECK-NEXT:    li a1, 0
1173; CHECK-NEXT:    j .LBB19_5
1174; CHECK-NEXT:  .LBB19_2: # %vector.ph
1175; CHECK-NEXT:    addi a1, a2, -1
1176; CHECK-NEXT:    andi a3, a1, 1024
1177; CHECK-NEXT:    xori a1, a3, 1024
1178; CHECK-NEXT:    slli a4, a4, 1
1179; CHECK-NEXT:    mv a5, a0
1180; CHECK-NEXT:    mv a6, a1
1181; CHECK-NEXT:    vsetvli a7, zero, e32, m2, ta, ma
1182; CHECK-NEXT:  .LBB19_3: # %vector.body
1183; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
1184; CHECK-NEXT:    vl2re32.v v8, (a5)
1185; CHECK-NEXT:    sub a6, a6, a2
1186; CHECK-NEXT:    vsra.vi v8, v8, 2
1187; CHECK-NEXT:    vs2r.v v8, (a5)
1188; CHECK-NEXT:    add a5, a5, a4
1189; CHECK-NEXT:    bnez a6, .LBB19_3
1190; CHECK-NEXT:  # %bb.4: # %middle.block
1191; CHECK-NEXT:    beqz a3, .LBB19_7
1192; CHECK-NEXT:  .LBB19_5: # %for.body.preheader
1193; CHECK-NEXT:    slli a1, a1, 2
1194; CHECK-NEXT:    lui a2, 1
1195; CHECK-NEXT:    add a1, a0, a1
1196; CHECK-NEXT:    add a0, a0, a2
1197; CHECK-NEXT:  .LBB19_6: # %for.body
1198; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
1199; CHECK-NEXT:    lw a2, 0(a1)
1200; CHECK-NEXT:    srli a2, a2, 2
1201; CHECK-NEXT:    sw a2, 0(a1)
1202; CHECK-NEXT:    addi a1, a1, 4
1203; CHECK-NEXT:    bne a1, a0, .LBB19_6
1204; CHECK-NEXT:  .LBB19_7: # %for.cond.cleanup
1205; CHECK-NEXT:    ret
1206entry:
; The vector step is vscale * 4 elements; when it exceeds 1024 the vector loop
; is bypassed and the scalar loop starts at index 0.
1207  %0 = call i64 @llvm.vscale.i64()
1208  %1 = shl i64 %0, 2
1209  %min.iters.check = icmp ugt i64 %1, 1024
1210  br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
1211
1212vector.ph:                                        ; preds = %entry
; %n.vec is 1024 minus the remainder of 1024 by the vector step, i.e. the
; number of elements handled by the vector loop.
1213  %2 = call i64 @llvm.vscale.i64()
1214  %3 = shl i64 %2, 2
1215  %n.mod.vf = urem i64 1024, %3
1216  %n.vec = sub nsw i64 1024, %n.mod.vf
1217  %4 = call i64 @llvm.vscale.i64()
1218  %5 = shl i64 %4, 2
1219  br label %vector.body
1220
1221vector.body:                                      ; preds = %vector.body, %vector.ph
; Load, shift right by the constant splat, and store back in place.
1222  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1223  %6 = getelementptr inbounds i32, ptr %a, i64 %index
1224  %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
1225  %7 = ashr <vscale x 4 x i32> %wide.load, splat (i32 2)
1226  store <vscale x 4 x i32> %7, ptr %6, align 4
1227  %index.next = add nuw i64 %index, %5
1228  %8 = icmp eq i64 %index.next, %n.vec
1229  br i1 %8, label %middle.block, label %vector.body
1230
1231middle.block:                                     ; preds = %vector.body
; A zero remainder means the vector loop covered all 1024 elements.
1232  %cmp.n = icmp eq i64 %n.mod.vf, 0
1233  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
1234
1235for.body.preheader:                               ; preds = %entry, %middle.block
1236  %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
1237  br label %for.body
1238
1239for.cond.cleanup:                                 ; preds = %for.body, %middle.block
1240  ret void
1241
1242for.body:                                         ; preds = %for.body.preheader, %for.body
; Scalar loop over indices from %indvars.iv.ph up to (not including) 1024.
1243  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
1244  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
1245  %9 = load i32, ptr %arrayidx, align 4
1246  %ashr = ashr i32 %9, 2
1247  store i32 %ashr, ptr %arrayidx, align 4
1248  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1249  %cmp.not = icmp eq i64 %indvars.iv.next, 1024
1250  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
1251}
1252
; sink_splat_fmul: multiply the first 1024 float elements at %a by %x, four
; elements per iteration. The splat of %x is built in %entry, outside
; %vector.body; the expected assembly uses the vector-scalar form vfmul.vf with
; fa0 inside the loop and no separate vector splat instruction.
1253define void @sink_splat_fmul(ptr nocapture %a, float %x) {
1254; CHECK-LABEL: sink_splat_fmul:
1255; CHECK:       # %bb.0: # %entry
1256; CHECK-NEXT:    lui a1, 1
1257; CHECK-NEXT:    add a1, a0, a1
1258; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
1259; CHECK-NEXT:  .LBB20_1: # %vector.body
1260; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
1261; CHECK-NEXT:    vle32.v v8, (a0)
1262; CHECK-NEXT:    vfmul.vf v8, v8, fa0
1263; CHECK-NEXT:    vse32.v v8, (a0)
1264; CHECK-NEXT:    addi a0, a0, 16
1265; CHECK-NEXT:    bne a0, a1, .LBB20_1
1266; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
1267; CHECK-NEXT:    ret
1268entry:
; Loop-invariant splat of %x, defined outside the loop that uses it.
1269  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
1270  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
1271  br label %vector.body
1272
1273vector.body:                                      ; preds = %vector.body, %entry
; In-place update of 4 elements per iteration until %index reaches 1024.
1274  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
1275  %0 = getelementptr inbounds float, ptr %a, i64 %index
1276  %wide.load = load <4 x float>, ptr %0, align 4
1277  %1 = fmul <4 x float> %wide.load, %broadcast.splat
1278  store <4 x float> %1, ptr %0, align 4
1279  %index.next = add nuw i64 %index, 4
1280  %2 = icmp eq i64 %index.next, 1024
1281  br i1 %2, label %for.cond.cleanup, label %vector.body
1282
1283for.cond.cleanup:                                 ; preds = %vector.body
1284  ret void
1285}
1286
; sink_splat_fdiv: divide the first 1024 float elements at %a by %x, four
; elements per iteration. The splat of %x is the divisor (right operand) and is
; built in %entry, outside %vector.body; the expected assembly uses the
; vector-scalar form vfdiv.vf with fa0 inside the loop.
1287define void @sink_splat_fdiv(ptr nocapture %a, float %x) {
1288; CHECK-LABEL: sink_splat_fdiv:
1289; CHECK:       # %bb.0: # %entry
1290; CHECK-NEXT:    lui a1, 1
1291; CHECK-NEXT:    add a1, a0, a1
1292; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
1293; CHECK-NEXT:  .LBB21_1: # %vector.body
1294; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
1295; CHECK-NEXT:    vle32.v v8, (a0)
1296; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
1297; CHECK-NEXT:    vse32.v v8, (a0)
1298; CHECK-NEXT:    addi a0, a0, 16
1299; CHECK-NEXT:    bne a0, a1, .LBB21_1
1300; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
1301; CHECK-NEXT:    ret
1302entry:
; Loop-invariant splat of %x, defined outside the loop that uses it.
1303  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
1304  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
1305  br label %vector.body
1306
1307vector.body:                                      ; preds = %vector.body, %entry
; In-place update of 4 elements per iteration until %index reaches 1024.
1308  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
1309  %0 = getelementptr inbounds float, ptr %a, i64 %index
1310  %wide.load = load <4 x float>, ptr %0, align 4
1311  %1 = fdiv <4 x float> %wide.load, %broadcast.splat
1312  store <4 x float> %1, ptr %0, align 4
1313  %index.next = add nuw i64 %index, 4
1314  %2 = icmp eq i64 %index.next, 1024
1315  br i1 %2, label %for.cond.cleanup, label %vector.body
1316
1317for.cond.cleanup:                                 ; preds = %vector.body
1318  ret void
1319}
1320
; sink_splat_frdiv: replace each of the first 1024 float elements at %a with
; %x divided by that element, four elements per iteration. Here the splat of
; %x is the dividend (left operand of fdiv), so the expected assembly uses the
; reversed vector-scalar form vfrdiv.vf with fa0 inside the loop.
1321define void @sink_splat_frdiv(ptr nocapture %a, float %x) {
1322; CHECK-LABEL: sink_splat_frdiv:
1323; CHECK:       # %bb.0: # %entry
1324; CHECK-NEXT:    lui a1, 1
1325; CHECK-NEXT:    add a1, a0, a1
1326; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
1327; CHECK-NEXT:  .LBB22_1: # %vector.body
1328; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
1329; CHECK-NEXT:    vle32.v v8, (a0)
1330; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
1331; CHECK-NEXT:    vse32.v v8, (a0)
1332; CHECK-NEXT:    addi a0, a0, 16
1333; CHECK-NEXT:    bne a0, a1, .LBB22_1
1334; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
1335; CHECK-NEXT:    ret
1336entry:
; Loop-invariant splat of %x, defined outside the loop that uses it.
1337  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
1338  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
1339  br label %vector.body
1340
1341vector.body:                                      ; preds = %vector.body, %entry
; In-place update of 4 elements per iteration until %index reaches 1024.
1342  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
1343  %0 = getelementptr inbounds float, ptr %a, i64 %index
1344  %wide.load = load <4 x float>, ptr %0, align 4
; Operand order is splat / load, the reverse of sink_splat_fdiv-style tests.
1345  %1 = fdiv <4 x float> %broadcast.splat, %wide.load
1346  store <4 x float> %1, ptr %0, align 4
1347  %index.next = add nuw i64 %index, 4
1348  %2 = icmp eq i64 %index.next, 1024
1349  br i1 %2, label %for.cond.cleanup, label %vector.body
1350
1351for.cond.cleanup:                                 ; preds = %vector.body
1352  ret void
1353}
1354
; sink_splat_fadd: add %x to the first 1024 float elements at %a, four
; elements per iteration. The splat of %x is built in %entry, outside
; %vector.body; the expected assembly uses the vector-scalar form vfadd.vf with
; fa0 inside the loop and no separate vector splat instruction.
1355define void @sink_splat_fadd(ptr nocapture %a, float %x) {
1356; CHECK-LABEL: sink_splat_fadd:
1357; CHECK:       # %bb.0: # %entry
1358; CHECK-NEXT:    lui a1, 1
1359; CHECK-NEXT:    add a1, a0, a1
1360; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
1361; CHECK-NEXT:  .LBB23_1: # %vector.body
1362; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
1363; CHECK-NEXT:    vle32.v v8, (a0)
1364; CHECK-NEXT:    vfadd.vf v8, v8, fa0
1365; CHECK-NEXT:    vse32.v v8, (a0)
1366; CHECK-NEXT:    addi a0, a0, 16
1367; CHECK-NEXT:    bne a0, a1, .LBB23_1
1368; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
1369; CHECK-NEXT:    ret
1370entry:
; Loop-invariant splat of %x, defined outside the loop that uses it.
1371  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
1372  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
1373  br label %vector.body
1374
1375vector.body:                                      ; preds = %vector.body, %entry
; In-place update of 4 elements per iteration until %index reaches 1024.
1376  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
1377  %0 = getelementptr inbounds float, ptr %a, i64 %index
1378  %wide.load = load <4 x float>, ptr %0, align 4
1379  %1 = fadd <4 x float> %wide.load, %broadcast.splat
1380  store <4 x float> %1, ptr %0, align 4
1381  %index.next = add nuw i64 %index, 4
1382  %2 = icmp eq i64 %index.next, 1024
1383  br i1 %2, label %for.cond.cleanup, label %vector.body
1384
1385for.cond.cleanup:                                 ; preds = %vector.body
1386  ret void
1387}
1388
; sink_splat_fsub: subtract %x from the first 1024 float elements at %a, four
; elements per iteration. The splat of %x is the subtrahend (right operand) and
; is built in %entry, outside %vector.body; the expected assembly uses the
; vector-scalar form vfsub.vf with fa0 inside the loop.
1389define void @sink_splat_fsub(ptr nocapture %a, float %x) {
1390; CHECK-LABEL: sink_splat_fsub:
1391; CHECK:       # %bb.0: # %entry
1392; CHECK-NEXT:    lui a1, 1
1393; CHECK-NEXT:    add a1, a0, a1
1394; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
1395; CHECK-NEXT:  .LBB24_1: # %vector.body
1396; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
1397; CHECK-NEXT:    vle32.v v8, (a0)
1398; CHECK-NEXT:    vfsub.vf v8, v8, fa0
1399; CHECK-NEXT:    vse32.v v8, (a0)
1400; CHECK-NEXT:    addi a0, a0, 16
1401; CHECK-NEXT:    bne a0, a1, .LBB24_1
1402; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
1403; CHECK-NEXT:    ret
1404entry:
; Loop-invariant splat of %x, defined outside the loop that uses it.
1405  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
1406  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
1407  br label %vector.body
1408
1409vector.body:                                      ; preds = %vector.body, %entry
; In-place update of 4 elements per iteration until %index reaches 1024.
1410  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
1411  %0 = getelementptr inbounds float, ptr %a, i64 %index
1412  %wide.load = load <4 x float>, ptr %0, align 4
1413  %1 = fsub <4 x float> %wide.load, %broadcast.splat
1414  store <4 x float> %1, ptr %0, align 4
1415  %index.next = add nuw i64 %index, 4
1416  %2 = icmp eq i64 %index.next, 1024
1417  br i1 %2, label %for.cond.cleanup, label %vector.body
1418
1419for.cond.cleanup:                                 ; preds = %vector.body
1420  ret void
1421}
1422
; sink_splat_frsub: replace each of the first 1024 float elements at %a with
; %x minus that element, four elements per iteration. Here the splat of %x is
; the minuend (left operand of fsub), so the expected assembly uses the
; reversed vector-scalar form vfrsub.vf with fa0 inside the loop.
1423define void @sink_splat_frsub(ptr nocapture %a, float %x) {
1424; CHECK-LABEL: sink_splat_frsub:
1425; CHECK:       # %bb.0: # %entry
1426; CHECK-NEXT:    lui a1, 1
1427; CHECK-NEXT:    add a1, a0, a1
1428; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
1429; CHECK-NEXT:  .LBB25_1: # %vector.body
1430; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
1431; CHECK-NEXT:    vle32.v v8, (a0)
1432; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
1433; CHECK-NEXT:    vse32.v v8, (a0)
1434; CHECK-NEXT:    addi a0, a0, 16
1435; CHECK-NEXT:    bne a0, a1, .LBB25_1
1436; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
1437; CHECK-NEXT:    ret
1438entry:
; Loop-invariant splat of %x, defined outside the loop that uses it.
1439  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
1440  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
1441  br label %vector.body
1442
1443vector.body:                                      ; preds = %vector.body, %entry
; In-place update of 4 elements per iteration until %index reaches 1024.
1444  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
1445  %0 = getelementptr inbounds float, ptr %a, i64 %index
1446  %wide.load = load <4 x float>, ptr %0, align 4
; Operand order is splat - load, the reverse of a plain fsub-by-splat.
1447  %1 = fsub <4 x float> %broadcast.splat, %wide.load
1448  store <4 x float> %1, ptr %0, align 4
1449  %index.next = add nuw i64 %index, 4
1450  %2 = icmp eq i64 %index.next, 1024
1451  br i1 %2, label %for.cond.cleanup, label %vector.body
1452
1453for.cond.cleanup:                                 ; preds = %vector.body
1454  ret void
1455}
1456
; Scalable-vector loop that multiplies 1024 floats at %a by the scalar %x, with
; a scalar loop for any remainder. The splat of %x is built in %vector.ph,
; outside the loop, and consumed by the fmul in %vector.body. The autogenerated
; assertions expect the vector loop to use vfmul.vf with fa0 as the scalar
; operand.
1457define void @sink_splat_fmul_scalable(ptr nocapture %a, float %x) {
1458; CHECK-LABEL: sink_splat_fmul_scalable:
1459; CHECK:       # %bb.0: # %entry
1460; CHECK-NEXT:    csrr a1, vlenb
1461; CHECK-NEXT:    srli a3, a1, 2
1462; CHECK-NEXT:    li a2, 1024
1463; CHECK-NEXT:    bgeu a2, a3, .LBB26_2
1464; CHECK-NEXT:  # %bb.1:
1465; CHECK-NEXT:    li a2, 0
1466; CHECK-NEXT:    j .LBB26_5
1467; CHECK-NEXT:  .LBB26_2: # %vector.ph
1468; CHECK-NEXT:    addi a2, a3, -1
1469; CHECK-NEXT:    andi a4, a2, 1024
1470; CHECK-NEXT:    xori a2, a4, 1024
1471; CHECK-NEXT:    mv a5, a0
1472; CHECK-NEXT:    mv a6, a2
1473; CHECK-NEXT:    vsetvli a7, zero, e32, m1, ta, ma
1474; CHECK-NEXT:  .LBB26_3: # %vector.body
1475; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
1476; CHECK-NEXT:    vl1re32.v v8, (a5)
1477; CHECK-NEXT:    sub a6, a6, a3
1478; CHECK-NEXT:    vfmul.vf v8, v8, fa0
1479; CHECK-NEXT:    vs1r.v v8, (a5)
1480; CHECK-NEXT:    add a5, a5, a1
1481; CHECK-NEXT:    bnez a6, .LBB26_3
1482; CHECK-NEXT:  # %bb.4: # %middle.block
1483; CHECK-NEXT:    beqz a4, .LBB26_7
1484; CHECK-NEXT:  .LBB26_5: # %for.body.preheader
1485; CHECK-NEXT:    slli a1, a2, 2
1486; CHECK-NEXT:    lui a2, 1
1487; CHECK-NEXT:    add a1, a0, a1
1488; CHECK-NEXT:    add a0, a0, a2
1489; CHECK-NEXT:  .LBB26_6: # %for.body
1490; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
1491; CHECK-NEXT:    flw fa5, 0(a1)
1492; CHECK-NEXT:    fmul.s fa5, fa5, fa0
1493; CHECK-NEXT:    fsw fa5, 0(a1)
1494; CHECK-NEXT:    addi a1, a1, 4
1495; CHECK-NEXT:    bne a1, a0, .LBB26_6
1496; CHECK-NEXT:  .LBB26_7: # %for.cond.cleanup
1497; CHECK-NEXT:    ret
1498entry:
  ; Skip the vector loop when one vector step (vscale * 2 elements) exceeds the
  ; 1024-element trip count.
1499  %0 = call i64 @llvm.vscale.i64()
1500  %1 = shl i64 %0, 1
1501  %min.iters.check = icmp ugt i64 %1, 1024
1502  br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
1503
1504vector.ph:                                        ; preds = %entry
  ; %n.vec is 1024 rounded down to a multiple of the vector step; %n.mod.vf is
  ; the leftover element count. The splat of %x is created here, outside the loop.
1505  %2 = call i64 @llvm.vscale.i64()
1506  %3 = shl i64 %2, 1
1507  %n.mod.vf = urem i64 1024, %3
1508  %n.vec = sub nsw i64 1024, %n.mod.vf
1509  %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
1510  %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
1511  %4 = call i64 @llvm.vscale.i64()
1512  %5 = shl i64 %4, 1
1513  br label %vector.body
1514
1515vector.body:                                      ; preds = %vector.body, %vector.ph
1516  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1517  %6 = getelementptr inbounds float, ptr %a, i64 %index
1518  %wide.load = load <vscale x 2 x float>, ptr %6, align 4
1519  %7 = fmul <vscale x 2 x float> %wide.load, %broadcast.splat
1520  store <vscale x 2 x float> %7, ptr %6, align 4
1521  %index.next = add nuw i64 %index, %5
1522  %8 = icmp eq i64 %index.next, %n.vec
1523  br i1 %8, label %middle.block, label %vector.body
1524
1525middle.block:                                     ; preds = %vector.body
  ; No remainder means we are done; otherwise the scalar loop finishes the tail.
1526  %cmp.n = icmp eq i64 %n.mod.vf, 0
1527  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
1528
1529for.body.preheader:                               ; preds = %entry, %middle.block
1530  %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
1531  br label %for.body
1532
1533for.cond.cleanup:                                 ; preds = %for.body, %middle.block
1534  ret void
1535
1536for.body:                                         ; preds = %for.body.preheader, %for.body
  ; Scalar loop: one element per iteration until the index reaches 1024.
1537  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
1538  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
1539  %9 = load float, ptr %arrayidx, align 4
1540  %mul = fmul float %9, %x
1541  store float %mul, ptr %arrayidx, align 4
1542  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1543  %cmp.not = icmp eq i64 %indvars.iv.next, 1024
1544  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
1545}
1546
; Scalable-vector loop that divides 1024 floats at %a by the scalar %x, with a
; scalar loop for any remainder. The splat of %x is built in %vector.ph and is
; the divisor (right operand) of the fdiv in %vector.body. The autogenerated
; assertions expect the vector loop to use vfdiv.vf with fa0 as the scalar
; operand.
1547define void @sink_splat_fdiv_scalable(ptr nocapture %a, float %x) {
1548; CHECK-LABEL: sink_splat_fdiv_scalable:
1549; CHECK:       # %bb.0: # %entry
1550; CHECK-NEXT:    csrr a1, vlenb
1551; CHECK-NEXT:    srli a3, a1, 2
1552; CHECK-NEXT:    li a2, 1024
1553; CHECK-NEXT:    bgeu a2, a3, .LBB27_2
1554; CHECK-NEXT:  # %bb.1:
1555; CHECK-NEXT:    li a2, 0
1556; CHECK-NEXT:    j .LBB27_5
1557; CHECK-NEXT:  .LBB27_2: # %vector.ph
1558; CHECK-NEXT:    addi a2, a3, -1
1559; CHECK-NEXT:    andi a4, a2, 1024
1560; CHECK-NEXT:    xori a2, a4, 1024
1561; CHECK-NEXT:    mv a5, a0
1562; CHECK-NEXT:    mv a6, a2
1563; CHECK-NEXT:    vsetvli a7, zero, e32, m1, ta, ma
1564; CHECK-NEXT:  .LBB27_3: # %vector.body
1565; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
1566; CHECK-NEXT:    vl1re32.v v8, (a5)
1567; CHECK-NEXT:    sub a6, a6, a3
1568; CHECK-NEXT:    vfdiv.vf v8, v8, fa0
1569; CHECK-NEXT:    vs1r.v v8, (a5)
1570; CHECK-NEXT:    add a5, a5, a1
1571; CHECK-NEXT:    bnez a6, .LBB27_3
1572; CHECK-NEXT:  # %bb.4: # %middle.block
1573; CHECK-NEXT:    beqz a4, .LBB27_7
1574; CHECK-NEXT:  .LBB27_5: # %for.body.preheader
1575; CHECK-NEXT:    slli a1, a2, 2
1576; CHECK-NEXT:    lui a2, 1
1577; CHECK-NEXT:    add a1, a0, a1
1578; CHECK-NEXT:    add a0, a0, a2
1579; CHECK-NEXT:  .LBB27_6: # %for.body
1580; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
1581; CHECK-NEXT:    flw fa5, 0(a1)
1582; CHECK-NEXT:    fdiv.s fa5, fa5, fa0
1583; CHECK-NEXT:    fsw fa5, 0(a1)
1584; CHECK-NEXT:    addi a1, a1, 4
1585; CHECK-NEXT:    bne a1, a0, .LBB27_6
1586; CHECK-NEXT:  .LBB27_7: # %for.cond.cleanup
1587; CHECK-NEXT:    ret
1588entry:
  ; Skip the vector loop when one vector step (vscale * 2 elements) exceeds the
  ; 1024-element trip count.
1589  %0 = call i64 @llvm.vscale.i64()
1590  %1 = shl i64 %0, 1
1591  %min.iters.check = icmp ugt i64 %1, 1024
1592  br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
1593
1594vector.ph:                                        ; preds = %entry
  ; %n.vec is 1024 rounded down to a multiple of the vector step; %n.mod.vf is
  ; the leftover element count. The splat of %x is created here, outside the loop.
1595  %2 = call i64 @llvm.vscale.i64()
1596  %3 = shl i64 %2, 1
1597  %n.mod.vf = urem i64 1024, %3
1598  %n.vec = sub nsw i64 1024, %n.mod.vf
1599  %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
1600  %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
1601  %4 = call i64 @llvm.vscale.i64()
1602  %5 = shl i64 %4, 1
1603  br label %vector.body
1604
1605vector.body:                                      ; preds = %vector.body, %vector.ph
1606  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1607  %6 = getelementptr inbounds float, ptr %a, i64 %index
1608  %wide.load = load <vscale x 2 x float>, ptr %6, align 4
1609  %7 = fdiv <vscale x 2 x float> %wide.load, %broadcast.splat
1610  store <vscale x 2 x float> %7, ptr %6, align 4
1611  %index.next = add nuw i64 %index, %5
1612  %8 = icmp eq i64 %index.next, %n.vec
1613  br i1 %8, label %middle.block, label %vector.body
1614
1615middle.block:                                     ; preds = %vector.body
  ; No remainder means we are done; otherwise the scalar loop finishes the tail.
1616  %cmp.n = icmp eq i64 %n.mod.vf, 0
1617  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
1618
1619for.body.preheader:                               ; preds = %entry, %middle.block
1620  %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
1621  br label %for.body
1622
1623for.cond.cleanup:                                 ; preds = %for.body, %middle.block
1624  ret void
1625
1626for.body:                                         ; preds = %for.body.preheader, %for.body
  ; Scalar loop: one element per iteration until the index reaches 1024.
  ; Note: the result is named %mul although the operation is a division.
1627  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
1628  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
1629  %9 = load float, ptr %arrayidx, align 4
1630  %mul = fdiv float %9, %x
1631  store float %mul, ptr %arrayidx, align 4
1632  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1633  %cmp.not = icmp eq i64 %indvars.iv.next, 1024
1634  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
1635}
1636
; Scalable-vector loop that replaces each of 1024 floats at %a with %x divided
; by that element, with a scalar loop for any remainder. The splat of %x is
; built in %vector.ph and is the dividend (left operand) of the fdiv in
; %vector.body, so the autogenerated assertions expect the reversed-operand
; form vfrdiv.vf with fa0 as the scalar operand.
1637define void @sink_splat_frdiv_scalable(ptr nocapture %a, float %x) {
1638; CHECK-LABEL: sink_splat_frdiv_scalable:
1639; CHECK:       # %bb.0: # %entry
1640; CHECK-NEXT:    csrr a1, vlenb
1641; CHECK-NEXT:    srli a3, a1, 2
1642; CHECK-NEXT:    li a2, 1024
1643; CHECK-NEXT:    bgeu a2, a3, .LBB28_2
1644; CHECK-NEXT:  # %bb.1:
1645; CHECK-NEXT:    li a2, 0
1646; CHECK-NEXT:    j .LBB28_5
1647; CHECK-NEXT:  .LBB28_2: # %vector.ph
1648; CHECK-NEXT:    addi a2, a3, -1
1649; CHECK-NEXT:    andi a4, a2, 1024
1650; CHECK-NEXT:    xori a2, a4, 1024
1651; CHECK-NEXT:    mv a5, a0
1652; CHECK-NEXT:    mv a6, a2
1653; CHECK-NEXT:    vsetvli a7, zero, e32, m1, ta, ma
1654; CHECK-NEXT:  .LBB28_3: # %vector.body
1655; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
1656; CHECK-NEXT:    vl1re32.v v8, (a5)
1657; CHECK-NEXT:    sub a6, a6, a3
1658; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0
1659; CHECK-NEXT:    vs1r.v v8, (a5)
1660; CHECK-NEXT:    add a5, a5, a1
1661; CHECK-NEXT:    bnez a6, .LBB28_3
1662; CHECK-NEXT:  # %bb.4: # %middle.block
1663; CHECK-NEXT:    beqz a4, .LBB28_7
1664; CHECK-NEXT:  .LBB28_5: # %for.body.preheader
1665; CHECK-NEXT:    slli a1, a2, 2
1666; CHECK-NEXT:    lui a2, 1
1667; CHECK-NEXT:    add a1, a0, a1
1668; CHECK-NEXT:    add a0, a0, a2
1669; CHECK-NEXT:  .LBB28_6: # %for.body
1670; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
1671; CHECK-NEXT:    flw fa5, 0(a1)
1672; CHECK-NEXT:    fdiv.s fa5, fa0, fa5
1673; CHECK-NEXT:    fsw fa5, 0(a1)
1674; CHECK-NEXT:    addi a1, a1, 4
1675; CHECK-NEXT:    bne a1, a0, .LBB28_6
1676; CHECK-NEXT:  .LBB28_7: # %for.cond.cleanup
1677; CHECK-NEXT:    ret
1678entry:
  ; Skip the vector loop when one vector step (vscale * 2 elements) exceeds the
  ; 1024-element trip count.
1679  %0 = call i64 @llvm.vscale.i64()
1680  %1 = shl i64 %0, 1
1681  %min.iters.check = icmp ugt i64 %1, 1024
1682  br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
1683
1684vector.ph:                                        ; preds = %entry
  ; %n.vec is 1024 rounded down to a multiple of the vector step; %n.mod.vf is
  ; the leftover element count. The splat of %x is created here, outside the loop.
1685  %2 = call i64 @llvm.vscale.i64()
1686  %3 = shl i64 %2, 1
1687  %n.mod.vf = urem i64 1024, %3
1688  %n.vec = sub nsw i64 1024, %n.mod.vf
1689  %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
1690  %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
1691  %4 = call i64 @llvm.vscale.i64()
1692  %5 = shl i64 %4, 1
1693  br label %vector.body
1694
1695vector.body:                                      ; preds = %vector.body, %vector.ph
1696  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1697  %6 = getelementptr inbounds float, ptr %a, i64 %index
1698  %wide.load = load <vscale x 2 x float>, ptr %6, align 4
1699  %7 = fdiv <vscale x 2 x float> %broadcast.splat, %wide.load
1700  store <vscale x 2 x float> %7, ptr %6, align 4
1701  %index.next = add nuw i64 %index, %5
1702  %8 = icmp eq i64 %index.next, %n.vec
1703  br i1 %8, label %middle.block, label %vector.body
1704
1705middle.block:                                     ; preds = %vector.body
  ; No remainder means we are done; otherwise the scalar loop finishes the tail.
1706  %cmp.n = icmp eq i64 %n.mod.vf, 0
1707  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
1708
1709for.body.preheader:                               ; preds = %entry, %middle.block
1710  %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
1711  br label %for.body
1712
1713for.cond.cleanup:                                 ; preds = %for.body, %middle.block
1714  ret void
1715
1716for.body:                                         ; preds = %for.body.preheader, %for.body
  ; Scalar loop: one element per iteration until the index reaches 1024.
  ; Note: the result is named %mul although the operation is a division.
1717  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
1718  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
1719  %9 = load float, ptr %arrayidx, align 4
1720  %mul = fdiv float %x, %9
1721  store float %mul, ptr %arrayidx, align 4
1722  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1723  %cmp.not = icmp eq i64 %indvars.iv.next, 1024
1724  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
1725}
1726
; Scalable-vector loop that adds the scalar %x to 1024 floats at %a, with a
; scalar loop for any remainder. The splat of %x is built in %vector.ph,
; outside the loop, and consumed by the fadd in %vector.body. The autogenerated
; assertions expect the vector loop to use vfadd.vf with fa0 as the scalar
; operand.
1727define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) {
1728; CHECK-LABEL: sink_splat_fadd_scalable:
1729; CHECK:       # %bb.0: # %entry
1730; CHECK-NEXT:    csrr a1, vlenb
1731; CHECK-NEXT:    srli a3, a1, 2
1732; CHECK-NEXT:    li a2, 1024
1733; CHECK-NEXT:    bgeu a2, a3, .LBB29_2
1734; CHECK-NEXT:  # %bb.1:
1735; CHECK-NEXT:    li a2, 0
1736; CHECK-NEXT:    j .LBB29_5
1737; CHECK-NEXT:  .LBB29_2: # %vector.ph
1738; CHECK-NEXT:    addi a2, a3, -1
1739; CHECK-NEXT:    andi a4, a2, 1024
1740; CHECK-NEXT:    xori a2, a4, 1024
1741; CHECK-NEXT:    mv a5, a0
1742; CHECK-NEXT:    mv a6, a2
1743; CHECK-NEXT:    vsetvli a7, zero, e32, m1, ta, ma
1744; CHECK-NEXT:  .LBB29_3: # %vector.body
1745; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
1746; CHECK-NEXT:    vl1re32.v v8, (a5)
1747; CHECK-NEXT:    sub a6, a6, a3
1748; CHECK-NEXT:    vfadd.vf v8, v8, fa0
1749; CHECK-NEXT:    vs1r.v v8, (a5)
1750; CHECK-NEXT:    add a5, a5, a1
1751; CHECK-NEXT:    bnez a6, .LBB29_3
1752; CHECK-NEXT:  # %bb.4: # %middle.block
1753; CHECK-NEXT:    beqz a4, .LBB29_7
1754; CHECK-NEXT:  .LBB29_5: # %for.body.preheader
1755; CHECK-NEXT:    slli a1, a2, 2
1756; CHECK-NEXT:    lui a2, 1
1757; CHECK-NEXT:    add a1, a0, a1
1758; CHECK-NEXT:    add a0, a0, a2
1759; CHECK-NEXT:  .LBB29_6: # %for.body
1760; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
1761; CHECK-NEXT:    flw fa5, 0(a1)
1762; CHECK-NEXT:    fadd.s fa5, fa5, fa0
1763; CHECK-NEXT:    fsw fa5, 0(a1)
1764; CHECK-NEXT:    addi a1, a1, 4
1765; CHECK-NEXT:    bne a1, a0, .LBB29_6
1766; CHECK-NEXT:  .LBB29_7: # %for.cond.cleanup
1767; CHECK-NEXT:    ret
1768entry:
  ; Skip the vector loop when one vector step (vscale * 2 elements) exceeds the
  ; 1024-element trip count.
1769  %0 = call i64 @llvm.vscale.i64()
1770  %1 = shl i64 %0, 1
1771  %min.iters.check = icmp ugt i64 %1, 1024
1772  br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
1773
1774vector.ph:                                        ; preds = %entry
  ; %n.vec is 1024 rounded down to a multiple of the vector step; %n.mod.vf is
  ; the leftover element count. The splat of %x is created here, outside the loop.
1775  %2 = call i64 @llvm.vscale.i64()
1776  %3 = shl i64 %2, 1
1777  %n.mod.vf = urem i64 1024, %3
1778  %n.vec = sub nsw i64 1024, %n.mod.vf
1779  %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
1780  %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
1781  %4 = call i64 @llvm.vscale.i64()
1782  %5 = shl i64 %4, 1
1783  br label %vector.body
1784
1785vector.body:                                      ; preds = %vector.body, %vector.ph
1786  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1787  %6 = getelementptr inbounds float, ptr %a, i64 %index
1788  %wide.load = load <vscale x 2 x float>, ptr %6, align 4
1789  %7 = fadd <vscale x 2 x float> %wide.load, %broadcast.splat
1790  store <vscale x 2 x float> %7, ptr %6, align 4
1791  %index.next = add nuw i64 %index, %5
1792  %8 = icmp eq i64 %index.next, %n.vec
1793  br i1 %8, label %middle.block, label %vector.body
1794
1795middle.block:                                     ; preds = %vector.body
  ; No remainder means we are done; otherwise the scalar loop finishes the tail.
1796  %cmp.n = icmp eq i64 %n.mod.vf, 0
1797  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
1798
1799for.body.preheader:                               ; preds = %entry, %middle.block
1800  %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
1801  br label %for.body
1802
1803for.cond.cleanup:                                 ; preds = %for.body, %middle.block
1804  ret void
1805
1806for.body:                                         ; preds = %for.body.preheader, %for.body
  ; Scalar loop: one element per iteration until the index reaches 1024.
  ; Note: the result is named %mul although the operation is an addition.
1807  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
1808  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
1809  %9 = load float, ptr %arrayidx, align 4
1810  %mul = fadd float %9, %x
1811  store float %mul, ptr %arrayidx, align 4
1812  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1813  %cmp.not = icmp eq i64 %indvars.iv.next, 1024
1814  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
1815}
1816
; Scalable-vector loop that subtracts the scalar %x from 1024 floats at %a,
; with a scalar loop for any remainder. The splat of %x is built in %vector.ph
; and is the right operand of the fsub in %vector.body. The autogenerated
; assertions expect the vector loop to use vfsub.vf with fa0 as the scalar
; operand.
1817define void @sink_splat_fsub_scalable(ptr nocapture %a, float %x) {
1818; CHECK-LABEL: sink_splat_fsub_scalable:
1819; CHECK:       # %bb.0: # %entry
1820; CHECK-NEXT:    csrr a1, vlenb
1821; CHECK-NEXT:    srli a3, a1, 2
1822; CHECK-NEXT:    li a2, 1024
1823; CHECK-NEXT:    bgeu a2, a3, .LBB30_2
1824; CHECK-NEXT:  # %bb.1:
1825; CHECK-NEXT:    li a2, 0
1826; CHECK-NEXT:    j .LBB30_5
1827; CHECK-NEXT:  .LBB30_2: # %vector.ph
1828; CHECK-NEXT:    addi a2, a3, -1
1829; CHECK-NEXT:    andi a4, a2, 1024
1830; CHECK-NEXT:    xori a2, a4, 1024
1831; CHECK-NEXT:    mv a5, a0
1832; CHECK-NEXT:    mv a6, a2
1833; CHECK-NEXT:    vsetvli a7, zero, e32, m1, ta, ma
1834; CHECK-NEXT:  .LBB30_3: # %vector.body
1835; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
1836; CHECK-NEXT:    vl1re32.v v8, (a5)
1837; CHECK-NEXT:    sub a6, a6, a3
1838; CHECK-NEXT:    vfsub.vf v8, v8, fa0
1839; CHECK-NEXT:    vs1r.v v8, (a5)
1840; CHECK-NEXT:    add a5, a5, a1
1841; CHECK-NEXT:    bnez a6, .LBB30_3
1842; CHECK-NEXT:  # %bb.4: # %middle.block
1843; CHECK-NEXT:    beqz a4, .LBB30_7
1844; CHECK-NEXT:  .LBB30_5: # %for.body.preheader
1845; CHECK-NEXT:    slli a1, a2, 2
1846; CHECK-NEXT:    lui a2, 1
1847; CHECK-NEXT:    add a1, a0, a1
1848; CHECK-NEXT:    add a0, a0, a2
1849; CHECK-NEXT:  .LBB30_6: # %for.body
1850; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
1851; CHECK-NEXT:    flw fa5, 0(a1)
1852; CHECK-NEXT:    fsub.s fa5, fa5, fa0
1853; CHECK-NEXT:    fsw fa5, 0(a1)
1854; CHECK-NEXT:    addi a1, a1, 4
1855; CHECK-NEXT:    bne a1, a0, .LBB30_6
1856; CHECK-NEXT:  .LBB30_7: # %for.cond.cleanup
1857; CHECK-NEXT:    ret
1858entry:
  ; Skip the vector loop when one vector step (vscale * 2 elements) exceeds the
  ; 1024-element trip count.
1859  %0 = call i64 @llvm.vscale.i64()
1860  %1 = shl i64 %0, 1
1861  %min.iters.check = icmp ugt i64 %1, 1024
1862  br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
1863
1864vector.ph:                                        ; preds = %entry
  ; %n.vec is 1024 rounded down to a multiple of the vector step; %n.mod.vf is
  ; the leftover element count. The splat of %x is created here, outside the loop.
1865  %2 = call i64 @llvm.vscale.i64()
1866  %3 = shl i64 %2, 1
1867  %n.mod.vf = urem i64 1024, %3
1868  %n.vec = sub nsw i64 1024, %n.mod.vf
1869  %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
1870  %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
1871  %4 = call i64 @llvm.vscale.i64()
1872  %5 = shl i64 %4, 1
1873  br label %vector.body
1874
1875vector.body:                                      ; preds = %vector.body, %vector.ph
1876  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1877  %6 = getelementptr inbounds float, ptr %a, i64 %index
1878  %wide.load = load <vscale x 2 x float>, ptr %6, align 4
1879  %7 = fsub <vscale x 2 x float> %wide.load, %broadcast.splat
1880  store <vscale x 2 x float> %7, ptr %6, align 4
1881  %index.next = add nuw i64 %index, %5
1882  %8 = icmp eq i64 %index.next, %n.vec
1883  br i1 %8, label %middle.block, label %vector.body
1884
1885middle.block:                                     ; preds = %vector.body
  ; No remainder means we are done; otherwise the scalar loop finishes the tail.
1886  %cmp.n = icmp eq i64 %n.mod.vf, 0
1887  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
1888
1889for.body.preheader:                               ; preds = %entry, %middle.block
1890  %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
1891  br label %for.body
1892
1893for.cond.cleanup:                                 ; preds = %for.body, %middle.block
1894  ret void
1895
1896for.body:                                         ; preds = %for.body.preheader, %for.body
  ; Scalar loop: one element per iteration until the index reaches 1024.
  ; Note: the result is named %mul although the operation is a subtraction.
1897  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
1898  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
1899  %9 = load float, ptr %arrayidx, align 4
1900  %mul = fsub float %9, %x
1901  store float %mul, ptr %arrayidx, align 4
1902  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1903  %cmp.not = icmp eq i64 %indvars.iv.next, 1024
1904  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
1905}
1906
; Scalable-vector loop that replaces each of 1024 floats at %a with %x minus
; that element, with a scalar loop for any remainder. The splat of %x is built
; in %vector.ph and is the left operand of the fsub in %vector.body, so the
; autogenerated assertions expect the reversed-operand form vfrsub.vf with fa0
; as the scalar operand.
1907define void @sink_splat_frsub_scalable(ptr nocapture %a, float %x) {
1908; CHECK-LABEL: sink_splat_frsub_scalable:
1909; CHECK:       # %bb.0: # %entry
1910; CHECK-NEXT:    csrr a1, vlenb
1911; CHECK-NEXT:    srli a3, a1, 2
1912; CHECK-NEXT:    li a2, 1024
1913; CHECK-NEXT:    bgeu a2, a3, .LBB31_2
1914; CHECK-NEXT:  # %bb.1:
1915; CHECK-NEXT:    li a2, 0
1916; CHECK-NEXT:    j .LBB31_5
1917; CHECK-NEXT:  .LBB31_2: # %vector.ph
1918; CHECK-NEXT:    addi a2, a3, -1
1919; CHECK-NEXT:    andi a4, a2, 1024
1920; CHECK-NEXT:    xori a2, a4, 1024
1921; CHECK-NEXT:    mv a5, a0
1922; CHECK-NEXT:    mv a6, a2
1923; CHECK-NEXT:    vsetvli a7, zero, e32, m1, ta, ma
1924; CHECK-NEXT:  .LBB31_3: # %vector.body
1925; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
1926; CHECK-NEXT:    vl1re32.v v8, (a5)
1927; CHECK-NEXT:    sub a6, a6, a3
1928; CHECK-NEXT:    vfrsub.vf v8, v8, fa0
1929; CHECK-NEXT:    vs1r.v v8, (a5)
1930; CHECK-NEXT:    add a5, a5, a1
1931; CHECK-NEXT:    bnez a6, .LBB31_3
1932; CHECK-NEXT:  # %bb.4: # %middle.block
1933; CHECK-NEXT:    beqz a4, .LBB31_7
1934; CHECK-NEXT:  .LBB31_5: # %for.body.preheader
1935; CHECK-NEXT:    slli a1, a2, 2
1936; CHECK-NEXT:    lui a2, 1
1937; CHECK-NEXT:    add a1, a0, a1
1938; CHECK-NEXT:    add a0, a0, a2
1939; CHECK-NEXT:  .LBB31_6: # %for.body
1940; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
1941; CHECK-NEXT:    flw fa5, 0(a1)
1942; CHECK-NEXT:    fsub.s fa5, fa0, fa5
1943; CHECK-NEXT:    fsw fa5, 0(a1)
1944; CHECK-NEXT:    addi a1, a1, 4
1945; CHECK-NEXT:    bne a1, a0, .LBB31_6
1946; CHECK-NEXT:  .LBB31_7: # %for.cond.cleanup
1947; CHECK-NEXT:    ret
1948entry:
  ; Skip the vector loop when one vector step (vscale * 2 elements) exceeds the
  ; 1024-element trip count.
1949  %0 = call i64 @llvm.vscale.i64()
1950  %1 = shl i64 %0, 1
1951  %min.iters.check = icmp ugt i64 %1, 1024
1952  br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
1953
1954vector.ph:                                        ; preds = %entry
  ; %n.vec is 1024 rounded down to a multiple of the vector step; %n.mod.vf is
  ; the leftover element count. The splat of %x is created here, outside the loop.
1955  %2 = call i64 @llvm.vscale.i64()
1956  %3 = shl i64 %2, 1
1957  %n.mod.vf = urem i64 1024, %3
1958  %n.vec = sub nsw i64 1024, %n.mod.vf
1959  %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
1960  %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
1961  %4 = call i64 @llvm.vscale.i64()
1962  %5 = shl i64 %4, 1
1963  br label %vector.body
1964
1965vector.body:                                      ; preds = %vector.body, %vector.ph
1966  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
1967  %6 = getelementptr inbounds float, ptr %a, i64 %index
1968  %wide.load = load <vscale x 2 x float>, ptr %6, align 4
1969  %7 = fsub <vscale x 2 x float> %broadcast.splat, %wide.load
1970  store <vscale x 2 x float> %7, ptr %6, align 4
1971  %index.next = add nuw i64 %index, %5
1972  %8 = icmp eq i64 %index.next, %n.vec
1973  br i1 %8, label %middle.block, label %vector.body
1974
1975middle.block:                                     ; preds = %vector.body
  ; No remainder means we are done; otherwise the scalar loop finishes the tail.
1976  %cmp.n = icmp eq i64 %n.mod.vf, 0
1977  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
1978
1979for.body.preheader:                               ; preds = %entry, %middle.block
1980  %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
1981  br label %for.body
1982
1983for.cond.cleanup:                                 ; preds = %for.body, %middle.block
1984  ret void
1985
1986for.body:                                         ; preds = %for.body.preheader, %for.body
  ; Scalar loop: one element per iteration until the index reaches 1024.
  ; Note: the result is named %mul although the operation is a subtraction.
1987  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
1988  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
1989  %9 = load float, ptr %arrayidx, align 4
1990  %mul = fsub float %x, %9
1991  store float %mul, ptr %arrayidx, align 4
1992  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1993  %cmp.not = icmp eq i64 %indvars.iv.next, 1024
1994  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
1995}
1996
; Fixed-width (<4 x float>) loop over 1024 elements that stores
; fma(a[i], x, b[i]) back to %a. The splat of %x is built in %entry, outside
; the loop, and is the second multiplicand of the llvm.fma call in
; %vector.body. The autogenerated assertions expect vfmacc.vf with fa0 as the
; scalar operand, accumulating into the register loaded from %b.
1997define void @sink_splat_fma(ptr noalias nocapture %a, ptr nocapture readonly %b, float %x) {
1998; CHECK-LABEL: sink_splat_fma:
1999; CHECK:       # %bb.0: # %entry
2000; CHECK-NEXT:    lui a2, 1
2001; CHECK-NEXT:    add a2, a1, a2
2002; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
2003; CHECK-NEXT:  .LBB32_1: # %vector.body
2004; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
2005; CHECK-NEXT:    vle32.v v8, (a0)
2006; CHECK-NEXT:    vle32.v v9, (a1)
2007; CHECK-NEXT:    addi a1, a1, 16
2008; CHECK-NEXT:    vfmacc.vf v9, fa0, v8
2009; CHECK-NEXT:    vse32.v v9, (a0)
2010; CHECK-NEXT:    addi a0, a0, 16
2011; CHECK-NEXT:    bne a1, a2, .LBB32_1
2012; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
2013; CHECK-NEXT:    ret
2014entry:
  ; Build the splat of %x once, before entering the loop.
2015  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
2016  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
2017  br label %vector.body
2018
2019vector.body:                                      ; preds = %vector.body, %entry
  ; Process four floats per iteration; exit once the index reaches 1024.
2020  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2021  %0 = getelementptr inbounds float, ptr %a, i64 %index
2022  %wide.load = load <4 x float>, ptr %0, align 4
2023  %1 = getelementptr inbounds float, ptr %b, i64 %index
2024  %wide.load12 = load <4 x float>, ptr %1, align 4
2025  %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x float> %wide.load12)
2026  store <4 x float> %2, ptr %0, align 4
2027  %index.next = add nuw i64 %index, 4
2028  %3 = icmp eq i64 %index.next, 1024
2029  br i1 %3, label %for.cond.cleanup, label %vector.body
2030
2031for.cond.cleanup:                                 ; preds = %vector.body
2032  ret void
2033}
2034
; Same loop as the non-commuted fma test, but with the multiplicands swapped:
; the splat of %x is the first operand of the llvm.fma call and the value
; loaded from %a is the second. The autogenerated assertions expect the same
; vfmacc.vf instruction with fa0 as the scalar operand.
2035define void @sink_splat_fma_commute(ptr noalias nocapture %a, ptr nocapture readonly %b, float %x) {
2036; CHECK-LABEL: sink_splat_fma_commute:
2037; CHECK:       # %bb.0: # %entry
2038; CHECK-NEXT:    lui a2, 1
2039; CHECK-NEXT:    add a2, a1, a2
2040; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
2041; CHECK-NEXT:  .LBB33_1: # %vector.body
2042; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
2043; CHECK-NEXT:    vle32.v v8, (a0)
2044; CHECK-NEXT:    vle32.v v9, (a1)
2045; CHECK-NEXT:    addi a1, a1, 16
2046; CHECK-NEXT:    vfmacc.vf v9, fa0, v8
2047; CHECK-NEXT:    vse32.v v9, (a0)
2048; CHECK-NEXT:    addi a0, a0, 16
2049; CHECK-NEXT:    bne a1, a2, .LBB33_1
2050; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
2051; CHECK-NEXT:    ret
2052entry:
  ; Build the splat of %x once, before entering the loop.
2053  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
2054  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
2055  br label %vector.body
2056
2057vector.body:                                      ; preds = %vector.body, %entry
  ; Process four floats per iteration; exit once the index reaches 1024.
2058  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2059  %0 = getelementptr inbounds float, ptr %a, i64 %index
2060  %wide.load = load <4 x float>, ptr %0, align 4
2061  %1 = getelementptr inbounds float, ptr %b, i64 %index
2062  %wide.load12 = load <4 x float>, ptr %1, align 4
2063  %2 = call <4 x float> @llvm.fma.v4f32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x float> %wide.load12)
2064  store <4 x float> %2, ptr %0, align 4
2065  %index.next = add nuw i64 %index, 4
2066  %3 = icmp eq i64 %index.next, 1024
2067  br i1 %3, label %for.cond.cleanup, label %vector.body
2068
2069for.cond.cleanup:                                 ; preds = %vector.body
2070  ret void
2071}
2072
; Scalable-vector loop over 1024 floats that stores fma(a[i], x, b[i]) back to
; %a, with a scalar loop for any remainder. The splat of %x is built in
; %vector.ph, outside the loop, and is the second multiplicand of the llvm.fma
; call in %vector.body. The autogenerated assertions expect the vector loop to
; use vfmacc.vf with fa0 as the scalar operand.
2073define void @sink_splat_fma_scalable(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, float %x) {
2074; CHECK-LABEL: sink_splat_fma_scalable:
2075; CHECK:       # %bb.0: # %entry
2076; CHECK-NEXT:    csrr a2, vlenb
2077; CHECK-NEXT:    srli a4, a2, 2
2078; CHECK-NEXT:    li a3, 1024
2079; CHECK-NEXT:    bgeu a3, a4, .LBB34_2
2080; CHECK-NEXT:  # %bb.1:
2081; CHECK-NEXT:    li a3, 0
2082; CHECK-NEXT:    j .LBB34_5
2083; CHECK-NEXT:  .LBB34_2: # %vector.ph
2084; CHECK-NEXT:    addi a3, a4, -1
2085; CHECK-NEXT:    andi a5, a3, 1024
2086; CHECK-NEXT:    xori a3, a5, 1024
2087; CHECK-NEXT:    mv a6, a0
2088; CHECK-NEXT:    mv a7, a1
2089; CHECK-NEXT:    mv t0, a3
2090; CHECK-NEXT:    vsetvli t1, zero, e32, m1, ta, ma
2091; CHECK-NEXT:  .LBB34_3: # %vector.body
2092; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
2093; CHECK-NEXT:    vl1re32.v v8, (a6)
2094; CHECK-NEXT:    vl1re32.v v9, (a7)
2095; CHECK-NEXT:    sub t0, t0, a4
2096; CHECK-NEXT:    add a7, a7, a2
2097; CHECK-NEXT:    vfmacc.vf v9, fa0, v8
2098; CHECK-NEXT:    vs1r.v v9, (a6)
2099; CHECK-NEXT:    add a6, a6, a2
2100; CHECK-NEXT:    bnez t0, .LBB34_3
2101; CHECK-NEXT:  # %bb.4: # %middle.block
2102; CHECK-NEXT:    beqz a5, .LBB34_7
2103; CHECK-NEXT:  .LBB34_5: # %for.body.preheader
2104; CHECK-NEXT:    slli a2, a3, 2
2105; CHECK-NEXT:    lui a3, 1
2106; CHECK-NEXT:    add a0, a0, a2
2107; CHECK-NEXT:    add a2, a1, a2
2108; CHECK-NEXT:    add a1, a1, a3
2109; CHECK-NEXT:  .LBB34_6: # %for.body
2110; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
2111; CHECK-NEXT:    flw fa5, 0(a0)
2112; CHECK-NEXT:    flw fa4, 0(a2)
2113; CHECK-NEXT:    addi a2, a2, 4
2114; CHECK-NEXT:    fmadd.s fa5, fa5, fa0, fa4
2115; CHECK-NEXT:    fsw fa5, 0(a0)
2116; CHECK-NEXT:    addi a0, a0, 4
2117; CHECK-NEXT:    bne a2, a1, .LBB34_6
2118; CHECK-NEXT:  .LBB34_7: # %for.cond.cleanup
2119; CHECK-NEXT:    ret
2120entry:
  ; Skip the vector loop when one vector step (vscale * 2 elements) exceeds the
  ; 1024-element trip count.
2121  %0 = call i64 @llvm.vscale.i64()
2122  %1 = shl i64 %0, 1
2123  %min.iters.check = icmp ugt i64 %1, 1024
2124  br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
2125
2126vector.ph:                                        ; preds = %entry
  ; %n.vec is 1024 rounded down to a multiple of the vector step; %n.mod.vf is
  ; the leftover element count. The splat of %x is created here, outside the loop.
2127  %2 = call i64 @llvm.vscale.i64()
2128  %3 = shl i64 %2, 1
2129  %n.mod.vf = urem i64 1024, %3
2130  %n.vec = sub nsw i64 1024, %n.mod.vf
2131  %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
2132  %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
2133  %4 = call i64 @llvm.vscale.i64()
2134  %5 = shl i64 %4, 1
2135  br label %vector.body
2136
2137vector.body:                                      ; preds = %vector.body, %vector.ph
2138  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2139  %6 = getelementptr inbounds float, ptr %a, i64 %index
2140  %wide.load = load <vscale x 2 x float>, ptr %6, align 4
2141  %7 = getelementptr inbounds float, ptr %b, i64 %index
2142  %wide.load12 = load <vscale x 2 x float>, ptr %7, align 4
2143  %8 = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> %wide.load, <vscale x 2 x float> %broadcast.splat, <vscale x 2 x float> %wide.load12)
2144  store <vscale x 2 x float> %8, ptr %6, align 4
2145  %index.next = add nuw i64 %index, %5
2146  %9 = icmp eq i64 %index.next, %n.vec
2147  br i1 %9, label %middle.block, label %vector.body
2148
2149middle.block:                                     ; preds = %vector.body
  ; No remainder means we are done; otherwise the scalar loop finishes the tail.
2150  %cmp.n = icmp eq i64 %n.mod.vf, 0
2151  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
2152
2153for.body.preheader:                               ; preds = %entry, %middle.block
2154  %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
2155  br label %for.body
2156
2157for.cond.cleanup:                                 ; preds = %for.body, %middle.block
2158  ret void
2159
2160for.body:                                         ; preds = %for.body.preheader, %for.body
  ; Scalar loop: one fma per iteration until the index reaches 1024.
2161  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
2162  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
2163  %10 = load float, ptr %arrayidx, align 4
2164  %arrayidx2 = getelementptr inbounds float, ptr %b, i64 %indvars.iv
2165  %11 = load float, ptr %arrayidx2, align 4
2166  %12 = tail call float @llvm.fma.f32(float %10, float %x, float %11)
2167  store float %12, ptr %arrayidx, align 4
2168  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
2169  %cmp.not = icmp eq i64 %indvars.iv.next, 1024
2170  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
2171}
2172
; Scalable-vector FMA test with the splat in the commuted position: the splat
; of %x is built once in %vector.ph and used as the FIRST operand of
; llvm.fma.nxv2f32 inside %vector.body. The expected assembly contains no
; separate splat instruction; the vector loop uses vfmacc.vf with the scalar
; register fa0. %for.body is the scalar remainder loop (expected: fmadd.s).
2173define void @sink_splat_fma_commute_scalable(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, float %x) {
2174; CHECK-LABEL: sink_splat_fma_commute_scalable:
2175; CHECK:       # %bb.0: # %entry
2176; CHECK-NEXT:    csrr a2, vlenb
2177; CHECK-NEXT:    srli a4, a2, 2
2178; CHECK-NEXT:    li a3, 1024
2179; CHECK-NEXT:    bgeu a3, a4, .LBB35_2
2180; CHECK-NEXT:  # %bb.1:
2181; CHECK-NEXT:    li a3, 0
2182; CHECK-NEXT:    j .LBB35_5
2183; CHECK-NEXT:  .LBB35_2: # %vector.ph
2184; CHECK-NEXT:    addi a3, a4, -1
2185; CHECK-NEXT:    andi a5, a3, 1024
2186; CHECK-NEXT:    xori a3, a5, 1024
2187; CHECK-NEXT:    mv a6, a0
2188; CHECK-NEXT:    mv a7, a1
2189; CHECK-NEXT:    mv t0, a3
2190; CHECK-NEXT:    vsetvli t1, zero, e32, m1, ta, ma
2191; CHECK-NEXT:  .LBB35_3: # %vector.body
2192; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
2193; CHECK-NEXT:    vl1re32.v v8, (a6)
2194; CHECK-NEXT:    vl1re32.v v9, (a7)
2195; CHECK-NEXT:    sub t0, t0, a4
2196; CHECK-NEXT:    add a7, a7, a2
2197; CHECK-NEXT:    vfmacc.vf v9, fa0, v8
2198; CHECK-NEXT:    vs1r.v v9, (a6)
2199; CHECK-NEXT:    add a6, a6, a2
2200; CHECK-NEXT:    bnez t0, .LBB35_3
2201; CHECK-NEXT:  # %bb.4: # %middle.block
2202; CHECK-NEXT:    beqz a5, .LBB35_7
2203; CHECK-NEXT:  .LBB35_5: # %for.body.preheader
2204; CHECK-NEXT:    slli a2, a3, 2
2205; CHECK-NEXT:    lui a3, 1
2206; CHECK-NEXT:    add a0, a0, a2
2207; CHECK-NEXT:    add a2, a1, a2
2208; CHECK-NEXT:    add a1, a1, a3
2209; CHECK-NEXT:  .LBB35_6: # %for.body
2210; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
2211; CHECK-NEXT:    flw fa5, 0(a0)
2212; CHECK-NEXT:    flw fa4, 0(a2)
2213; CHECK-NEXT:    addi a2, a2, 4
2214; CHECK-NEXT:    fmadd.s fa5, fa0, fa5, fa4
2215; CHECK-NEXT:    fsw fa5, 0(a0)
2216; CHECK-NEXT:    addi a0, a0, 4
2217; CHECK-NEXT:    bne a2, a1, .LBB35_6
2218; CHECK-NEXT:  .LBB35_7: # %for.cond.cleanup
2219; CHECK-NEXT:    ret
2220entry:
  ; %1 = vscale * 2 = number of floats per vector iteration. If that exceeds
  ; the 1024-element trip count, go straight to the scalar loop.
2221  %0 = call i64 @llvm.vscale.i64()
2222  %1 = shl i64 %0, 1
2223  %min.iters.check = icmp ugt i64 %1, 1024
2224  br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
2225
2226vector.ph:                                        ; preds = %entry
  ; %n.vec = 1024 rounded down to a multiple of the vector step; the splat of
  ; %x is materialized here, outside the loop that uses it.
2227  %2 = call i64 @llvm.vscale.i64()
2228  %3 = shl i64 %2, 1
2229  %n.mod.vf = urem i64 1024, %3
2230  %n.vec = sub nsw i64 1024, %n.mod.vf
2231  %broadcast.splatinsert = insertelement <vscale x 2 x float> poison, float %x, i32 0
2232  %broadcast.splat = shufflevector <vscale x 2 x float> %broadcast.splatinsert, <vscale x 2 x float> poison, <vscale x 2 x i32> zeroinitializer
2233  %4 = call i64 @llvm.vscale.i64()
2234  %5 = shl i64 %4, 1
2235  br label %vector.body
2236
2237vector.body:                                      ; preds = %vector.body, %vector.ph
2238  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2239  %6 = getelementptr inbounds float, ptr %a, i64 %index
2240  %wide.load = load <vscale x 2 x float>, ptr %6, align 4
2241  %7 = getelementptr inbounds float, ptr %b, i64 %index
2242  %wide.load12 = load <vscale x 2 x float>, ptr %7, align 4
  ; a[i] = fma(splat(x), a[i], b[i]) -- splat is operand 0 (commuted form).
2243  %8 = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> %broadcast.splat, <vscale x 2 x float> %wide.load, <vscale x 2 x float> %wide.load12)
2244  store <vscale x 2 x float> %8, ptr %6, align 4
2245  %index.next = add nuw i64 %index, %5
2246  %9 = icmp eq i64 %index.next, %n.vec
2247  br i1 %9, label %middle.block, label %vector.body
2248
2249middle.block:                                     ; preds = %vector.body
  ; Skip the scalar loop when the vector loop covered all 1024 elements.
2250  %cmp.n = icmp eq i64 %n.mod.vf, 0
2251  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
2252
2253for.body.preheader:                               ; preds = %entry, %middle.block
2254  %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
2255  br label %for.body
2256
2257for.cond.cleanup:                                 ; preds = %for.body, %middle.block
2258  ret void
2259
2260for.body:                                         ; preds = %for.body.preheader, %for.body
2261  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
2262  %arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
2263  %10 = load float, ptr %arrayidx, align 4
2264  %arrayidx2 = getelementptr inbounds float, ptr %b, i64 %indvars.iv
2265  %11 = load float, ptr %arrayidx2, align 4
2266  %12 = tail call float @llvm.fma.f32(float %x, float %10, float %11)
2267  store float %12, ptr %arrayidx, align 4
2268  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
2269  %cmp.not = icmp eq i64 %indvars.iv.next, 1024
2270  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
2271}
2272
2273declare i64 @llvm.vscale.i64()
2274declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
2275declare <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>, <vscale x 2 x float>)
2276declare float @llvm.fma.f32(float, float, float)
2277
; Fixed-width <4 x i32> loop: each loaded vector is compared for equality with
; a splat of %y (built in %entry), and the <4 x i1> result is the mask of a
; masked store of zeroes back to the same address. The expected assembly uses
; vmseq.vx with scalar register a1 inside the loop, followed by a vse32.v
; masked with v0.t.
2278define void @sink_splat_icmp(ptr nocapture %x, i32 signext %y) {
2279; CHECK-LABEL: sink_splat_icmp:
2280; CHECK:       # %bb.0: # %entry
2281; CHECK-NEXT:    lui a2, 1
2282; CHECK-NEXT:    add a2, a0, a2
2283; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
2284; CHECK-NEXT:    vmv.v.i v8, 0
2285; CHECK-NEXT:  .LBB36_1: # %vector.body
2286; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
2287; CHECK-NEXT:    vle32.v v9, (a0)
2288; CHECK-NEXT:    vmseq.vx v0, v9, a1
2289; CHECK-NEXT:    vse32.v v8, (a0), v0.t
2290; CHECK-NEXT:    addi a0, a0, 16
2291; CHECK-NEXT:    bne a0, a2, .LBB36_1
2292; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
2293; CHECK-NEXT:    ret
2294entry:
2295  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %y, i32 0
2296  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
2297  br label %vector.body
2298
2299vector.body:                                      ; preds = %vector.body, %entry
2300  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2301  %0 = getelementptr inbounds i32, ptr %x, i64 %index
2302  %wide.load = load <4 x i32>, ptr %0, align 4
2303  %1 = icmp eq <4 x i32> %wide.load, %broadcast.splat
  ; Store zero only to the lanes where the comparison was true.
2304  call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr %0, i32 4, <4 x i1> %1)
2305  %index.next = add nuw i64 %index, 4
2306  %2 = icmp eq i64 %index.next, 1024
2307  br i1 %2, label %for.cond.cleanup, label %vector.body
2308
2309for.cond.cleanup:                                 ; preds = %vector.body
2310  ret void
2311}
2312declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32, <4 x i1>)
2313
; Floating-point counterpart of the integer compare test: each loaded
; <4 x float> is compared (fcmp fast oeq) with a splat of %y built in %entry,
; and the result masks a store of zeroes. The expected assembly uses vmfeq.vf
; with scalar register fa0 inside the loop, followed by a vse32.v masked with
; v0.t.
2314define void @sink_splat_fcmp(ptr nocapture %x, float %y) {
2315; CHECK-LABEL: sink_splat_fcmp:
2316; CHECK:       # %bb.0: # %entry
2317; CHECK-NEXT:    lui a1, 1
2318; CHECK-NEXT:    add a1, a0, a1
2319; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
2320; CHECK-NEXT:    vmv.v.i v8, 0
2321; CHECK-NEXT:  .LBB37_1: # %vector.body
2322; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
2323; CHECK-NEXT:    vle32.v v9, (a0)
2324; CHECK-NEXT:    vmfeq.vf v0, v9, fa0
2325; CHECK-NEXT:    vse32.v v8, (a0), v0.t
2326; CHECK-NEXT:    addi a0, a0, 16
2327; CHECK-NEXT:    bne a0, a1, .LBB37_1
2328; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
2329; CHECK-NEXT:    ret
2330entry:
2331  %broadcast.splatinsert = insertelement <4 x float> poison, float %y, i32 0
2332  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
2333  br label %vector.body
2334
2335vector.body:                                      ; preds = %vector.body, %entry
2336  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2337  %0 = getelementptr inbounds float, ptr %x, i64 %index
2338  %wide.load = load <4 x float>, ptr %0, align 4
2339  %1 = fcmp fast oeq <4 x float> %wide.load, %broadcast.splat
  ; Store zero only to the lanes where the comparison was true.
2340  call void @llvm.masked.store.v4f32.p0(<4 x float> zeroinitializer, ptr %0, i32 4, <4 x i1> %1)
2341  %index.next = add nuw i64 %index, 4
2342  %2 = icmp eq i64 %index.next, 1024
2343  br i1 %2, label %for.cond.cleanup, label %vector.body
2344
2345for.cond.cleanup:                                 ; preds = %vector.body
2346  ret void
2347}
2348declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32, <4 x i1>)
2349
; Fixed-width <4 x i32> loop dividing each loaded vector (unsigned) by a splat
; of %x that is built in %entry, outside the loop. The splat is the divisor
; (second operand). The expected assembly uses vdivu.vx with scalar register
; a1 inside the loop and no separate splat instruction.
2350define void @sink_splat_udiv(ptr nocapture %a, i32 signext %x) {
2351; CHECK-LABEL: sink_splat_udiv:
2352; CHECK:       # %bb.0: # %entry
2353; CHECK-NEXT:    lui a2, 1
2354; CHECK-NEXT:    add a2, a0, a2
2355; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
2356; CHECK-NEXT:  .LBB38_1: # %vector.body
2357; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
2358; CHECK-NEXT:    vle32.v v8, (a0)
2359; CHECK-NEXT:    vdivu.vx v8, v8, a1
2360; CHECK-NEXT:    vse32.v v8, (a0)
2361; CHECK-NEXT:    addi a0, a0, 16
2362; CHECK-NEXT:    bne a0, a2, .LBB38_1
2363; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
2364; CHECK-NEXT:    ret
2365entry:
2366  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
2367  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
2368  br label %vector.body
2369
2370vector.body:                                      ; preds = %vector.body, %entry
2371  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2372  %0 = getelementptr inbounds i32, ptr %a, i64 %index
2373  %wide.load = load <4 x i32>, ptr %0, align 4
2374  %1 = udiv <4 x i32> %wide.load, %broadcast.splat
2375  store <4 x i32> %1, ptr %0, align 4
2376  %index.next = add nuw i64 %index, 4
2377  %2 = icmp eq i64 %index.next, 1024
2378  br i1 %2, label %for.cond.cleanup, label %vector.body
2379
2380for.cond.cleanup:                                 ; preds = %vector.body
2381  ret void
2382}
2383
; Fixed-width <4 x i32> loop dividing each loaded vector (signed) by a splat
; of %x that is built in %entry, outside the loop. The splat is the divisor
; (second operand). The expected assembly uses vdiv.vx with scalar register
; a1 inside the loop and no separate splat instruction.
2384define void @sink_splat_sdiv(ptr nocapture %a, i32 signext %x) {
2385; CHECK-LABEL: sink_splat_sdiv:
2386; CHECK:       # %bb.0: # %entry
2387; CHECK-NEXT:    lui a2, 1
2388; CHECK-NEXT:    add a2, a0, a2
2389; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
2390; CHECK-NEXT:  .LBB39_1: # %vector.body
2391; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
2392; CHECK-NEXT:    vle32.v v8, (a0)
2393; CHECK-NEXT:    vdiv.vx v8, v8, a1
2394; CHECK-NEXT:    vse32.v v8, (a0)
2395; CHECK-NEXT:    addi a0, a0, 16
2396; CHECK-NEXT:    bne a0, a2, .LBB39_1
2397; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
2398; CHECK-NEXT:    ret
2399entry:
2400  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
2401  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
2402  br label %vector.body
2403
2404vector.body:                                      ; preds = %vector.body, %entry
2405  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2406  %0 = getelementptr inbounds i32, ptr %a, i64 %index
2407  %wide.load = load <4 x i32>, ptr %0, align 4
2408  %1 = sdiv <4 x i32> %wide.load, %broadcast.splat
2409  store <4 x i32> %1, ptr %0, align 4
2410  %index.next = add nuw i64 %index, 4
2411  %2 = icmp eq i64 %index.next, 1024
2412  br i1 %2, label %for.cond.cleanup, label %vector.body
2413
2414for.cond.cleanup:                                 ; preds = %vector.body
2415  ret void
2416}
2417
; Fixed-width <4 x i32> loop taking the unsigned remainder of each loaded
; vector by a splat of %x that is built in %entry, outside the loop. The splat
; is the second operand. The expected assembly uses vremu.vx with scalar
; register a1 inside the loop and no separate splat instruction.
2418define void @sink_splat_urem(ptr nocapture %a, i32 signext %x) {
2419; CHECK-LABEL: sink_splat_urem:
2420; CHECK:       # %bb.0: # %entry
2421; CHECK-NEXT:    lui a2, 1
2422; CHECK-NEXT:    add a2, a0, a2
2423; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
2424; CHECK-NEXT:  .LBB40_1: # %vector.body
2425; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
2426; CHECK-NEXT:    vle32.v v8, (a0)
2427; CHECK-NEXT:    vremu.vx v8, v8, a1
2428; CHECK-NEXT:    vse32.v v8, (a0)
2429; CHECK-NEXT:    addi a0, a0, 16
2430; CHECK-NEXT:    bne a0, a2, .LBB40_1
2431; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
2432; CHECK-NEXT:    ret
2433entry:
2434  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
2435  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
2436  br label %vector.body
2437
2438vector.body:                                      ; preds = %vector.body, %entry
2439  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2440  %0 = getelementptr inbounds i32, ptr %a, i64 %index
2441  %wide.load = load <4 x i32>, ptr %0, align 4
2442  %1 = urem <4 x i32> %wide.load, %broadcast.splat
2443  store <4 x i32> %1, ptr %0, align 4
2444  %index.next = add nuw i64 %index, 4
2445  %2 = icmp eq i64 %index.next, 1024
2446  br i1 %2, label %for.cond.cleanup, label %vector.body
2447
2448for.cond.cleanup:                                 ; preds = %vector.body
2449  ret void
2450}
2451
; Fixed-width <4 x i32> loop taking the signed remainder of each loaded vector
; by a splat of %x that is built in %entry, outside the loop. The splat is the
; second operand. The expected assembly uses vrem.vx with scalar register a1
; inside the loop and no separate splat instruction.
2452define void @sink_splat_srem(ptr nocapture %a, i32 signext %x) {
2453; CHECK-LABEL: sink_splat_srem:
2454; CHECK:       # %bb.0: # %entry
2455; CHECK-NEXT:    lui a2, 1
2456; CHECK-NEXT:    add a2, a0, a2
2457; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
2458; CHECK-NEXT:  .LBB41_1: # %vector.body
2459; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
2460; CHECK-NEXT:    vle32.v v8, (a0)
2461; CHECK-NEXT:    vrem.vx v8, v8, a1
2462; CHECK-NEXT:    vse32.v v8, (a0)
2463; CHECK-NEXT:    addi a0, a0, 16
2464; CHECK-NEXT:    bne a0, a2, .LBB41_1
2465; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
2466; CHECK-NEXT:    ret
2467entry:
2468  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
2469  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
2470  br label %vector.body
2471
2472vector.body:                                      ; preds = %vector.body, %entry
2473  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2474  %0 = getelementptr inbounds i32, ptr %a, i64 %index
2475  %wide.load = load <4 x i32>, ptr %0, align 4
2476  %1 = srem <4 x i32> %wide.load, %broadcast.splat
2477  store <4 x i32> %1, ptr %0, align 4
2478  %index.next = add nuw i64 %index, 4
2479  %2 = icmp eq i64 %index.next, 1024
2480  br i1 %2, label %for.cond.cleanup, label %vector.body
2481
2482for.cond.cleanup:                                 ; preds = %vector.body
2483  ret void
2484}
2485
; Scalable-vector unsigned-division test over 1024 i32 elements. The splat of
; %x is built once in %vector.ph and used as the divisor in %vector.body,
; which handles vscale x 4 elements per iteration. The expected assembly uses
; vdivu.vx with scalar register a1 in the vector loop; %for.body is the scalar
; remainder loop (expected: divuw).
2486define void @sink_splat_udiv_scalable(ptr nocapture %a, i32 signext %x) {
2487; CHECK-LABEL: sink_splat_udiv_scalable:
2488; CHECK:       # %bb.0: # %entry
2489; CHECK-NEXT:    csrr a5, vlenb
2490; CHECK-NEXT:    srli a3, a5, 1
2491; CHECK-NEXT:    li a2, 1024
2492; CHECK-NEXT:    bgeu a2, a3, .LBB42_2
2493; CHECK-NEXT:  # %bb.1:
2494; CHECK-NEXT:    li a2, 0
2495; CHECK-NEXT:    j .LBB42_5
2496; CHECK-NEXT:  .LBB42_2: # %vector.ph
2497; CHECK-NEXT:    addi a2, a3, -1
2498; CHECK-NEXT:    andi a4, a2, 1024
2499; CHECK-NEXT:    xori a2, a4, 1024
2500; CHECK-NEXT:    slli a5, a5, 1
2501; CHECK-NEXT:    mv a6, a0
2502; CHECK-NEXT:    mv a7, a2
2503; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, ma
2504; CHECK-NEXT:  .LBB42_3: # %vector.body
2505; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
2506; CHECK-NEXT:    vl2re32.v v8, (a6)
2507; CHECK-NEXT:    sub a7, a7, a3
2508; CHECK-NEXT:    vdivu.vx v8, v8, a1
2509; CHECK-NEXT:    vs2r.v v8, (a6)
2510; CHECK-NEXT:    add a6, a6, a5
2511; CHECK-NEXT:    bnez a7, .LBB42_3
2512; CHECK-NEXT:  # %bb.4: # %middle.block
2513; CHECK-NEXT:    beqz a4, .LBB42_7
2514; CHECK-NEXT:  .LBB42_5: # %for.body.preheader
2515; CHECK-NEXT:    slli a2, a2, 2
2516; CHECK-NEXT:    lui a3, 1
2517; CHECK-NEXT:    add a2, a0, a2
2518; CHECK-NEXT:    add a0, a0, a3
2519; CHECK-NEXT:  .LBB42_6: # %for.body
2520; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
2521; CHECK-NEXT:    lw a3, 0(a2)
2522; CHECK-NEXT:    divuw a3, a3, a1
2523; CHECK-NEXT:    sw a3, 0(a2)
2524; CHECK-NEXT:    addi a2, a2, 4
2525; CHECK-NEXT:    bne a2, a0, .LBB42_6
2526; CHECK-NEXT:  .LBB42_7: # %for.cond.cleanup
2527; CHECK-NEXT:    ret
2528entry:
  ; %1 = vscale * 4 = number of i32 elements per vector iteration. If that
  ; exceeds the 1024-element trip count, go straight to the scalar loop.
2529  %0 = call i64 @llvm.vscale.i64()
2530  %1 = shl i64 %0, 2
2531  %min.iters.check = icmp ugt i64 %1, 1024
2532  br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
2533
2534vector.ph:                                        ; preds = %entry
  ; %n.vec = 1024 rounded down to a multiple of the vector step; the splat of
  ; %x is materialized here, outside the loop that uses it.
2535  %2 = call i64 @llvm.vscale.i64()
2536  %3 = shl i64 %2, 2
2537  %n.mod.vf = urem i64 1024, %3
2538  %n.vec = sub nsw i64 1024, %n.mod.vf
2539  %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
2540  %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
2541  %4 = call i64 @llvm.vscale.i64()
2542  %5 = shl i64 %4, 2
2543  br label %vector.body
2544
2545vector.body:                                      ; preds = %vector.body, %vector.ph
2546  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2547  %6 = getelementptr inbounds i32, ptr %a, i64 %index
2548  %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
2549  %7 = udiv <vscale x 4 x i32> %wide.load, %broadcast.splat
2550  store <vscale x 4 x i32> %7, ptr %6, align 4
2551  %index.next = add nuw i64 %index, %5
2552  %8 = icmp eq i64 %index.next, %n.vec
2553  br i1 %8, label %middle.block, label %vector.body
2554
2555middle.block:                                     ; preds = %vector.body
  ; Skip the scalar loop when the vector loop covered all 1024 elements.
2556  %cmp.n = icmp eq i64 %n.mod.vf, 0
2557  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
2558
2559for.body.preheader:                               ; preds = %entry, %middle.block
2560  %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
2561  br label %for.body
2562
2563for.cond.cleanup:                                 ; preds = %for.body, %middle.block
2564  ret void
2565
2566for.body:                                         ; preds = %for.body.preheader, %for.body
2567  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
2568  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
2569  %9 = load i32, ptr %arrayidx, align 4
2570  %div = udiv i32 %9, %x
2571  store i32 %div, ptr %arrayidx, align 4
2572  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
2573  %cmp.not = icmp eq i64 %indvars.iv.next, 1024
2574  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
2575}
2576
; Scalable-vector signed-division test over 1024 i32 elements. The splat of
; %x is built once in %vector.ph and used as the divisor in %vector.body,
; which handles vscale x 4 elements per iteration. The expected assembly uses
; vdiv.vx with scalar register a1 in the vector loop; %for.body is the scalar
; remainder loop (expected: divw).
2577define void @sink_splat_sdiv_scalable(ptr nocapture %a, i32 signext %x) {
2578; CHECK-LABEL: sink_splat_sdiv_scalable:
2579; CHECK:       # %bb.0: # %entry
2580; CHECK-NEXT:    csrr a5, vlenb
2581; CHECK-NEXT:    srli a3, a5, 1
2582; CHECK-NEXT:    li a2, 1024
2583; CHECK-NEXT:    bgeu a2, a3, .LBB43_2
2584; CHECK-NEXT:  # %bb.1:
2585; CHECK-NEXT:    li a2, 0
2586; CHECK-NEXT:    j .LBB43_5
2587; CHECK-NEXT:  .LBB43_2: # %vector.ph
2588; CHECK-NEXT:    addi a2, a3, -1
2589; CHECK-NEXT:    andi a4, a2, 1024
2590; CHECK-NEXT:    xori a2, a4, 1024
2591; CHECK-NEXT:    slli a5, a5, 1
2592; CHECK-NEXT:    mv a6, a0
2593; CHECK-NEXT:    mv a7, a2
2594; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, ma
2595; CHECK-NEXT:  .LBB43_3: # %vector.body
2596; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
2597; CHECK-NEXT:    vl2re32.v v8, (a6)
2598; CHECK-NEXT:    sub a7, a7, a3
2599; CHECK-NEXT:    vdiv.vx v8, v8, a1
2600; CHECK-NEXT:    vs2r.v v8, (a6)
2601; CHECK-NEXT:    add a6, a6, a5
2602; CHECK-NEXT:    bnez a7, .LBB43_3
2603; CHECK-NEXT:  # %bb.4: # %middle.block
2604; CHECK-NEXT:    beqz a4, .LBB43_7
2605; CHECK-NEXT:  .LBB43_5: # %for.body.preheader
2606; CHECK-NEXT:    slli a2, a2, 2
2607; CHECK-NEXT:    lui a3, 1
2608; CHECK-NEXT:    add a2, a0, a2
2609; CHECK-NEXT:    add a0, a0, a3
2610; CHECK-NEXT:  .LBB43_6: # %for.body
2611; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
2612; CHECK-NEXT:    lw a3, 0(a2)
2613; CHECK-NEXT:    divw a3, a3, a1
2614; CHECK-NEXT:    sw a3, 0(a2)
2615; CHECK-NEXT:    addi a2, a2, 4
2616; CHECK-NEXT:    bne a2, a0, .LBB43_6
2617; CHECK-NEXT:  .LBB43_7: # %for.cond.cleanup
2618; CHECK-NEXT:    ret
2619entry:
  ; %1 = vscale * 4 = number of i32 elements per vector iteration. If that
  ; exceeds the 1024-element trip count, go straight to the scalar loop.
2620  %0 = call i64 @llvm.vscale.i64()
2621  %1 = shl i64 %0, 2
2622  %min.iters.check = icmp ugt i64 %1, 1024
2623  br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
2624
2625vector.ph:                                        ; preds = %entry
  ; %n.vec = 1024 rounded down to a multiple of the vector step; the splat of
  ; %x is materialized here, outside the loop that uses it.
2626  %2 = call i64 @llvm.vscale.i64()
2627  %3 = shl i64 %2, 2
2628  %n.mod.vf = urem i64 1024, %3
2629  %n.vec = sub nsw i64 1024, %n.mod.vf
2630  %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
2631  %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
2632  %4 = call i64 @llvm.vscale.i64()
2633  %5 = shl i64 %4, 2
2634  br label %vector.body
2635
2636vector.body:                                      ; preds = %vector.body, %vector.ph
2637  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2638  %6 = getelementptr inbounds i32, ptr %a, i64 %index
2639  %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
2640  %7 = sdiv <vscale x 4 x i32> %wide.load, %broadcast.splat
2641  store <vscale x 4 x i32> %7, ptr %6, align 4
2642  %index.next = add nuw i64 %index, %5
2643  %8 = icmp eq i64 %index.next, %n.vec
2644  br i1 %8, label %middle.block, label %vector.body
2645
2646middle.block:                                     ; preds = %vector.body
  ; Skip the scalar loop when the vector loop covered all 1024 elements.
2647  %cmp.n = icmp eq i64 %n.mod.vf, 0
2648  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
2649
2650for.body.preheader:                               ; preds = %entry, %middle.block
2651  %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
2652  br label %for.body
2653
2654for.cond.cleanup:                                 ; preds = %for.body, %middle.block
2655  ret void
2656
2657for.body:                                         ; preds = %for.body.preheader, %for.body
2658  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
2659  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
2660  %9 = load i32, ptr %arrayidx, align 4
2661  %div = sdiv i32 %9, %x
2662  store i32 %div, ptr %arrayidx, align 4
2663  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
2664  %cmp.not = icmp eq i64 %indvars.iv.next, 1024
2665  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
2666}
2667
; Scalable-vector unsigned-remainder test over 1024 i32 elements. The splat of
; %x is built once in %vector.ph and used as the second operand of urem in
; %vector.body, which handles vscale x 4 elements per iteration. The expected
; assembly uses vremu.vx with scalar register a1 in the vector loop; %for.body
; is the scalar remainder loop (expected: remuw).
2668define void @sink_splat_urem_scalable(ptr nocapture %a, i32 signext %x) {
2669; CHECK-LABEL: sink_splat_urem_scalable:
2670; CHECK:       # %bb.0: # %entry
2671; CHECK-NEXT:    csrr a5, vlenb
2672; CHECK-NEXT:    srli a3, a5, 1
2673; CHECK-NEXT:    li a2, 1024
2674; CHECK-NEXT:    bgeu a2, a3, .LBB44_2
2675; CHECK-NEXT:  # %bb.1:
2676; CHECK-NEXT:    li a2, 0
2677; CHECK-NEXT:    j .LBB44_5
2678; CHECK-NEXT:  .LBB44_2: # %vector.ph
2679; CHECK-NEXT:    addi a2, a3, -1
2680; CHECK-NEXT:    andi a4, a2, 1024
2681; CHECK-NEXT:    xori a2, a4, 1024
2682; CHECK-NEXT:    slli a5, a5, 1
2683; CHECK-NEXT:    mv a6, a0
2684; CHECK-NEXT:    mv a7, a2
2685; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, ma
2686; CHECK-NEXT:  .LBB44_3: # %vector.body
2687; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
2688; CHECK-NEXT:    vl2re32.v v8, (a6)
2689; CHECK-NEXT:    sub a7, a7, a3
2690; CHECK-NEXT:    vremu.vx v8, v8, a1
2691; CHECK-NEXT:    vs2r.v v8, (a6)
2692; CHECK-NEXT:    add a6, a6, a5
2693; CHECK-NEXT:    bnez a7, .LBB44_3
2694; CHECK-NEXT:  # %bb.4: # %middle.block
2695; CHECK-NEXT:    beqz a4, .LBB44_7
2696; CHECK-NEXT:  .LBB44_5: # %for.body.preheader
2697; CHECK-NEXT:    slli a2, a2, 2
2698; CHECK-NEXT:    lui a3, 1
2699; CHECK-NEXT:    add a2, a0, a2
2700; CHECK-NEXT:    add a0, a0, a3
2701; CHECK-NEXT:  .LBB44_6: # %for.body
2702; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
2703; CHECK-NEXT:    lw a3, 0(a2)
2704; CHECK-NEXT:    remuw a3, a3, a1
2705; CHECK-NEXT:    sw a3, 0(a2)
2706; CHECK-NEXT:    addi a2, a2, 4
2707; CHECK-NEXT:    bne a2, a0, .LBB44_6
2708; CHECK-NEXT:  .LBB44_7: # %for.cond.cleanup
2709; CHECK-NEXT:    ret
2710entry:
  ; %1 = vscale * 4 = number of i32 elements per vector iteration. If that
  ; exceeds the 1024-element trip count, go straight to the scalar loop.
2711  %0 = call i64 @llvm.vscale.i64()
2712  %1 = shl i64 %0, 2
2713  %min.iters.check = icmp ugt i64 %1, 1024
2714  br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
2715
2716vector.ph:                                        ; preds = %entry
  ; %n.vec = 1024 rounded down to a multiple of the vector step; the splat of
  ; %x is materialized here, outside the loop that uses it.
2717  %2 = call i64 @llvm.vscale.i64()
2718  %3 = shl i64 %2, 2
2719  %n.mod.vf = urem i64 1024, %3
2720  %n.vec = sub nsw i64 1024, %n.mod.vf
2721  %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
2722  %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
2723  %4 = call i64 @llvm.vscale.i64()
2724  %5 = shl i64 %4, 2
2725  br label %vector.body
2726
2727vector.body:                                      ; preds = %vector.body, %vector.ph
2728  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2729  %6 = getelementptr inbounds i32, ptr %a, i64 %index
2730  %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
2731  %7 = urem <vscale x 4 x i32> %wide.load, %broadcast.splat
2732  store <vscale x 4 x i32> %7, ptr %6, align 4
2733  %index.next = add nuw i64 %index, %5
2734  %8 = icmp eq i64 %index.next, %n.vec
2735  br i1 %8, label %middle.block, label %vector.body
2736
2737middle.block:                                     ; preds = %vector.body
  ; Skip the scalar loop when the vector loop covered all 1024 elements.
2738  %cmp.n = icmp eq i64 %n.mod.vf, 0
2739  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
2740
2741for.body.preheader:                               ; preds = %entry, %middle.block
2742  %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
2743  br label %for.body
2744
2745for.cond.cleanup:                                 ; preds = %for.body, %middle.block
2746  ret void
2747
2748for.body:                                         ; preds = %for.body.preheader, %for.body
2749  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
2750  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
2751  %9 = load i32, ptr %arrayidx, align 4
2752  %rem = urem i32 %9, %x
2753  store i32 %rem, ptr %arrayidx, align 4
2754  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
2755  %cmp.not = icmp eq i64 %indvars.iv.next, 1024
2756  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
2757}
2758
; Scalable-vector signed-remainder test over 1024 i32 elements. The splat of
; %x is built once in %vector.ph and used as the second operand of srem in
; %vector.body, which handles vscale x 4 elements per iteration. The expected
; assembly uses vrem.vx with scalar register a1 in the vector loop; %for.body
; is the scalar remainder loop (expected: remw).
2759define void @sink_splat_srem_scalable(ptr nocapture %a, i32 signext %x) {
2760; CHECK-LABEL: sink_splat_srem_scalable:
2761; CHECK:       # %bb.0: # %entry
2762; CHECK-NEXT:    csrr a5, vlenb
2763; CHECK-NEXT:    srli a3, a5, 1
2764; CHECK-NEXT:    li a2, 1024
2765; CHECK-NEXT:    bgeu a2, a3, .LBB45_2
2766; CHECK-NEXT:  # %bb.1:
2767; CHECK-NEXT:    li a2, 0
2768; CHECK-NEXT:    j .LBB45_5
2769; CHECK-NEXT:  .LBB45_2: # %vector.ph
2770; CHECK-NEXT:    addi a2, a3, -1
2771; CHECK-NEXT:    andi a4, a2, 1024
2772; CHECK-NEXT:    xori a2, a4, 1024
2773; CHECK-NEXT:    slli a5, a5, 1
2774; CHECK-NEXT:    mv a6, a0
2775; CHECK-NEXT:    mv a7, a2
2776; CHECK-NEXT:    vsetvli t0, zero, e32, m2, ta, ma
2777; CHECK-NEXT:  .LBB45_3: # %vector.body
2778; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
2779; CHECK-NEXT:    vl2re32.v v8, (a6)
2780; CHECK-NEXT:    sub a7, a7, a3
2781; CHECK-NEXT:    vrem.vx v8, v8, a1
2782; CHECK-NEXT:    vs2r.v v8, (a6)
2783; CHECK-NEXT:    add a6, a6, a5
2784; CHECK-NEXT:    bnez a7, .LBB45_3
2785; CHECK-NEXT:  # %bb.4: # %middle.block
2786; CHECK-NEXT:    beqz a4, .LBB45_7
2787; CHECK-NEXT:  .LBB45_5: # %for.body.preheader
2788; CHECK-NEXT:    slli a2, a2, 2
2789; CHECK-NEXT:    lui a3, 1
2790; CHECK-NEXT:    add a2, a0, a2
2791; CHECK-NEXT:    add a0, a0, a3
2792; CHECK-NEXT:  .LBB45_6: # %for.body
2793; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
2794; CHECK-NEXT:    lw a3, 0(a2)
2795; CHECK-NEXT:    remw a3, a3, a1
2796; CHECK-NEXT:    sw a3, 0(a2)
2797; CHECK-NEXT:    addi a2, a2, 4
2798; CHECK-NEXT:    bne a2, a0, .LBB45_6
2799; CHECK-NEXT:  .LBB45_7: # %for.cond.cleanup
2800; CHECK-NEXT:    ret
2801entry:
  ; %1 = vscale * 4 = number of i32 elements per vector iteration. If that
  ; exceeds the 1024-element trip count, go straight to the scalar loop.
2802  %0 = call i64 @llvm.vscale.i64()
2803  %1 = shl i64 %0, 2
2804  %min.iters.check = icmp ugt i64 %1, 1024
2805  br i1 %min.iters.check, label %for.body.preheader, label %vector.ph
2806
2807vector.ph:                                        ; preds = %entry
  ; %n.vec = 1024 rounded down to a multiple of the vector step; the splat of
  ; %x is materialized here, outside the loop that uses it.
2808  %2 = call i64 @llvm.vscale.i64()
2809  %3 = shl i64 %2, 2
2810  %n.mod.vf = urem i64 1024, %3
2811  %n.vec = sub nsw i64 1024, %n.mod.vf
2812  %broadcast.splatinsert = insertelement <vscale x 4 x i32> poison, i32 %x, i32 0
2813  %broadcast.splat = shufflevector <vscale x 4 x i32> %broadcast.splatinsert, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
2814  %4 = call i64 @llvm.vscale.i64()
2815  %5 = shl i64 %4, 2
2816  br label %vector.body
2817
2818vector.body:                                      ; preds = %vector.body, %vector.ph
2819  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
2820  %6 = getelementptr inbounds i32, ptr %a, i64 %index
2821  %wide.load = load <vscale x 4 x i32>, ptr %6, align 4
2822  %7 = srem <vscale x 4 x i32> %wide.load, %broadcast.splat
2823  store <vscale x 4 x i32> %7, ptr %6, align 4
2824  %index.next = add nuw i64 %index, %5
2825  %8 = icmp eq i64 %index.next, %n.vec
2826  br i1 %8, label %middle.block, label %vector.body
2827
2828middle.block:                                     ; preds = %vector.body
  ; Skip the scalar loop when the vector loop covered all 1024 elements.
2829  %cmp.n = icmp eq i64 %n.mod.vf, 0
2830  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader
2831
2832for.body.preheader:                               ; preds = %entry, %middle.block
2833  %indvars.iv.ph = phi i64 [ 0, %entry ], [ %n.vec, %middle.block ]
2834  br label %for.body
2835
2836for.cond.cleanup:                                 ; preds = %for.body, %middle.block
2837  ret void
2838
2839for.body:                                         ; preds = %for.body.preheader, %for.body
2840  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ]
2841  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv
2842  %9 = load i32, ptr %arrayidx, align 4
2843  %rem = srem i32 %9, %x
2844  store i32 %rem, ptr %arrayidx, align 4
2845  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
2846  %cmp.not = icmp eq i64 %indvars.iv.next, 1024
2847  br i1 %cmp.not, label %for.cond.cleanup, label %for.body
2848}
2849
2850declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>)
2851
; Fixed-width <4 x i32> loop applying llvm.smin to each loaded vector and a
; splat of %x (built in %entry); the splat is the SECOND operand. The expected
; assembly uses vmin.vx with scalar register a1 inside the loop and no
; separate splat instruction.
; NOTE(review): unlike the div/rem tests in this file, which step the index
; with `add nuw`, this loop steps it with `sub nuw i64 %index, 4` starting
; from 0 while the exit test still compares against 1024 (the expected code
; accordingly moves the pointer by -16 per iteration). Confirm this is
; intentional; changing it requires regenerating the assertions.
2852define void @sink_splat_min(ptr nocapture %a, i32 signext %x) {
2853; CHECK-LABEL: sink_splat_min:
2854; CHECK:       # %bb.0: # %entry
2855; CHECK-NEXT:    li a2, 1024
2856; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
2857; CHECK-NEXT:  .LBB46_1: # %vector.body
2858; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
2859; CHECK-NEXT:    vle32.v v8, (a0)
2860; CHECK-NEXT:    addi a2, a2, 4
2861; CHECK-NEXT:    vmin.vx v8, v8, a1
2862; CHECK-NEXT:    vse32.v v8, (a0)
2863; CHECK-NEXT:    addi a0, a0, -16
2864; CHECK-NEXT:    bnez a2, .LBB46_1
2865; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
2866; CHECK-NEXT:    ret
2867entry:
2868  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
2869  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
2870  br label %vector.body
2871
2872vector.body:                                      ; preds = %vector.body, %entry
2873  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2874  %0 = getelementptr inbounds i32, ptr %a, i64 %index
2875  %wide.load = load <4 x i32>, ptr %0, align 4
2876  %1 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat)
2877  store <4 x i32> %1, ptr %0, align 4
2878  %index.next = sub nuw i64 %index, 4
2879  %2 = icmp eq i64 %index.next, 1024
2880  br i1 %2, label %for.cond.cleanup, label %vector.body
2881
2882for.cond.cleanup:                                 ; preds = %vector.body
2883  ret void
2884}
2885
; Commuted form of the smin splat test: the splat of %x (built in %entry) is
; the FIRST operand of llvm.smin and the loaded vector is the second. The
; expected assembly still uses vmin.vx with scalar register a1 inside the loop
; and no separate splat instruction.
; NOTE(review): unlike the div/rem tests in this file, which step the index
; with `add nuw`, this loop steps it with `sub nuw i64 %index, 4` starting
; from 0 while the exit test still compares against 1024 (the expected code
; accordingly moves the pointer by -16 per iteration). Confirm this is
; intentional; changing it requires regenerating the assertions.
2886define void @sink_splat_min_commute(ptr nocapture %a, i32 signext %x) {
2887; CHECK-LABEL: sink_splat_min_commute:
2888; CHECK:       # %bb.0: # %entry
2889; CHECK-NEXT:    li a2, 1024
2890; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
2891; CHECK-NEXT:  .LBB47_1: # %vector.body
2892; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
2893; CHECK-NEXT:    vle32.v v8, (a0)
2894; CHECK-NEXT:    addi a2, a2, 4
2895; CHECK-NEXT:    vmin.vx v8, v8, a1
2896; CHECK-NEXT:    vse32.v v8, (a0)
2897; CHECK-NEXT:    addi a0, a0, -16
2898; CHECK-NEXT:    bnez a2, .LBB47_1
2899; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
2900; CHECK-NEXT:    ret
2901entry:
2902  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
2903  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
2904  br label %vector.body
2905
2906vector.body:                                      ; preds = %vector.body, %entry
2907  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
2908  %0 = getelementptr inbounds i32, ptr %a, i64 %index
2909  %wide.load = load <4 x i32>, ptr %0, align 4
2910  %1 = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load)
2911  store <4 x i32> %1, ptr %0, align 4
2912  %index.next = sub nuw i64 %index, 4
2913  %2 = icmp eq i64 %index.next, 1024
2914  br i1 %2, label %for.cond.cleanup, label %vector.body
2915
2916for.cond.cleanup:                                 ; preds = %vector.body
2917  ret void
2918}
2919
2920declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)
2921
; Signed max against a loop-invariant splat of %x. The CHECK lines expect the
; splat to be consumed as the scalar operand of vmax.vx inside the loop.
;
; The induction variable steps with `add nuw`. It previously used `sub nuw`
; starting from 0, which unsigned-wraps on the first iteration and therefore
; yields poison (and UB on the exit branch).
; NOTE(review): the CHECK lines below were rewritten by hand to the form the
; `add nuw` sibling tests in this file produce; re-run
; utils/update_llc_test_checks.py to confirm.
define void @sink_splat_max(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_max:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB48_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a2, .LBB48_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Broadcast %x to all four lanes outside the loop.
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat)
  store <4 x i32> %1, ptr %0, align 4
  ; Advance by one 4-element vector; exit once 1024 elements are processed.
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}
2955
; Signed max with the loop-invariant splat of %x as the FIRST intrinsic operand.
; The CHECK lines expect the splat to be consumed as the scalar operand of
; vmax.vx inside the loop.
;
; The induction variable steps with `add nuw`. It previously used `sub nuw`
; starting from 0, which unsigned-wraps on the first iteration and therefore
; yields poison (and UB on the exit branch).
; NOTE(review): the CHECK lines below were rewritten by hand to the form the
; `add nuw` sibling tests in this file produce; re-run
; utils/update_llc_test_checks.py to confirm.
define void @sink_splat_max_commute(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_max_commute:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB49_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmax.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a2, .LBB49_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Broadcast %x to all four lanes outside the loop.
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load)
  store <4 x i32> %1, ptr %0, align 4
  ; Advance by one 4-element vector; exit once 1024 elements are processed.
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}
2989
2990declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
2991
; Unsigned min against a loop-invariant splat of %x. The CHECK lines expect the
; splat to be consumed as the scalar operand of vminu.vx inside the loop.
;
; The induction variable steps with `add nuw`. It previously used `sub nuw`
; starting from 0, which unsigned-wraps on the first iteration and therefore
; yields poison (and UB on the exit branch).
; NOTE(review): the CHECK lines below were rewritten by hand to the form the
; `add nuw` sibling tests in this file produce; re-run
; utils/update_llc_test_checks.py to confirm.
define void @sink_splat_umin(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_umin:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB50_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vminu.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a2, .LBB50_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Broadcast %x to all four lanes outside the loop.
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat)
  store <4 x i32> %1, ptr %0, align 4
  ; Advance by one 4-element vector; exit once 1024 elements are processed.
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}
3025
; Unsigned min with the loop-invariant splat of %x as the FIRST intrinsic
; operand. The CHECK lines expect the splat to be consumed as the scalar
; operand of vminu.vx inside the loop.
;
; The induction variable steps with `add nuw`. It previously used `sub nuw`
; starting from 0, which unsigned-wraps on the first iteration and therefore
; yields poison (and UB on the exit branch).
; NOTE(review): the CHECK lines below were rewritten by hand to the form the
; `add nuw` sibling tests in this file produce; re-run
; utils/update_llc_test_checks.py to confirm.
define void @sink_splat_umin_commute(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_umin_commute:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB51_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vminu.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a2, .LBB51_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Broadcast %x to all four lanes outside the loop.
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load)
  store <4 x i32> %1, ptr %0, align 4
  ; Advance by one 4-element vector; exit once 1024 elements are processed.
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}
3059
3060declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)
3061
; Unsigned max against a loop-invariant splat of %x. The CHECK lines expect the
; splat to be consumed as the scalar operand of vmaxu.vx inside the loop.
;
; The induction variable steps with `add nuw`. It previously used `sub nuw`
; starting from 0, which unsigned-wraps on the first iteration and therefore
; yields poison (and UB on the exit branch).
; NOTE(review): the CHECK lines below were rewritten by hand to the form the
; `add nuw` sibling tests in this file produce; re-run
; utils/update_llc_test_checks.py to confirm.
define void @sink_splat_umax(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_umax:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB52_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmaxu.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a2, .LBB52_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Broadcast %x to all four lanes outside the loop.
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat)
  store <4 x i32> %1, ptr %0, align 4
  ; Advance by one 4-element vector; exit once 1024 elements are processed.
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}
3095
; Unsigned max with the loop-invariant splat of %x as the FIRST intrinsic
; operand. The CHECK lines expect the splat to be consumed as the scalar
; operand of vmaxu.vx inside the loop.
;
; The induction variable steps with `add nuw`. It previously used `sub nuw`
; starting from 0, which unsigned-wraps on the first iteration and therefore
; yields poison (and UB on the exit branch).
; NOTE(review): the CHECK lines below were rewritten by hand to the form the
; `add nuw` sibling tests in this file produce; re-run
; utils/update_llc_test_checks.py to confirm.
define void @sink_splat_umax_commute(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_umax_commute:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB53_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vmaxu.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a2, .LBB53_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Broadcast %x to all four lanes outside the loop.
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load)
  store <4 x i32> %1, ptr %0, align 4
  ; Advance by one 4-element vector; exit once 1024 elements are processed.
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}
3129
3130declare <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32>, <4 x i32>)
3131
; Signed saturating add of each loaded vector with a loop-invariant splat of
; %x. The CHECK lines expect the splat to appear only as the scalar operand of
; vsadd.vx inside the loop.
define void @sink_splat_sadd_sat(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_sadd_sat:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB54_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsadd.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a2, .LBB54_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Build <x, x, x, x> once, before entering the loop.
  %x.lane0 = insertelement <4 x i32> poison, i32 %x, i32 0
  %x.splat = shufflevector <4 x i32> %x.lane0, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; entered from %entry, then loops on itself
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %vector.body ]
  %slot = getelementptr inbounds i32, ptr %a, i64 %iv
  %vec = load <4 x i32>, ptr %slot, align 4
  %sat = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %vec, <4 x i32> %x.splat)
  store <4 x i32> %sat, ptr %slot, align 4
  ; Four elements per trip; leave after element 1024.
  %iv.next = add nuw i64 %iv, 4
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; loop exit
  ret void
}
3165
; Same as the signed saturating-add case but with the splat of %x passed as
; the FIRST intrinsic operand. The CHECK lines still expect vsadd.vx with the
; scalar in a1.
define void @sink_splat_sadd_sat_commute(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_sadd_sat_commute:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB55_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsadd.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a2, .LBB55_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Build <x, x, x, x> once, before entering the loop.
  %x.lane0 = insertelement <4 x i32> poison, i32 %x, i32 0
  %x.splat = shufflevector <4 x i32> %x.lane0, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; entered from %entry, then loops on itself
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %vector.body ]
  %slot = getelementptr inbounds i32, ptr %a, i64 %iv
  %vec = load <4 x i32>, ptr %slot, align 4
  %sat = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %x.splat, <4 x i32> %vec)
  store <4 x i32> %sat, ptr %slot, align 4
  ; Four elements per trip; leave after element 1024.
  %iv.next = add nuw i64 %iv, 4
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; loop exit
  ret void
}
3199
3200declare <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32>, <4 x i32>)
3201
; Signed saturating subtract of a loop-invariant splat of %x from each loaded
; vector. The CHECK lines expect the splat to be consumed as the scalar operand
; of vssub.vx inside the loop.
;
; The induction variable steps with `add nuw`. It previously used `sub nuw`
; starting from 0, which unsigned-wraps on the first iteration and therefore
; yields poison (and UB on the exit branch).
; NOTE(review): the CHECK lines below were rewritten by hand to the form the
; `add nuw` sibling tests in this file produce; re-run
; utils/update_llc_test_checks.py to confirm.
define void @sink_splat_ssub_sat(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_ssub_sat:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB56_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vssub.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a2, .LBB56_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Broadcast %x to all four lanes outside the loop.
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat)
  store <4 x i32> %1, ptr %0, align 4
  ; Advance by one 4-element vector; exit once 1024 elements are processed.
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}
3235
3236declare <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32>, <4 x i32>)
3237
; Unsigned saturating add of each loaded vector with a loop-invariant splat of
; %x. The CHECK lines expect the splat to appear only as the scalar operand of
; vsaddu.vx inside the loop.
define void @sink_splat_uadd_sat(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_uadd_sat:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB57_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a2, .LBB57_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Build <x, x, x, x> once, before entering the loop.
  %x.lane0 = insertelement <4 x i32> poison, i32 %x, i32 0
  %x.splat = shufflevector <4 x i32> %x.lane0, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; entered from %entry, then loops on itself
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %vector.body ]
  %slot = getelementptr inbounds i32, ptr %a, i64 %iv
  %vec = load <4 x i32>, ptr %slot, align 4
  %sat = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %vec, <4 x i32> %x.splat)
  store <4 x i32> %sat, ptr %slot, align 4
  ; Four elements per trip; leave after element 1024.
  %iv.next = add nuw i64 %iv, 4
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; loop exit
  ret void
}
3271
; Same as the unsigned saturating-add case but with the splat of %x passed as
; the FIRST intrinsic operand. The CHECK lines still expect vsaddu.vx with the
; scalar in a1.
define void @sink_splat_uadd_sat_commute(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_uadd_sat_commute:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB58_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsaddu.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a2, .LBB58_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Build <x, x, x, x> once, before entering the loop.
  %x.lane0 = insertelement <4 x i32> poison, i32 %x, i32 0
  %x.splat = shufflevector <4 x i32> %x.lane0, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; entered from %entry, then loops on itself
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %vector.body ]
  %slot = getelementptr inbounds i32, ptr %a, i64 %iv
  %vec = load <4 x i32>, ptr %slot, align 4
  %sat = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %x.splat, <4 x i32> %vec)
  store <4 x i32> %sat, ptr %slot, align 4
  ; Four elements per trip; leave after element 1024.
  %iv.next = add nuw i64 %iv, 4
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; loop exit
  ret void
}
3305
3306declare <4 x i32> @llvm.usub.sat.v4i32(<4 x i32>, <4 x i32>)
3307
; Unsigned saturating subtract of a loop-invariant splat of %x from each loaded
; vector. The CHECK lines expect the splat to be consumed as the scalar operand
; of vssubu.vx inside the loop.
;
; The induction variable steps with `add nuw`. It previously used `sub nuw`
; starting from 0, which unsigned-wraps on the first iteration and therefore
; yields poison (and UB on the exit branch).
; NOTE(review): the CHECK lines below were rewritten by hand to the form the
; `add nuw` sibling tests in this file produce; re-run
; utils/update_llc_test_checks.py to confirm.
define void @sink_splat_usub_sat(ptr nocapture %a, i32 signext %x) {
; CHECK-LABEL: sink_splat_usub_sat:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a2, 1
; CHECK-NEXT:    add a2, a0, a2
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB59_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vssubu.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a2, .LBB59_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Broadcast %x to all four lanes outside the loop.
  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %0 = getelementptr inbounds i32, ptr %a, i64 %index
  %wide.load = load <4 x i32>, ptr %0, align 4
  %1 = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat)
  store <4 x i32> %1, ptr %0, align 4
  ; Advance by one 4-element vector; exit once 1024 elements are processed.
  %index.next = add nuw i64 %index, 4
  %2 = icmp eq i64 %index.next, 1024
  br i1 %2, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}
3341
3342declare <4 x i32> @llvm.vp.mul.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
3343
; Masked, EVL-controlled multiply (llvm.vp.mul) of each loaded vector by a
; loop-invariant splat of %x. The CHECK lines expect a masked vmul.vx with the
; scalar in a1, executed under a vsetvli taken from %vl (a2).
define void @sink_splat_vp_mul(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_mul:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a3, 1
; CHECK-NEXT:    add a3, a0, a3
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB60_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT:    vmul.vx v8, v8, a1, v0.t
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a3, .LBB60_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Build <x, x, x, x> once, before entering the loop.
  %x.lane0 = insertelement <4 x i32> poison, i32 %x, i32 0
  %x.splat = shufflevector <4 x i32> %x.lane0, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; entered from %entry, then loops on itself
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %vector.body ]
  %slot = getelementptr inbounds i32, ptr %a, i64 %iv
  %vec = load <4 x i32>, ptr %slot, align 4
  %prod = call <4 x i32> @llvm.vp.mul.v4i32(<4 x i32> %vec, <4 x i32> %x.splat, <4 x i1> %m, i32 %vl)
  store <4 x i32> %prod, ptr %slot, align 4
  ; Four elements per trip; leave after element 1024.
  %iv.next = add nuw i64 %iv, 4
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; loop exit
  ret void
}
3379
3380declare <4 x i32> @llvm.vp.add.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
3381
; Masked, EVL-controlled add (llvm.vp.add) of each loaded vector with a
; loop-invariant splat of %x. The CHECK lines expect a masked vadd.vx with the
; scalar in a1, executed under a vsetvli taken from %vl (a2).
define void @sink_splat_vp_add(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_add:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a3, 1
; CHECK-NEXT:    add a3, a0, a3
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB61_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT:    vadd.vx v8, v8, a1, v0.t
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a3, .LBB61_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Build <x, x, x, x> once, before entering the loop.
  %x.lane0 = insertelement <4 x i32> poison, i32 %x, i32 0
  %x.splat = shufflevector <4 x i32> %x.lane0, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; entered from %entry, then loops on itself
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %vector.body ]
  %slot = getelementptr inbounds i32, ptr %a, i64 %iv
  %vec = load <4 x i32>, ptr %slot, align 4
  %sum = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %vec, <4 x i32> %x.splat, <4 x i1> %m, i32 %vl)
  store <4 x i32> %sum, ptr %slot, align 4
  ; Four elements per trip; leave after element 1024.
  %iv.next = add nuw i64 %iv, 4
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; loop exit
  ret void
}
3417
; llvm.vp.add with the loop-invariant splat of %x passed as the FIRST vector
; operand. The CHECK lines still expect a masked vadd.vx with the scalar in a1.
define void @sink_splat_vp_add_commute(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_add_commute:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a3, 1
; CHECK-NEXT:    add a3, a0, a3
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB62_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT:    vadd.vx v8, v8, a1, v0.t
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a3, .LBB62_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Build <x, x, x, x> once, before entering the loop.
  %x.lane0 = insertelement <4 x i32> poison, i32 %x, i32 0
  %x.splat = shufflevector <4 x i32> %x.lane0, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; entered from %entry, then loops on itself
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %vector.body ]
  %slot = getelementptr inbounds i32, ptr %a, i64 %iv
  %vec = load <4 x i32>, ptr %slot, align 4
  %sum = call <4 x i32> @llvm.vp.add.v4i32(<4 x i32> %x.splat, <4 x i32> %vec, <4 x i1> %m, i32 %vl)
  store <4 x i32> %sum, ptr %slot, align 4
  ; Four elements per trip; leave after element 1024.
  %iv.next = add nuw i64 %iv, 4
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; loop exit
  ret void
}
3453
3454declare <4 x i32> @llvm.vp.sub.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
3455
; Masked, EVL-controlled subtract (llvm.vp.sub) of a loop-invariant splat of %x
; from each loaded vector. The CHECK lines expect a masked vsub.vx with the
; scalar in a1, executed under a vsetvli taken from %vl (a2).
define void @sink_splat_vp_sub(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_sub:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a3, 1
; CHECK-NEXT:    add a3, a0, a3
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB63_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT:    vsub.vx v8, v8, a1, v0.t
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a3, .LBB63_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Build <x, x, x, x> once, before entering the loop.
  %x.lane0 = insertelement <4 x i32> poison, i32 %x, i32 0
  %x.splat = shufflevector <4 x i32> %x.lane0, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; entered from %entry, then loops on itself
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %vector.body ]
  %slot = getelementptr inbounds i32, ptr %a, i64 %iv
  %vec = load <4 x i32>, ptr %slot, align 4
  %diff = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %vec, <4 x i32> %x.splat, <4 x i1> %m, i32 %vl)
  store <4 x i32> %diff, ptr %slot, align 4
  ; Four elements per trip; leave after element 1024.
  %iv.next = add nuw i64 %iv, 4
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; loop exit
  ret void
}
3491
; llvm.vp.sub with the loop-invariant splat of %x as the minuend (first
; operand) and the loaded vector as the subtrahend. The CHECK lines expect the
; reversed-subtract form, a masked vrsub.vx with the scalar in a1.
define void @sink_splat_vp_rsub(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_rsub:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a3, 1
; CHECK-NEXT:    add a3, a0, a3
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB64_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT:    vrsub.vx v8, v8, a1, v0.t
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a3, .LBB64_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Build <x, x, x, x> once, before entering the loop.
  %x.lane0 = insertelement <4 x i32> poison, i32 %x, i32 0
  %x.splat = shufflevector <4 x i32> %x.lane0, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; entered from %entry, then loops on itself
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %vector.body ]
  %slot = getelementptr inbounds i32, ptr %a, i64 %iv
  %vec = load <4 x i32>, ptr %slot, align 4
  %diff = call <4 x i32> @llvm.vp.sub.v4i32(<4 x i32> %x.splat, <4 x i32> %vec, <4 x i1> %m, i32 %vl)
  store <4 x i32> %diff, ptr %slot, align 4
  ; Four elements per trip; leave after element 1024.
  %iv.next = add nuw i64 %iv, 4
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; loop exit
  ret void
}
3527
3528declare <4 x i32> @llvm.vp.shl.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
3529
; Masked, EVL-controlled left shift (llvm.vp.shl) of each loaded vector by a
; loop-invariant splat of %x. The CHECK lines expect a masked vsll.vx with the
; shift amount in a1, executed under a vsetvli taken from %vl (a2).
define void @sink_splat_vp_shl(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_shl:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a3, 1
; CHECK-NEXT:    add a3, a0, a3
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB65_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT:    vsll.vx v8, v8, a1, v0.t
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a3, .LBB65_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Build <x, x, x, x> once, before entering the loop.
  %x.lane0 = insertelement <4 x i32> poison, i32 %x, i32 0
  %x.splat = shufflevector <4 x i32> %x.lane0, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; entered from %entry, then loops on itself
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %vector.body ]
  %slot = getelementptr inbounds i32, ptr %a, i64 %iv
  %vec = load <4 x i32>, ptr %slot, align 4
  %shifted = call <4 x i32> @llvm.vp.shl.v4i32(<4 x i32> %vec, <4 x i32> %x.splat, <4 x i1> %m, i32 %vl)
  store <4 x i32> %shifted, ptr %slot, align 4
  ; Four elements per trip; leave after element 1024.
  %iv.next = add nuw i64 %iv, 4
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; loop exit
  ret void
}
3565
3566declare <4 x i32> @llvm.vp.lshr.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
3567
; Masked, EVL-controlled logical right shift (llvm.vp.lshr) of each loaded
; vector by a loop-invariant splat of %x. The CHECK lines expect a masked
; vsrl.vx with the shift amount in a1, under a vsetvli taken from %vl (a2).
define void @sink_splat_vp_lshr(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_lshr:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a3, 1
; CHECK-NEXT:    add a3, a0, a3
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB66_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT:    vsrl.vx v8, v8, a1, v0.t
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a3, .LBB66_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Build <x, x, x, x> once, before entering the loop.
  %x.lane0 = insertelement <4 x i32> poison, i32 %x, i32 0
  %x.splat = shufflevector <4 x i32> %x.lane0, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; entered from %entry, then loops on itself
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %vector.body ]
  %slot = getelementptr inbounds i32, ptr %a, i64 %iv
  %vec = load <4 x i32>, ptr %slot, align 4
  %shifted = call <4 x i32> @llvm.vp.lshr.v4i32(<4 x i32> %vec, <4 x i32> %x.splat, <4 x i1> %m, i32 %vl)
  store <4 x i32> %shifted, ptr %slot, align 4
  ; Four elements per trip; leave after element 1024.
  %iv.next = add nuw i64 %iv, 4
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; loop exit
  ret void
}
3603
3604declare <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
3605
; Masked, EVL-controlled arithmetic right shift (llvm.vp.ashr) of each loaded
; vector by a loop-invariant splat of %x. The CHECK lines expect a masked
; vsra.vx with the shift amount in a1, under a vsetvli taken from %vl (a2).
define void @sink_splat_vp_ashr(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: sink_splat_vp_ashr:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lui a3, 1
; CHECK-NEXT:    add a3, a0, a3
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:  .LBB67_1: # %vector.body
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
; CHECK-NEXT:    vsra.vx v8, v8, a1, v0.t
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    addi a0, a0, 16
; CHECK-NEXT:    bne a0, a3, .LBB67_1
; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
; CHECK-NEXT:    ret
entry:
  ; Build <x, x, x, x> once, before entering the loop.
  %x.lane0 = insertelement <4 x i32> poison, i32 %x, i32 0
  %x.splat = shufflevector <4 x i32> %x.lane0, <4 x i32> poison, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; entered from %entry, then loops on itself
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %vector.body ]
  %slot = getelementptr inbounds i32, ptr %a, i64 %iv
  %vec = load <4 x i32>, ptr %slot, align 4
  %shifted = call <4 x i32> @llvm.vp.ashr.v4i32(<4 x i32> %vec, <4 x i32> %x.splat, <4 x i1> %m, i32 %vl)
  store <4 x i32> %shifted, ptr %slot, align 4
  ; Four elements per trip; leave after element 1024.
  %iv.next = add nuw i64 %iv, 4
  %done = icmp eq i64 %iv.next, 1024
  br i1 %done, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; loop exit
  ret void
}
3641
; NOTE(review): the name suffix says v4i32 but the operands are <4 x float>;
; v4f32 (as used by llvm.vp.fma.v4f32 in this file) looks like the intended
; mangling. Confirm there are no other users before renaming.
3642declare <4 x float> @llvm.vp.fmul.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)
3643
; Masked, VL-predicated float multiply by a splat of %x that is built in the
; entry block. The loop updates 1024 floats of %a in place, 4 per iteration.
; The assertions expect the scalar-operand form vfmul.vf (fa0) inside the loop
; and no vector splat before it.
3644define void @sink_splat_vp_fmul(ptr nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
3645; CHECK-LABEL: sink_splat_vp_fmul:
3646; CHECK:       # %bb.0: # %entry
3647; CHECK-NEXT:    lui a2, 1
3648; CHECK-NEXT:    add a2, a0, a2
3649; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3650; CHECK-NEXT:  .LBB68_1: # %vector.body
3651; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
3652; CHECK-NEXT:    vle32.v v8, (a0)
3653; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
3654; CHECK-NEXT:    vfmul.vf v8, v8, fa0, v0.t
3655; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3656; CHECK-NEXT:    vse32.v v8, (a0)
3657; CHECK-NEXT:    addi a0, a0, 16
3658; CHECK-NEXT:    bne a0, a2, .LBB68_1
3659; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
3660; CHECK-NEXT:    ret
3661entry:
3662  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
3663  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
3664  br label %vector.body
3665
3666vector.body:                                      ; preds = %vector.body, %entry
3667  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3668  %0 = getelementptr inbounds float, ptr %a, i64 %index
3669  %wide.load = load <4 x float>, ptr %0, align 4
3670  %1 = call <4 x float> @llvm.vp.fmul.v4i32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x i1> %m, i32 %vl)
3671  store <4 x float> %1, ptr %0, align 4
3672  %index.next = add nuw i64 %index, 4
3673  %2 = icmp eq i64 %index.next, 1024
3674  br i1 %2, label %for.cond.cleanup, label %vector.body
3675
3676for.cond.cleanup:                                 ; preds = %vector.body
3677  ret void
3678}
3679
; NOTE(review): the name suffix says v4i32 but the operands are <4 x float>;
; v4f32 (as used by llvm.vp.fma.v4f32 in this file) looks like the intended
; mangling. Confirm there are no other users before renaming.
3680declare <4 x float> @llvm.vp.fdiv.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)
3681
; Masked, VL-predicated float division of the loaded vector by a splat of %x
; that is built in the entry block. The loop updates 1024 floats of %a in
; place, 4 per iteration. The assertions expect the scalar-operand form
; vfdiv.vf (fa0) inside the loop and no vector splat before it.
3682define void @sink_splat_vp_fdiv(ptr nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
3683; CHECK-LABEL: sink_splat_vp_fdiv:
3684; CHECK:       # %bb.0: # %entry
3685; CHECK-NEXT:    lui a2, 1
3686; CHECK-NEXT:    add a2, a0, a2
3687; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3688; CHECK-NEXT:  .LBB69_1: # %vector.body
3689; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
3690; CHECK-NEXT:    vle32.v v8, (a0)
3691; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
3692; CHECK-NEXT:    vfdiv.vf v8, v8, fa0, v0.t
3693; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3694; CHECK-NEXT:    vse32.v v8, (a0)
3695; CHECK-NEXT:    addi a0, a0, 16
3696; CHECK-NEXT:    bne a0, a2, .LBB69_1
3697; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
3698; CHECK-NEXT:    ret
3699entry:
3700  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
3701  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
3702  br label %vector.body
3703
3704vector.body:                                      ; preds = %vector.body, %entry
3705  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3706  %0 = getelementptr inbounds float, ptr %a, i64 %index
3707  %wide.load = load <4 x float>, ptr %0, align 4
3708  %1 = call <4 x float> @llvm.vp.fdiv.v4i32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x i1> %m, i32 %vl)
3709  store <4 x float> %1, ptr %0, align 4
3710  %index.next = add nuw i64 %index, 4
3711  %2 = icmp eq i64 %index.next, 1024
3712  br i1 %2, label %for.cond.cleanup, label %vector.body
3713
3714for.cond.cleanup:                                 ; preds = %vector.body
3715  ret void
3716}
3717
; Reversed division: the splat of %x is the first operand (dividend) of
; llvm.vp.fdiv and the loaded vector is the divisor. The loop updates 1024
; floats of %a in place, 4 per iteration. The assertions expect the
; scalar-operand form vfrdiv.vf (fa0) inside the loop and no vector splat
; before it.
3718define void @sink_splat_vp_frdiv(ptr nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
3719; CHECK-LABEL: sink_splat_vp_frdiv:
3720; CHECK:       # %bb.0: # %entry
3721; CHECK-NEXT:    lui a2, 1
3722; CHECK-NEXT:    add a2, a0, a2
3723; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3724; CHECK-NEXT:  .LBB70_1: # %vector.body
3725; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
3726; CHECK-NEXT:    vle32.v v8, (a0)
3727; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
3728; CHECK-NEXT:    vfrdiv.vf v8, v8, fa0, v0.t
3729; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3730; CHECK-NEXT:    vse32.v v8, (a0)
3731; CHECK-NEXT:    addi a0, a0, 16
3732; CHECK-NEXT:    bne a0, a2, .LBB70_1
3733; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
3734; CHECK-NEXT:    ret
3735entry:
3736  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
3737  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
3738  br label %vector.body
3739
3740vector.body:                                      ; preds = %vector.body, %entry
3741  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3742  %0 = getelementptr inbounds float, ptr %a, i64 %index
3743  %wide.load = load <4 x float>, ptr %0, align 4
3744  %1 = call <4 x float> @llvm.vp.fdiv.v4i32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x i1> %m, i32 %vl)
3745  store <4 x float> %1, ptr %0, align 4
3746  %index.next = add nuw i64 %index, 4
3747  %2 = icmp eq i64 %index.next, 1024
3748  br i1 %2, label %for.cond.cleanup, label %vector.body
3749
3750for.cond.cleanup:                                 ; preds = %vector.body
3751  ret void
3752}
3753
; NOTE(review): the name suffix says v4i32 but the operands are <4 x float>;
; v4f32 (as used by llvm.vp.fma.v4f32 in this file) looks like the intended
; mangling. Confirm there are no other users before renaming.
3754declare <4 x float> @llvm.vp.fadd.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)
3755
; Masked, VL-predicated float add of a splat of %x that is built in the entry
; block. The loop updates 1024 floats of %a in place, 4 per iteration. The
; assertions expect the scalar-operand form vfadd.vf (fa0) inside the loop and
; no vector splat before it.
3756define void @sink_splat_vp_fadd(ptr nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
3757; CHECK-LABEL: sink_splat_vp_fadd:
3758; CHECK:       # %bb.0: # %entry
3759; CHECK-NEXT:    lui a2, 1
3760; CHECK-NEXT:    add a2, a0, a2
3761; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3762; CHECK-NEXT:  .LBB71_1: # %vector.body
3763; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
3764; CHECK-NEXT:    vle32.v v8, (a0)
3765; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
3766; CHECK-NEXT:    vfadd.vf v8, v8, fa0, v0.t
3767; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3768; CHECK-NEXT:    vse32.v v8, (a0)
3769; CHECK-NEXT:    addi a0, a0, 16
3770; CHECK-NEXT:    bne a0, a2, .LBB71_1
3771; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
3772; CHECK-NEXT:    ret
3773entry:
3774  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
3775  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
3776  br label %vector.body
3777
3778vector.body:                                      ; preds = %vector.body, %entry
3779  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3780  %0 = getelementptr inbounds float, ptr %a, i64 %index
3781  %wide.load = load <4 x float>, ptr %0, align 4
3782  %1 = call <4 x float> @llvm.vp.fadd.v4i32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x i1> %m, i32 %vl)
3783  store <4 x float> %1, ptr %0, align 4
3784  %index.next = add nuw i64 %index, 4
3785  %2 = icmp eq i64 %index.next, 1024
3786  br i1 %2, label %for.cond.cleanup, label %vector.body
3787
3788for.cond.cleanup:                                 ; preds = %vector.body
3789  ret void
3790}
3791
; NOTE(review): the name suffix says v4i32 but the operands are <4 x float>;
; v4f32 (as used by llvm.vp.fma.v4f32 in this file) looks like the intended
; mangling. Confirm there are no other users before renaming.
3792declare <4 x float> @llvm.vp.fsub.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)
3793
; Masked, VL-predicated float subtract of a splat of %x (second operand) from
; the loaded vector; the splat is built in the entry block. The loop updates
; 1024 floats of %a in place, 4 per iteration. The assertions expect the
; scalar-operand form vfsub.vf (fa0) inside the loop and no vector splat
; before it.
3794define void @sink_splat_vp_fsub(ptr nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
3795; CHECK-LABEL: sink_splat_vp_fsub:
3796; CHECK:       # %bb.0: # %entry
3797; CHECK-NEXT:    lui a2, 1
3798; CHECK-NEXT:    add a2, a0, a2
3799; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3800; CHECK-NEXT:  .LBB72_1: # %vector.body
3801; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
3802; CHECK-NEXT:    vle32.v v8, (a0)
3803; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
3804; CHECK-NEXT:    vfsub.vf v8, v8, fa0, v0.t
3805; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3806; CHECK-NEXT:    vse32.v v8, (a0)
3807; CHECK-NEXT:    addi a0, a0, 16
3808; CHECK-NEXT:    bne a0, a2, .LBB72_1
3809; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
3810; CHECK-NEXT:    ret
3811entry:
3812  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
3813  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
3814  br label %vector.body
3815
3816vector.body:                                      ; preds = %vector.body, %entry
3817  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3818  %0 = getelementptr inbounds float, ptr %a, i64 %index
3819  %wide.load = load <4 x float>, ptr %0, align 4
3820  %1 = call <4 x float> @llvm.vp.fsub.v4i32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x i1> %m, i32 %vl)
3821  store <4 x float> %1, ptr %0, align 4
3822  %index.next = add nuw i64 %index, 4
3823  %2 = icmp eq i64 %index.next, 1024
3824  br i1 %2, label %for.cond.cleanup, label %vector.body
3825
3826for.cond.cleanup:                                 ; preds = %vector.body
3827  ret void
3828}
3829
; NOTE(review): no call to llvm.vp.frsub is visible in this part of the file;
; the function below calls llvm.vp.fsub with swapped operands instead. This
; declaration may be unused - confirm against the rest of the file.
3830declare <4 x float> @llvm.vp.frsub.v4i32(<4 x float>, <4 x float>, <4 x i1>, i32)
3831
; Reversed subtract: the splat of %x is the first operand of llvm.vp.fsub and
; the loaded vector is subtracted from it. The loop updates 1024 floats of %a
; in place, 4 per iteration. The assertions expect the scalar-operand form
; vfrsub.vf (fa0) inside the loop and no vector splat before it.
3832define void @sink_splat_vp_frsub(ptr nocapture %a, float %x, <4 x i1> %m, i32 zeroext %vl) {
3833; CHECK-LABEL: sink_splat_vp_frsub:
3834; CHECK:       # %bb.0: # %entry
3835; CHECK-NEXT:    lui a2, 1
3836; CHECK-NEXT:    add a2, a0, a2
3837; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3838; CHECK-NEXT:  .LBB73_1: # %vector.body
3839; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
3840; CHECK-NEXT:    vle32.v v8, (a0)
3841; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
3842; CHECK-NEXT:    vfrsub.vf v8, v8, fa0, v0.t
3843; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3844; CHECK-NEXT:    vse32.v v8, (a0)
3845; CHECK-NEXT:    addi a0, a0, 16
3846; CHECK-NEXT:    bne a0, a2, .LBB73_1
3847; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
3848; CHECK-NEXT:    ret
3849entry:
3850  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
3851  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
3852  br label %vector.body
3853
3854vector.body:                                      ; preds = %vector.body, %entry
3855  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3856  %0 = getelementptr inbounds float, ptr %a, i64 %index
3857  %wide.load = load <4 x float>, ptr %0, align 4
3858  %1 = call <4 x float> @llvm.vp.fsub.v4i32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x i1> %m, i32 %vl)
3859  store <4 x float> %1, ptr %0, align 4
3860  %index.next = add nuw i64 %index, 4
3861  %2 = icmp eq i64 %index.next, 1024
3862  br i1 %2, label %for.cond.cleanup, label %vector.body
3863
3864for.cond.cleanup:                                 ; preds = %vector.body
3865  ret void
3866}
3867
3868declare <4 x i32> @llvm.vp.udiv.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
3869
; Masked, VL-predicated unsigned division of the loaded vector by a splat of
; %x that is built in the entry block. The loop updates 1024 i32 elements of
; %a in place, 4 per iteration. The assertions expect the scalar-operand form
; vdivu.vx (a1) inside the loop and no vector splat before it.
3870define void @sink_splat_vp_udiv(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
3871; CHECK-LABEL: sink_splat_vp_udiv:
3872; CHECK:       # %bb.0: # %entry
3873; CHECK-NEXT:    lui a3, 1
3874; CHECK-NEXT:    add a3, a0, a3
3875; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3876; CHECK-NEXT:  .LBB74_1: # %vector.body
3877; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
3878; CHECK-NEXT:    vle32.v v8, (a0)
3879; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
3880; CHECK-NEXT:    vdivu.vx v8, v8, a1, v0.t
3881; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3882; CHECK-NEXT:    vse32.v v8, (a0)
3883; CHECK-NEXT:    addi a0, a0, 16
3884; CHECK-NEXT:    bne a0, a3, .LBB74_1
3885; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
3886; CHECK-NEXT:    ret
3887entry:
3888  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3889  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3890  br label %vector.body
3891
3892vector.body:                                      ; preds = %vector.body, %entry
3893  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3894  %0 = getelementptr inbounds i32, ptr %a, i64 %index
3895  %wide.load = load <4 x i32>, ptr %0, align 4
3896  %1 = call <4 x i32> @llvm.vp.udiv.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
3897  store <4 x i32> %1, ptr %0, align 4
3898  %index.next = add nuw i64 %index, 4
3899  %2 = icmp eq i64 %index.next, 1024
3900  br i1 %2, label %for.cond.cleanup, label %vector.body
3901
3902for.cond.cleanup:                                 ; preds = %vector.body
3903  ret void
3904}
3905
3906declare <4 x i32> @llvm.vp.sdiv.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
3907
; Masked, VL-predicated signed division of the loaded vector by a splat of %x
; that is built in the entry block. The loop updates 1024 i32 elements of %a
; in place, 4 per iteration. The assertions expect the scalar-operand form
; vdiv.vx (a1) inside the loop and no vector splat before it.
3908define void @sink_splat_vp_sdiv(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
3909; CHECK-LABEL: sink_splat_vp_sdiv:
3910; CHECK:       # %bb.0: # %entry
3911; CHECK-NEXT:    lui a3, 1
3912; CHECK-NEXT:    add a3, a0, a3
3913; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3914; CHECK-NEXT:  .LBB75_1: # %vector.body
3915; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
3916; CHECK-NEXT:    vle32.v v8, (a0)
3917; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
3918; CHECK-NEXT:    vdiv.vx v8, v8, a1, v0.t
3919; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3920; CHECK-NEXT:    vse32.v v8, (a0)
3921; CHECK-NEXT:    addi a0, a0, 16
3922; CHECK-NEXT:    bne a0, a3, .LBB75_1
3923; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
3924; CHECK-NEXT:    ret
3925entry:
3926  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3927  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3928  br label %vector.body
3929
3930vector.body:                                      ; preds = %vector.body, %entry
3931  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3932  %0 = getelementptr inbounds i32, ptr %a, i64 %index
3933  %wide.load = load <4 x i32>, ptr %0, align 4
3934  %1 = call <4 x i32> @llvm.vp.sdiv.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
3935  store <4 x i32> %1, ptr %0, align 4
3936  %index.next = add nuw i64 %index, 4
3937  %2 = icmp eq i64 %index.next, 1024
3938  br i1 %2, label %for.cond.cleanup, label %vector.body
3939
3940for.cond.cleanup:                                 ; preds = %vector.body
3941  ret void
3942}
3943
3944declare <4 x i32> @llvm.vp.urem.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
3945
; Masked, VL-predicated unsigned remainder of the loaded vector by a splat of
; %x that is built in the entry block. The loop updates 1024 i32 elements of
; %a in place, 4 per iteration. The assertions expect the scalar-operand form
; vremu.vx (a1) inside the loop and no vector splat before it.
3946define void @sink_splat_vp_urem(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
3947; CHECK-LABEL: sink_splat_vp_urem:
3948; CHECK:       # %bb.0: # %entry
3949; CHECK-NEXT:    lui a3, 1
3950; CHECK-NEXT:    add a3, a0, a3
3951; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3952; CHECK-NEXT:  .LBB76_1: # %vector.body
3953; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
3954; CHECK-NEXT:    vle32.v v8, (a0)
3955; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
3956; CHECK-NEXT:    vremu.vx v8, v8, a1, v0.t
3957; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3958; CHECK-NEXT:    vse32.v v8, (a0)
3959; CHECK-NEXT:    addi a0, a0, 16
3960; CHECK-NEXT:    bne a0, a3, .LBB76_1
3961; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
3962; CHECK-NEXT:    ret
3963entry:
3964  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
3965  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
3966  br label %vector.body
3967
3968vector.body:                                      ; preds = %vector.body, %entry
3969  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
3970  %0 = getelementptr inbounds i32, ptr %a, i64 %index
3971  %wide.load = load <4 x i32>, ptr %0, align 4
3972  %1 = call <4 x i32> @llvm.vp.urem.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
3973  store <4 x i32> %1, ptr %0, align 4
3974  %index.next = add nuw i64 %index, 4
3975  %2 = icmp eq i64 %index.next, 1024
3976  br i1 %2, label %for.cond.cleanup, label %vector.body
3977
3978for.cond.cleanup:                                 ; preds = %vector.body
3979  ret void
3980}
3981
3982declare <4 x i32> @llvm.vp.srem.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
3983
; Masked, VL-predicated signed remainder of the loaded vector by a splat of %x
; that is built in the entry block. The loop updates 1024 i32 elements of %a
; in place, 4 per iteration. The assertions expect the scalar-operand form
; vrem.vx (a1) inside the loop and no vector splat before it.
3984define void @sink_splat_vp_srem(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
3985; CHECK-LABEL: sink_splat_vp_srem:
3986; CHECK:       # %bb.0: # %entry
3987; CHECK-NEXT:    lui a3, 1
3988; CHECK-NEXT:    add a3, a0, a3
3989; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3990; CHECK-NEXT:  .LBB77_1: # %vector.body
3991; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
3992; CHECK-NEXT:    vle32.v v8, (a0)
3993; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
3994; CHECK-NEXT:    vrem.vx v8, v8, a1, v0.t
3995; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3996; CHECK-NEXT:    vse32.v v8, (a0)
3997; CHECK-NEXT:    addi a0, a0, 16
3998; CHECK-NEXT:    bne a0, a3, .LBB77_1
3999; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
4000; CHECK-NEXT:    ret
4001entry:
4002  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
4003  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
4004  br label %vector.body
4005
4006vector.body:                                      ; preds = %vector.body, %entry
4007  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4008  %0 = getelementptr inbounds i32, ptr %a, i64 %index
4009  %wide.load = load <4 x i32>, ptr %0, align 4
4010  %1 = call <4 x i32> @llvm.vp.srem.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
4011  store <4 x i32> %1, ptr %0, align 4
4012  %index.next = add nuw i64 %index, 4
4013  %2 = icmp eq i64 %index.next, 1024
4014  br i1 %2, label %for.cond.cleanup, label %vector.body
4015
4016for.cond.cleanup:                                 ; preds = %vector.body
4017  ret void
4018}
4019
4020; Check that we don't sink a splat operand that has no chance of being folded.
; Here the splat of %x is the first operand (the dividend) of llvm.vp.srem and
; the loaded vector is the divisor. The assertions expect the splat to be
; materialized once with vmv.v.x before the loop and the loop body to use the
; vector-vector form vrem.vv.
4021
4022define void @sink_splat_vp_srem_commute(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
4023; CHECK-LABEL: sink_splat_vp_srem_commute:
4024; CHECK:       # %bb.0: # %entry
4025; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
4026; CHECK-NEXT:    vmv.v.x v8, a1
4027; CHECK-NEXT:    lui a1, 1
4028; CHECK-NEXT:    add a1, a0, a1
4029; CHECK-NEXT:  .LBB78_1: # %vector.body
4030; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
4031; CHECK-NEXT:    vle32.v v9, (a0)
4032; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
4033; CHECK-NEXT:    vrem.vv v9, v8, v9, v0.t
4034; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
4035; CHECK-NEXT:    vse32.v v9, (a0)
4036; CHECK-NEXT:    addi a0, a0, 16
4037; CHECK-NEXT:    bne a0, a1, .LBB78_1
4038; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
4039; CHECK-NEXT:    ret
4040entry:
4041  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
4042  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
4043  br label %vector.body
4044
4045vector.body:                                      ; preds = %vector.body, %entry
4046  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4047  %0 = getelementptr inbounds i32, ptr %a, i64 %index
4048  %wide.load = load <4 x i32>, ptr %0, align 4
4049  %1 = call <4 x i32> @llvm.vp.srem.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl)
4050  store <4 x i32> %1, ptr %0, align 4
4051  %index.next = add nuw i64 %index, 4
4052  %2 = icmp eq i64 %index.next, 1024
4053  br i1 %2, label %for.cond.cleanup, label %vector.body
4054
4055for.cond.cleanup:                                 ; preds = %vector.body
4056  ret void
4057}
4058
4059declare <4 x float> @llvm.vp.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, <4 x i1>, i32)
4060
; Masked, VL-predicated fused multiply-add: the vector loaded from %a is
; multiplied by a splat of %x (second operand) and the vector loaded from %b is
; added. The splat is built in the entry block; the loop covers 1024 floats,
; 4 per iteration, and stores the result back to %a. The assertions expect the
; scalar-operand form vfmadd.vf (fa0) inside the loop and no vector splat
; before it.
4061define void @sink_splat_vp_fma(ptr noalias nocapture %a, ptr nocapture readonly %b, float %x, <4 x i1> %m, i32 zeroext %vl) {
4062; CHECK-LABEL: sink_splat_vp_fma:
4063; CHECK:       # %bb.0: # %entry
4064; CHECK-NEXT:    lui a3, 1
4065; CHECK-NEXT:    add a3, a1, a3
4066; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
4067; CHECK-NEXT:  .LBB79_1: # %vector.body
4068; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
4069; CHECK-NEXT:    vle32.v v8, (a0)
4070; CHECK-NEXT:    vle32.v v9, (a1)
4071; CHECK-NEXT:    addi a1, a1, 16
4072; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
4073; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
4074; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
4075; CHECK-NEXT:    vse32.v v8, (a0)
4076; CHECK-NEXT:    addi a0, a0, 16
4077; CHECK-NEXT:    bne a1, a3, .LBB79_1
4078; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
4079; CHECK-NEXT:    ret
4080entry:
4081  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
4082  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
4083  br label %vector.body
4084
4085vector.body:                                      ; preds = %vector.body, %entry
4086  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4087  %0 = getelementptr inbounds float, ptr %a, i64 %index
4088  %wide.load = load <4 x float>, ptr %0, align 4
4089  %1 = getelementptr inbounds float, ptr %b, i64 %index
4090  %wide.load12 = load <4 x float>, ptr %1, align 4
4091  %2 = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x float> %wide.load12, <4 x i1> %m, i32 %vl)
4092  store <4 x float> %2, ptr %0, align 4
4093  %index.next = add nuw i64 %index, 4
4094  %3 = icmp eq i64 %index.next, 1024
4095  br i1 %3, label %for.cond.cleanup, label %vector.body
4096
4097for.cond.cleanup:                                 ; preds = %vector.body
4098  ret void
4099}
4100
; Same fused multiply-add loop as a VP fma over %a and %b, but with the splat
; of %x as the first multiplicand instead of the second. The assertions expect
; the same scalar-operand form vfmadd.vf (fa0) inside the loop and no vector
; splat before it.
4101define void @sink_splat_vp_fma_commute(ptr noalias nocapture %a, ptr nocapture readonly %b, float %x, <4 x i1> %m, i32 zeroext %vl) {
4102; CHECK-LABEL: sink_splat_vp_fma_commute:
4103; CHECK:       # %bb.0: # %entry
4104; CHECK-NEXT:    lui a3, 1
4105; CHECK-NEXT:    add a3, a1, a3
4106; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
4107; CHECK-NEXT:  .LBB80_1: # %vector.body
4108; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
4109; CHECK-NEXT:    vle32.v v8, (a0)
4110; CHECK-NEXT:    vle32.v v9, (a1)
4111; CHECK-NEXT:    addi a1, a1, 16
4112; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
4113; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
4114; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
4115; CHECK-NEXT:    vse32.v v8, (a0)
4116; CHECK-NEXT:    addi a0, a0, 16
4117; CHECK-NEXT:    bne a1, a3, .LBB80_1
4118; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
4119; CHECK-NEXT:    ret
4120entry:
4121  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
4122  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
4123  br label %vector.body
4124
4125vector.body:                                      ; preds = %vector.body, %entry
4126  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4127  %0 = getelementptr inbounds float, ptr %a, i64 %index
4128  %wide.load = load <4 x float>, ptr %0, align 4
4129  %1 = getelementptr inbounds float, ptr %b, i64 %index
4130  %wide.load12 = load <4 x float>, ptr %1, align 4
4131  %2 = call <4 x float> @llvm.vp.fma.v4f32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x float> %wide.load12, <4 x i1> %m, i32 %vl)
4132  store <4 x float> %2, ptr %0, align 4
4133  %index.next = add nuw i64 %index, 4
4134  %3 = icmp eq i64 %index.next, 1024
4135  br i1 %3, label %for.cond.cleanup, label %vector.body
4136
4137for.cond.cleanup:                                 ; preds = %vector.body
4138  ret void
4139}
4140
4141
; Splat-sinking with a <4 x i64> vector, which the assertions show being
; handled at e64/m2. The loop multiplies 1024 i64 elements of %a in place,
; 4 per iteration, by a splat of %x built in the entry block. The assertions
; expect the scalar-operand form vmul.vx (a1) inside the loop and no vector
; splat before it.
4142define void @sink_splat_mul_lmul2(ptr nocapture %a, i64 signext %x) {
4143; CHECK-LABEL: sink_splat_mul_lmul2:
4144; CHECK:       # %bb.0: # %entry
4145; CHECK-NEXT:    lui a2, 2
4146; CHECK-NEXT:    add a2, a0, a2
4147; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
4148; CHECK-NEXT:  .LBB81_1: # %vector.body
4149; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
4150; CHECK-NEXT:    vle64.v v8, (a0)
4151; CHECK-NEXT:    vmul.vx v8, v8, a1
4152; CHECK-NEXT:    vse64.v v8, (a0)
4153; CHECK-NEXT:    addi a0, a0, 32
4154; CHECK-NEXT:    bne a0, a2, .LBB81_1
4155; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
4156; CHECK-NEXT:    ret
4157entry:
4158  %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
4159  %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
4160  br label %vector.body
4161
4162vector.body:                                      ; preds = %vector.body, %entry
4163  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4164  %0 = getelementptr inbounds i64, ptr %a, i64 %index
4165  %wide.load = load <4 x i64>, ptr %0, align 8
4166  %1 = mul <4 x i64> %wide.load, %broadcast.splat
4167  store <4 x i64> %1, ptr %0, align 8
4168  %index.next = add nuw i64 %index, 4
4169  %2 = icmp eq i64 %index.next, 1024
4170  br i1 %2, label %for.cond.cleanup, label %vector.body
4171
4172for.cond.cleanup:                                 ; preds = %vector.body
4173  ret void
4174}
4175
; Splat-sinking with a <4 x i64> vector (e64/m2 in the assertions). The loop
; adds a splat of %x, built in the entry block, to 1024 i64 elements of %a in
; place, 4 per iteration. The assertions expect the scalar-operand form
; vadd.vx (a1) inside the loop and no vector splat before it.
4176define void @sink_splat_add_lmul2(ptr nocapture %a, i64 signext %x) {
4177; CHECK-LABEL: sink_splat_add_lmul2:
4178; CHECK:       # %bb.0: # %entry
4179; CHECK-NEXT:    lui a2, 2
4180; CHECK-NEXT:    add a2, a0, a2
4181; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
4182; CHECK-NEXT:  .LBB82_1: # %vector.body
4183; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
4184; CHECK-NEXT:    vle64.v v8, (a0)
4185; CHECK-NEXT:    vadd.vx v8, v8, a1
4186; CHECK-NEXT:    vse64.v v8, (a0)
4187; CHECK-NEXT:    addi a0, a0, 32
4188; CHECK-NEXT:    bne a0, a2, .LBB82_1
4189; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
4190; CHECK-NEXT:    ret
4191entry:
4192  %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
4193  %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
4194  br label %vector.body
4195
4196vector.body:                                      ; preds = %vector.body, %entry
4197  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4198  %0 = getelementptr inbounds i64, ptr %a, i64 %index
4199  %wide.load = load <4 x i64>, ptr %0, align 8
4200  %1 = add <4 x i64> %wide.load, %broadcast.splat
4201  store <4 x i64> %1, ptr %0, align 8
4202  %index.next = add nuw i64 %index, 4
4203  %2 = icmp eq i64 %index.next, 1024
4204  br i1 %2, label %for.cond.cleanup, label %vector.body
4205
4206for.cond.cleanup:                                 ; preds = %vector.body
4207  ret void
4208}
4209
; Splat-sinking with a <4 x i64> vector (e64/m2 in the assertions). The loop
; subtracts a splat of %x, built in the entry block, from 1024 i64 elements of
; %a in place, 4 per iteration. The assertions expect the scalar-operand form
; vsub.vx (a1) inside the loop and no vector splat before it.
4210define void @sink_splat_sub_lmul2(ptr nocapture %a, i64 signext %x) {
4211; CHECK-LABEL: sink_splat_sub_lmul2:
4212; CHECK:       # %bb.0: # %entry
4213; CHECK-NEXT:    lui a2, 2
4214; CHECK-NEXT:    add a2, a0, a2
4215; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
4216; CHECK-NEXT:  .LBB83_1: # %vector.body
4217; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
4218; CHECK-NEXT:    vle64.v v8, (a0)
4219; CHECK-NEXT:    vsub.vx v8, v8, a1
4220; CHECK-NEXT:    vse64.v v8, (a0)
4221; CHECK-NEXT:    addi a0, a0, 32
4222; CHECK-NEXT:    bne a0, a2, .LBB83_1
4223; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
4224; CHECK-NEXT:    ret
4225entry:
4226  %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
4227  %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
4228  br label %vector.body
4229
4230vector.body:                                      ; preds = %vector.body, %entry
4231  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4232  %0 = getelementptr inbounds i64, ptr %a, i64 %index
4233  %wide.load = load <4 x i64>, ptr %0, align 8
4234  %1 = sub <4 x i64> %wide.load, %broadcast.splat
4235  store <4 x i64> %1, ptr %0, align 8
4236  %index.next = add nuw i64 %index, 4
4237  %2 = icmp eq i64 %index.next, 1024
4238  br i1 %2, label %for.cond.cleanup, label %vector.body
4239
4240for.cond.cleanup:                                 ; preds = %vector.body
4241  ret void
4242}
4243
; Reversed subtract with a <4 x i64> vector (e64/m2 in the assertions): the
; splat of %x is the left operand of sub and the loaded vector is subtracted
; from it. The loop updates 1024 i64 elements of %a in place, 4 per iteration.
; The assertions expect the scalar-operand form vrsub.vx (a1) inside the loop
; and no vector splat before it.
4244define void @sink_splat_rsub_lmul2(ptr nocapture %a, i64 signext %x) {
4245; CHECK-LABEL: sink_splat_rsub_lmul2:
4246; CHECK:       # %bb.0: # %entry
4247; CHECK-NEXT:    lui a2, 2
4248; CHECK-NEXT:    add a2, a0, a2
4249; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
4250; CHECK-NEXT:  .LBB84_1: # %vector.body
4251; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
4252; CHECK-NEXT:    vle64.v v8, (a0)
4253; CHECK-NEXT:    vrsub.vx v8, v8, a1
4254; CHECK-NEXT:    vse64.v v8, (a0)
4255; CHECK-NEXT:    addi a0, a0, 32
4256; CHECK-NEXT:    bne a0, a2, .LBB84_1
4257; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
4258; CHECK-NEXT:    ret
4259entry:
4260  %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
4261  %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
4262  br label %vector.body
4263
4264vector.body:                                      ; preds = %vector.body, %entry
4265  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4266  %0 = getelementptr inbounds i64, ptr %a, i64 %index
4267  %wide.load = load <4 x i64>, ptr %0, align 8
4268  %1 = sub <4 x i64> %broadcast.splat, %wide.load
4269  store <4 x i64> %1, ptr %0, align 8
4270  %index.next = add nuw i64 %index, 4
4271  %2 = icmp eq i64 %index.next, 1024
4272  br i1 %2, label %for.cond.cleanup, label %vector.body
4273
4274for.cond.cleanup:                                 ; preds = %vector.body
4275  ret void
4276}
4277
; Splat-sinking with a <4 x i64> vector (e64/m2 in the assertions). The loop
; ANDs 1024 i64 elements of %a in place, 4 per iteration, with a splat of %x
; built in the entry block. The assertions expect the scalar-operand form
; vand.vx (a1) inside the loop and no vector splat before it.
4278define void @sink_splat_and_lmul2(ptr nocapture %a, i64 signext %x) {
4279; CHECK-LABEL: sink_splat_and_lmul2:
4280; CHECK:       # %bb.0: # %entry
4281; CHECK-NEXT:    lui a2, 2
4282; CHECK-NEXT:    add a2, a0, a2
4283; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
4284; CHECK-NEXT:  .LBB85_1: # %vector.body
4285; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
4286; CHECK-NEXT:    vle64.v v8, (a0)
4287; CHECK-NEXT:    vand.vx v8, v8, a1
4288; CHECK-NEXT:    vse64.v v8, (a0)
4289; CHECK-NEXT:    addi a0, a0, 32
4290; CHECK-NEXT:    bne a0, a2, .LBB85_1
4291; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
4292; CHECK-NEXT:    ret
4293entry:
4294  %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
4295  %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
4296  br label %vector.body
4297
4298vector.body:                                      ; preds = %vector.body, %entry
4299  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4300  %0 = getelementptr inbounds i64, ptr %a, i64 %index
4301  %wide.load = load <4 x i64>, ptr %0, align 8
4302  %1 = and <4 x i64> %wide.load, %broadcast.splat
4303  store <4 x i64> %1, ptr %0, align 8
4304  %index.next = add nuw i64 %index, 4
4305  %2 = icmp eq i64 %index.next, 1024
4306  br i1 %2, label %for.cond.cleanup, label %vector.body
4307
4308for.cond.cleanup:                                 ; preds = %vector.body
4309  ret void
4310}
4311
; Same loop shape as the other <4 x i64> (e64/m2) cases, with `or` as the
; consumer of the splat. The expected assembly uses vor.vx with the scalar in
; a1 rather than a vector splat operand.
; Stride is 32 bytes per iteration; the end pointer is %a + 8192 (lui a2, 2).
4312define void @sink_splat_or_lmul2(ptr nocapture %a, i64 signext %x) {
4313; CHECK-LABEL: sink_splat_or_lmul2:
4314; CHECK:       # %bb.0: # %entry
4315; CHECK-NEXT:    lui a2, 2
4316; CHECK-NEXT:    add a2, a0, a2
4317; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
4318; CHECK-NEXT:  .LBB86_1: # %vector.body
4319; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
4320; CHECK-NEXT:    vle64.v v8, (a0)
4321; CHECK-NEXT:    vor.vx v8, v8, a1
4322; CHECK-NEXT:    vse64.v v8, (a0)
4323; CHECK-NEXT:    addi a0, a0, 32
4324; CHECK-NEXT:    bne a0, a2, .LBB86_1
4325; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
4326; CHECK-NEXT:    ret
4327entry:
4328  %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
4329  %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
4330  br label %vector.body
4331
4332vector.body:                                      ; preds = %vector.body, %entry
4333  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4334  %0 = getelementptr inbounds i64, ptr %a, i64 %index
4335  %wide.load = load <4 x i64>, ptr %0, align 8
4336  %1 = or <4 x i64> %wide.load, %broadcast.splat
4337  store <4 x i64> %1, ptr %0, align 8
4338  %index.next = add nuw i64 %index, 4
4339  %2 = icmp eq i64 %index.next, 1024
4340  br i1 %2, label %for.cond.cleanup, label %vector.body
4341
4342for.cond.cleanup:                                 ; preds = %vector.body
4343  ret void
4344}
4345
; <4 x i64> (e64/m2) loop whose only use of the %x splat is an `xor`. The
; expected assembly uses vxor.vx with the scalar in a1, so the splat defined
; in %entry never appears as a vector register.
; Stride is 32 bytes per iteration; the end pointer is %a + 8192 (lui a2, 2).
4346define void @sink_splat_xor_lmul2(ptr nocapture %a, i64 signext %x) {
4347; CHECK-LABEL: sink_splat_xor_lmul2:
4348; CHECK:       # %bb.0: # %entry
4349; CHECK-NEXT:    lui a2, 2
4350; CHECK-NEXT:    add a2, a0, a2
4351; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
4352; CHECK-NEXT:  .LBB87_1: # %vector.body
4353; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
4354; CHECK-NEXT:    vle64.v v8, (a0)
4355; CHECK-NEXT:    vxor.vx v8, v8, a1
4356; CHECK-NEXT:    vse64.v v8, (a0)
4357; CHECK-NEXT:    addi a0, a0, 32
4358; CHECK-NEXT:    bne a0, a2, .LBB87_1
4359; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
4360; CHECK-NEXT:    ret
4361entry:
4362  %broadcast.splatinsert = insertelement <4 x i64> poison, i64 %x, i64 0
4363  %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> poison, <4 x i32> zeroinitializer
4364  br label %vector.body
4365
4366vector.body:                                      ; preds = %vector.body, %entry
4367  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4368  %0 = getelementptr inbounds i64, ptr %a, i64 %index
4369  %wide.load = load <4 x i64>, ptr %0, align 8
4370  %1 = xor <4 x i64> %wide.load, %broadcast.splat
4371  store <4 x i64> %1, ptr %0, align 8
4372  %index.next = add nuw i64 %index, 4
4373  %2 = icmp eq i64 %index.next, 1024
4374  br i1 %2, label %for.cond.cleanup, label %vector.body
4375
4376for.cond.cleanup:                                 ; preds = %vector.body
4377  ret void
4378}
4379
; <32 x i32> variant: the expected vsetvli selects e32/m8 with VL = 32 loaded
; into a3. The `mul` by the %x splat is expected to become vmul.vx with the
; scalar in a1.
; Note: the induction variable still advances by 4 i32 elements (16 bytes,
; addi a0, a0, 16) while each load/store spans 32 x i32 = 128 bytes, so
; successive accesses overlap. The end pointer is %a + 4096 (lui a2, 1).
4380define void @sink_splat_mul_lmul8(ptr nocapture %a, i32 signext %x) {
4381; CHECK-LABEL: sink_splat_mul_lmul8:
4382; CHECK:       # %bb.0: # %entry
4383; CHECK-NEXT:    lui a2, 1
4384; CHECK-NEXT:    add a2, a0, a2
4385; CHECK-NEXT:    li a3, 32
4386; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
4387; CHECK-NEXT:  .LBB88_1: # %vector.body
4388; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
4389; CHECK-NEXT:    vle32.v v8, (a0)
4390; CHECK-NEXT:    vmul.vx v8, v8, a1
4391; CHECK-NEXT:    vse32.v v8, (a0)
4392; CHECK-NEXT:    addi a0, a0, 16
4393; CHECK-NEXT:    bne a0, a2, .LBB88_1
4394; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
4395; CHECK-NEXT:    ret
4396entry:
4397  %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0
4398  %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer
4399  br label %vector.body
4400
4401vector.body:                                      ; preds = %vector.body, %entry
4402  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4403  %0 = getelementptr inbounds i32, ptr %a, i64 %index
4404  %wide.load = load <32 x i32>, ptr %0, align 4
4405  %1 = mul <32 x i32> %wide.load, %broadcast.splat
4406  store <32 x i32> %1, ptr %0, align 4
4407  %index.next = add nuw i64 %index, 4
4408  %2 = icmp eq i64 %index.next, 1024
4409  br i1 %2, label %for.cond.cleanup, label %vector.body
4410
4411for.cond.cleanup:                                 ; preds = %vector.body
4412  ret void
4413}
4414
; <32 x i32> (e32/m8, VL = 32 in a3) loop whose `add` consumes the %x splat.
; The expected assembly uses vadd.vx with the scalar in a1.
; The pointer advances 16 bytes per iteration although each access spans
; 128 bytes; the end pointer is %a + 4096 (lui a2, 1).
4415define void @sink_splat_add_lmul8(ptr nocapture %a, i32 signext %x) {
4416; CHECK-LABEL: sink_splat_add_lmul8:
4417; CHECK:       # %bb.0: # %entry
4418; CHECK-NEXT:    lui a2, 1
4419; CHECK-NEXT:    add a2, a0, a2
4420; CHECK-NEXT:    li a3, 32
4421; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
4422; CHECK-NEXT:  .LBB89_1: # %vector.body
4423; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
4424; CHECK-NEXT:    vle32.v v8, (a0)
4425; CHECK-NEXT:    vadd.vx v8, v8, a1
4426; CHECK-NEXT:    vse32.v v8, (a0)
4427; CHECK-NEXT:    addi a0, a0, 16
4428; CHECK-NEXT:    bne a0, a2, .LBB89_1
4429; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
4430; CHECK-NEXT:    ret
4431entry:
4432  %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0
4433  %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer
4434  br label %vector.body
4435
4436vector.body:                                      ; preds = %vector.body, %entry
4437  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4438  %0 = getelementptr inbounds i32, ptr %a, i64 %index
4439  %wide.load = load <32 x i32>, ptr %0, align 4
4440  %1 = add <32 x i32> %wide.load, %broadcast.splat
4441  store <32 x i32> %1, ptr %0, align 4
4442  %index.next = add nuw i64 %index, 4
4443  %2 = icmp eq i64 %index.next, 1024
4444  br i1 %2, label %for.cond.cleanup, label %vector.body
4445
4446for.cond.cleanup:                                 ; preds = %vector.body
4447  ret void
4448}
4449
; <32 x i32> (e32/m8, VL = 32 in a3) loop computing `load - splat(%x)`. With
; the splat as the right-hand operand the expected instruction is vsub.vx
; with the scalar in a1.
; The pointer advances 16 bytes per iteration although each access spans
; 128 bytes; the end pointer is %a + 4096 (lui a2, 1).
4450define void @sink_splat_sub_lmul8(ptr nocapture %a, i32 signext %x) {
4451; CHECK-LABEL: sink_splat_sub_lmul8:
4452; CHECK:       # %bb.0: # %entry
4453; CHECK-NEXT:    lui a2, 1
4454; CHECK-NEXT:    add a2, a0, a2
4455; CHECK-NEXT:    li a3, 32
4456; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
4457; CHECK-NEXT:  .LBB90_1: # %vector.body
4458; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
4459; CHECK-NEXT:    vle32.v v8, (a0)
4460; CHECK-NEXT:    vsub.vx v8, v8, a1
4461; CHECK-NEXT:    vse32.v v8, (a0)
4462; CHECK-NEXT:    addi a0, a0, 16
4463; CHECK-NEXT:    bne a0, a2, .LBB90_1
4464; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
4465; CHECK-NEXT:    ret
4466entry:
4467  %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0
4468  %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer
4469  br label %vector.body
4470
4471vector.body:                                      ; preds = %vector.body, %entry
4472  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4473  %0 = getelementptr inbounds i32, ptr %a, i64 %index
4474  %wide.load = load <32 x i32>, ptr %0, align 4
4475  %1 = sub <32 x i32> %wide.load, %broadcast.splat
4476  store <32 x i32> %1, ptr %0, align 4
4477  %index.next = add nuw i64 %index, 4
4478  %2 = icmp eq i64 %index.next, 1024
4479  br i1 %2, label %for.cond.cleanup, label %vector.body
4480
4481for.cond.cleanup:                                 ; preds = %vector.body
4482  ret void
4483}
4484
; Reverse-subtract counterpart of the lmul8 `sub` case: here the splat is the
; LEFT operand (`splat(%x) - load`), so the expected instruction is vrsub.vx
; with the scalar in a1.
; <32 x i32> at e32/m8 with VL = 32 in a3; the pointer advances 16 bytes per
; iteration and the end pointer is %a + 4096 (lui a2, 1).
4485define void @sink_splat_rsub_lmul8(ptr nocapture %a, i32 signext %x) {
4486; CHECK-LABEL: sink_splat_rsub_lmul8:
4487; CHECK:       # %bb.0: # %entry
4488; CHECK-NEXT:    lui a2, 1
4489; CHECK-NEXT:    add a2, a0, a2
4490; CHECK-NEXT:    li a3, 32
4491; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
4492; CHECK-NEXT:  .LBB91_1: # %vector.body
4493; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
4494; CHECK-NEXT:    vle32.v v8, (a0)
4495; CHECK-NEXT:    vrsub.vx v8, v8, a1
4496; CHECK-NEXT:    vse32.v v8, (a0)
4497; CHECK-NEXT:    addi a0, a0, 16
4498; CHECK-NEXT:    bne a0, a2, .LBB91_1
4499; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
4500; CHECK-NEXT:    ret
4501entry:
4502  %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0
4503  %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer
4504  br label %vector.body
4505
4506vector.body:                                      ; preds = %vector.body, %entry
4507  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4508  %0 = getelementptr inbounds i32, ptr %a, i64 %index
4509  %wide.load = load <32 x i32>, ptr %0, align 4
4510  %1 = sub <32 x i32> %broadcast.splat, %wide.load
4511  store <32 x i32> %1, ptr %0, align 4
4512  %index.next = add nuw i64 %index, 4
4513  %2 = icmp eq i64 %index.next, 1024
4514  br i1 %2, label %for.cond.cleanup, label %vector.body
4515
4516for.cond.cleanup:                                 ; preds = %vector.body
4517  ret void
4518}
4519
; <32 x i32> (e32/m8, VL = 32 in a3) loop whose `and` consumes the %x splat.
; The expected assembly uses vand.vx with the scalar in a1.
; The pointer advances 16 bytes per iteration although each access spans
; 128 bytes; the end pointer is %a + 4096 (lui a2, 1).
4520define void @sink_splat_and_lmul8(ptr nocapture %a, i32 signext %x) {
4521; CHECK-LABEL: sink_splat_and_lmul8:
4522; CHECK:       # %bb.0: # %entry
4523; CHECK-NEXT:    lui a2, 1
4524; CHECK-NEXT:    add a2, a0, a2
4525; CHECK-NEXT:    li a3, 32
4526; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
4527; CHECK-NEXT:  .LBB92_1: # %vector.body
4528; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
4529; CHECK-NEXT:    vle32.v v8, (a0)
4530; CHECK-NEXT:    vand.vx v8, v8, a1
4531; CHECK-NEXT:    vse32.v v8, (a0)
4532; CHECK-NEXT:    addi a0, a0, 16
4533; CHECK-NEXT:    bne a0, a2, .LBB92_1
4534; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
4535; CHECK-NEXT:    ret
4536entry:
4537  %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0
4538  %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer
4539  br label %vector.body
4540
4541vector.body:                                      ; preds = %vector.body, %entry
4542  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4543  %0 = getelementptr inbounds i32, ptr %a, i64 %index
4544  %wide.load = load <32 x i32>, ptr %0, align 4
4545  %1 = and <32 x i32> %wide.load, %broadcast.splat
4546  store <32 x i32> %1, ptr %0, align 4
4547  %index.next = add nuw i64 %index, 4
4548  %2 = icmp eq i64 %index.next, 1024
4549  br i1 %2, label %for.cond.cleanup, label %vector.body
4550
4551for.cond.cleanup:                                 ; preds = %vector.body
4552  ret void
4553}
4554
; <32 x i32> (e32/m8, VL = 32 in a3) loop whose `or` consumes the %x splat.
; The expected assembly uses vor.vx with the scalar in a1.
; The pointer advances 16 bytes per iteration although each access spans
; 128 bytes; the end pointer is %a + 4096 (lui a2, 1).
4555define void @sink_splat_or_lmul8(ptr nocapture %a, i32 signext %x) {
4556; CHECK-LABEL: sink_splat_or_lmul8:
4557; CHECK:       # %bb.0: # %entry
4558; CHECK-NEXT:    lui a2, 1
4559; CHECK-NEXT:    add a2, a0, a2
4560; CHECK-NEXT:    li a3, 32
4561; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
4562; CHECK-NEXT:  .LBB93_1: # %vector.body
4563; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
4564; CHECK-NEXT:    vle32.v v8, (a0)
4565; CHECK-NEXT:    vor.vx v8, v8, a1
4566; CHECK-NEXT:    vse32.v v8, (a0)
4567; CHECK-NEXT:    addi a0, a0, 16
4568; CHECK-NEXT:    bne a0, a2, .LBB93_1
4569; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
4570; CHECK-NEXT:    ret
4571entry:
4572  %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0
4573  %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer
4574  br label %vector.body
4575
4576vector.body:                                      ; preds = %vector.body, %entry
4577  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4578  %0 = getelementptr inbounds i32, ptr %a, i64 %index
4579  %wide.load = load <32 x i32>, ptr %0, align 4
4580  %1 = or <32 x i32> %wide.load, %broadcast.splat
4581  store <32 x i32> %1, ptr %0, align 4
4582  %index.next = add nuw i64 %index, 4
4583  %2 = icmp eq i64 %index.next, 1024
4584  br i1 %2, label %for.cond.cleanup, label %vector.body
4585
4586for.cond.cleanup:                                 ; preds = %vector.body
4587  ret void
4588}
4589
; <32 x i32> (e32/m8, VL = 32 in a3) loop whose `xor` consumes the %x splat.
; The expected assembly uses vxor.vx with the scalar in a1.
; The pointer advances 16 bytes per iteration although each access spans
; 128 bytes; the end pointer is %a + 4096 (lui a2, 1).
4590define void @sink_splat_xor_lmul8(ptr nocapture %a, i32 signext %x) {
4591; CHECK-LABEL: sink_splat_xor_lmul8:
4592; CHECK:       # %bb.0: # %entry
4593; CHECK-NEXT:    lui a2, 1
4594; CHECK-NEXT:    add a2, a0, a2
4595; CHECK-NEXT:    li a3, 32
4596; CHECK-NEXT:    vsetvli zero, a3, e32, m8, ta, ma
4597; CHECK-NEXT:  .LBB94_1: # %vector.body
4598; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
4599; CHECK-NEXT:    vle32.v v8, (a0)
4600; CHECK-NEXT:    vxor.vx v8, v8, a1
4601; CHECK-NEXT:    vse32.v v8, (a0)
4602; CHECK-NEXT:    addi a0, a0, 16
4603; CHECK-NEXT:    bne a0, a2, .LBB94_1
4604; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
4605; CHECK-NEXT:    ret
4606entry:
4607  %broadcast.splatinsert = insertelement <32 x i32> poison, i32 %x, i32 0
4608  %broadcast.splat = shufflevector <32 x i32> %broadcast.splatinsert, <32 x i32> poison, <32 x i32> zeroinitializer
4609  br label %vector.body
4610
4611vector.body:                                      ; preds = %vector.body, %entry
4612  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4613  %0 = getelementptr inbounds i32, ptr %a, i64 %index
4614  %wide.load = load <32 x i32>, ptr %0, align 4
4615  %1 = xor <32 x i32> %wide.load, %broadcast.splat
4616  store <32 x i32> %1, ptr %0, align 4
4617  %index.next = add nuw i64 %index, 4
4618  %2 = icmp eq i64 %index.next, 1024
4619  br i1 %2, label %for.cond.cleanup, label %vector.body
4620
4621for.cond.cleanup:                                 ; preds = %vector.body
4622  ret void
4623}
4624
; Fractional-LMUL variant: <2 x i32> is expected to run at e32/mf2 with VL = 2.
; The `mul` by the %x splat is expected to become vmul.vx with the scalar in a1.
; Note: the address is computed with an i64-element GEP, so stepping the index
; by 4 moves the pointer 32 bytes (addi a0, a0, 32) while each access is only
; 8 bytes wide; the end pointer is %a + 8192 (lui a2, 2).
4625define void @sink_splat_mul_lmulmf2(ptr nocapture %a, i32 signext %x) {
4626; CHECK-LABEL: sink_splat_mul_lmulmf2:
4627; CHECK:       # %bb.0: # %entry
4628; CHECK-NEXT:    lui a2, 2
4629; CHECK-NEXT:    add a2, a0, a2
4630; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
4631; CHECK-NEXT:  .LBB95_1: # %vector.body
4632; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
4633; CHECK-NEXT:    vle32.v v8, (a0)
4634; CHECK-NEXT:    vmul.vx v8, v8, a1
4635; CHECK-NEXT:    vse32.v v8, (a0)
4636; CHECK-NEXT:    addi a0, a0, 32
4637; CHECK-NEXT:    bne a0, a2, .LBB95_1
4638; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
4639; CHECK-NEXT:    ret
4640entry:
4641  %broadcast.splatinsert = insertelement <2 x i32> poison, i32 %x, i64 0
4642  %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> poison, <2 x i32> zeroinitializer
4643  br label %vector.body
4644
4645vector.body:                                      ; preds = %vector.body, %entry
4646  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4647  %0 = getelementptr inbounds i64, ptr %a, i64 %index
4648  %wide.load = load <2 x i32>, ptr %0, align 8
4649  %1 = mul <2 x i32> %wide.load, %broadcast.splat
4650  store <2 x i32> %1, ptr %0, align 8
4651  %index.next = add nuw i64 %index, 4
4652  %2 = icmp eq i64 %index.next, 1024
4653  br i1 %2, label %for.cond.cleanup, label %vector.body
4654
4655for.cond.cleanup:                                 ; preds = %vector.body
4656  ret void
4657}
4658
; <2 x i32> (e32/mf2, VL = 2) loop whose `add` consumes the %x splat. The
; expected assembly uses vadd.vx with the scalar in a1.
; The i64-element GEP makes the pointer advance 32 bytes per iteration; the
; end pointer is %a + 8192 (lui a2, 2).
4659define void @sink_splat_add_lmulmf2(ptr nocapture %a, i32 signext %x) {
4660; CHECK-LABEL: sink_splat_add_lmulmf2:
4661; CHECK:       # %bb.0: # %entry
4662; CHECK-NEXT:    lui a2, 2
4663; CHECK-NEXT:    add a2, a0, a2
4664; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
4665; CHECK-NEXT:  .LBB96_1: # %vector.body
4666; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
4667; CHECK-NEXT:    vle32.v v8, (a0)
4668; CHECK-NEXT:    vadd.vx v8, v8, a1
4669; CHECK-NEXT:    vse32.v v8, (a0)
4670; CHECK-NEXT:    addi a0, a0, 32
4671; CHECK-NEXT:    bne a0, a2, .LBB96_1
4672; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
4673; CHECK-NEXT:    ret
4674entry:
4675  %broadcast.splatinsert = insertelement <2 x i32> poison, i32 %x, i64 0
4676  %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> poison, <2 x i32> zeroinitializer
4677  br label %vector.body
4678
4679vector.body:                                      ; preds = %vector.body, %entry
4680  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4681  %0 = getelementptr inbounds i64, ptr %a, i64 %index
4682  %wide.load = load <2 x i32>, ptr %0, align 8
4683  %1 = add <2 x i32> %wide.load, %broadcast.splat
4684  store <2 x i32> %1, ptr %0, align 8
4685  %index.next = add nuw i64 %index, 4
4686  %2 = icmp eq i64 %index.next, 1024
4687  br i1 %2, label %for.cond.cleanup, label %vector.body
4688
4689for.cond.cleanup:                                 ; preds = %vector.body
4690  ret void
4691}
4692
; <2 x i32> (e32/mf2, VL = 2) loop computing `load - splat(%x)`. With the
; splat as the right-hand operand the expected instruction is vsub.vx with
; the scalar in a1.
; The i64-element GEP makes the pointer advance 32 bytes per iteration; the
; end pointer is %a + 8192 (lui a2, 2).
4693define void @sink_splat_sub_lmulmf2(ptr nocapture %a, i32 signext %x) {
4694; CHECK-LABEL: sink_splat_sub_lmulmf2:
4695; CHECK:       # %bb.0: # %entry
4696; CHECK-NEXT:    lui a2, 2
4697; CHECK-NEXT:    add a2, a0, a2
4698; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
4699; CHECK-NEXT:  .LBB97_1: # %vector.body
4700; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
4701; CHECK-NEXT:    vle32.v v8, (a0)
4702; CHECK-NEXT:    vsub.vx v8, v8, a1
4703; CHECK-NEXT:    vse32.v v8, (a0)
4704; CHECK-NEXT:    addi a0, a0, 32
4705; CHECK-NEXT:    bne a0, a2, .LBB97_1
4706; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
4707; CHECK-NEXT:    ret
4708entry:
4709  %broadcast.splatinsert = insertelement <2 x i32> poison, i32 %x, i64 0
4710  %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> poison, <2 x i32> zeroinitializer
4711  br label %vector.body
4712
4713vector.body:                                      ; preds = %vector.body, %entry
4714  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4715  %0 = getelementptr inbounds i64, ptr %a, i64 %index
4716  %wide.load = load <2 x i32>, ptr %0, align 8
4717  %1 = sub <2 x i32> %wide.load, %broadcast.splat
4718  store <2 x i32> %1, ptr %0, align 8
4719  %index.next = add nuw i64 %index, 4
4720  %2 = icmp eq i64 %index.next, 1024
4721  br i1 %2, label %for.cond.cleanup, label %vector.body
4722
4723for.cond.cleanup:                                 ; preds = %vector.body
4724  ret void
4725}
4726
; Reverse-subtract counterpart of the lmulmf2 `sub` case: the splat is the
; LEFT operand (`splat(%x) - load`), so the expected instruction is vrsub.vx
; with the scalar in a1.
; <2 x i32> at e32/mf2 with VL = 2; the i64-element GEP makes the pointer
; advance 32 bytes per iteration, ending at %a + 8192 (lui a2, 2).
4727define void @sink_splat_rsub_lmulmf2(ptr nocapture %a, i32 signext %x) {
4728; CHECK-LABEL: sink_splat_rsub_lmulmf2:
4729; CHECK:       # %bb.0: # %entry
4730; CHECK-NEXT:    lui a2, 2
4731; CHECK-NEXT:    add a2, a0, a2
4732; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
4733; CHECK-NEXT:  .LBB98_1: # %vector.body
4734; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
4735; CHECK-NEXT:    vle32.v v8, (a0)
4736; CHECK-NEXT:    vrsub.vx v8, v8, a1
4737; CHECK-NEXT:    vse32.v v8, (a0)
4738; CHECK-NEXT:    addi a0, a0, 32
4739; CHECK-NEXT:    bne a0, a2, .LBB98_1
4740; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
4741; CHECK-NEXT:    ret
4742entry:
4743  %broadcast.splatinsert = insertelement <2 x i32> poison, i32 %x, i64 0
4744  %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> poison, <2 x i32> zeroinitializer
4745  br label %vector.body
4746
4747vector.body:                                      ; preds = %vector.body, %entry
4748  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4749  %0 = getelementptr inbounds i64, ptr %a, i64 %index
4750  %wide.load = load <2 x i32>, ptr %0, align 8
4751  %1 = sub <2 x i32> %broadcast.splat, %wide.load
4752  store <2 x i32> %1, ptr %0, align 8
4753  %index.next = add nuw i64 %index, 4
4754  %2 = icmp eq i64 %index.next, 1024
4755  br i1 %2, label %for.cond.cleanup, label %vector.body
4756
4757for.cond.cleanup:                                 ; preds = %vector.body
4758  ret void
4759}
4760
; <2 x i32> (e32/mf2, VL = 2) loop whose `and` consumes the %x splat. The
; expected assembly uses vand.vx with the scalar in a1.
; The i64-element GEP makes the pointer advance 32 bytes per iteration; the
; end pointer is %a + 8192 (lui a2, 2).
4761define void @sink_splat_and_lmulmf2(ptr nocapture %a, i32 signext %x) {
4762; CHECK-LABEL: sink_splat_and_lmulmf2:
4763; CHECK:       # %bb.0: # %entry
4764; CHECK-NEXT:    lui a2, 2
4765; CHECK-NEXT:    add a2, a0, a2
4766; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
4767; CHECK-NEXT:  .LBB99_1: # %vector.body
4768; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
4769; CHECK-NEXT:    vle32.v v8, (a0)
4770; CHECK-NEXT:    vand.vx v8, v8, a1
4771; CHECK-NEXT:    vse32.v v8, (a0)
4772; CHECK-NEXT:    addi a0, a0, 32
4773; CHECK-NEXT:    bne a0, a2, .LBB99_1
4774; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
4775; CHECK-NEXT:    ret
4776entry:
4777  %broadcast.splatinsert = insertelement <2 x i32> poison, i32 %x, i64 0
4778  %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> poison, <2 x i32> zeroinitializer
4779  br label %vector.body
4780
4781vector.body:                                      ; preds = %vector.body, %entry
4782  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4783  %0 = getelementptr inbounds i64, ptr %a, i64 %index
4784  %wide.load = load <2 x i32>, ptr %0, align 8
4785  %1 = and <2 x i32> %wide.load, %broadcast.splat
4786  store <2 x i32> %1, ptr %0, align 8
4787  %index.next = add nuw i64 %index, 4
4788  %2 = icmp eq i64 %index.next, 1024
4789  br i1 %2, label %for.cond.cleanup, label %vector.body
4790
4791for.cond.cleanup:                                 ; preds = %vector.body
4792  ret void
4793}
4794
; <2 x i32> (e32/mf2, VL = 2) loop whose `or` consumes the %x splat. The
; expected assembly uses vor.vx with the scalar in a1.
; The i64-element GEP makes the pointer advance 32 bytes per iteration; the
; end pointer is %a + 8192 (lui a2, 2).
4795define void @sink_splat_or_lmulmf2(ptr nocapture %a, i32 signext %x) {
4796; CHECK-LABEL: sink_splat_or_lmulmf2:
4797; CHECK:       # %bb.0: # %entry
4798; CHECK-NEXT:    lui a2, 2
4799; CHECK-NEXT:    add a2, a0, a2
4800; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
4801; CHECK-NEXT:  .LBB100_1: # %vector.body
4802; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
4803; CHECK-NEXT:    vle32.v v8, (a0)
4804; CHECK-NEXT:    vor.vx v8, v8, a1
4805; CHECK-NEXT:    vse32.v v8, (a0)
4806; CHECK-NEXT:    addi a0, a0, 32
4807; CHECK-NEXT:    bne a0, a2, .LBB100_1
4808; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
4809; CHECK-NEXT:    ret
4810entry:
4811  %broadcast.splatinsert = insertelement <2 x i32> poison, i32 %x, i64 0
4812  %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> poison, <2 x i32> zeroinitializer
4813  br label %vector.body
4814
4815vector.body:                                      ; preds = %vector.body, %entry
4816  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4817  %0 = getelementptr inbounds i64, ptr %a, i64 %index
4818  %wide.load = load <2 x i32>, ptr %0, align 8
4819  %1 = or <2 x i32> %wide.load, %broadcast.splat
4820  store <2 x i32> %1, ptr %0, align 8
4821  %index.next = add nuw i64 %index, 4
4822  %2 = icmp eq i64 %index.next, 1024
4823  br i1 %2, label %for.cond.cleanup, label %vector.body
4824
4825for.cond.cleanup:                                 ; preds = %vector.body
4826  ret void
4827}
4828
; <2 x i32> (e32/mf2, VL = 2) loop whose `xor` consumes the %x splat. The
; expected assembly uses vxor.vx with the scalar in a1.
; The i64-element GEP makes the pointer advance 32 bytes per iteration; the
; end pointer is %a + 8192 (lui a2, 2).
4829define void @sink_splat_xor_lmulmf2(ptr nocapture %a, i32 signext %x) {
4830; CHECK-LABEL: sink_splat_xor_lmulmf2:
4831; CHECK:       # %bb.0: # %entry
4832; CHECK-NEXT:    lui a2, 2
4833; CHECK-NEXT:    add a2, a0, a2
4834; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
4835; CHECK-NEXT:  .LBB101_1: # %vector.body
4836; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
4837; CHECK-NEXT:    vle32.v v8, (a0)
4838; CHECK-NEXT:    vxor.vx v8, v8, a1
4839; CHECK-NEXT:    vse32.v v8, (a0)
4840; CHECK-NEXT:    addi a0, a0, 32
4841; CHECK-NEXT:    bne a0, a2, .LBB101_1
4842; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
4843; CHECK-NEXT:    ret
4844entry:
4845  %broadcast.splatinsert = insertelement <2 x i32> poison, i32 %x, i64 0
4846  %broadcast.splat = shufflevector <2 x i32> %broadcast.splatinsert, <2 x i32> poison, <2 x i32> zeroinitializer
4847  br label %vector.body
4848
4849vector.body:                                      ; preds = %vector.body, %entry
4850  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4851  %0 = getelementptr inbounds i64, ptr %a, i64 %index
4852  %wide.load = load <2 x i32>, ptr %0, align 8
4853  %1 = xor <2 x i32> %wide.load, %broadcast.splat
4854  store <2 x i32> %1, ptr %0, align 8
4855  %index.next = add nuw i64 %index, 4
4856  %2 = icmp eq i64 %index.next, 1024
4857  br i1 %2, label %for.cond.cleanup, label %vector.body
4858
4859for.cond.cleanup:                                 ; preds = %vector.body
4860  ret void
4861}
4862
; VP integer compare: the %y splat is the second operand of llvm.vp.icmp "eq",
; which also takes mask %m and explicit vector length %vl. The compare result
; is the mask of a masked store of zeros back to the loaded address.
; Expected assembly: a masked vmseq.vx with the scalar in a1, executed under a
; vsetvli that takes its VL from a2, then a switch back to VL = 4 for the
; masked store. The incoming mask in v0 is copied to v8 before the loop and
; restored into v0 on every iteration, since the compare writes v0.
4863declare <4 x i1> @llvm.vp.icmp.v4i32(<4 x i32>, <4 x i32>, metadata, <4 x i1>, i32)
4864
4865define void @sink_splat_vp_icmp(ptr nocapture %x, i32 signext %y, <4 x i1> %m, i32 zeroext %vl) {
4866; CHECK-LABEL: sink_splat_vp_icmp:
4867; CHECK:       # %bb.0: # %entry
4868; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
4869; CHECK-NEXT:    vmv1r.v v8, v0
4870; CHECK-NEXT:    lui a3, 1
4871; CHECK-NEXT:    add a3, a0, a3
4872; CHECK-NEXT:    vmv.v.i v9, 0
4873; CHECK-NEXT:  .LBB102_1: # %vector.body
4874; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
4875; CHECK-NEXT:    vle32.v v10, (a0)
4876; CHECK-NEXT:    vmv1r.v v0, v8
4877; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
4878; CHECK-NEXT:    vmseq.vx v0, v10, a1, v0.t
4879; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
4880; CHECK-NEXT:    vse32.v v9, (a0), v0.t
4881; CHECK-NEXT:    addi a0, a0, 16
4882; CHECK-NEXT:    bne a0, a3, .LBB102_1
4883; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
4884; CHECK-NEXT:    ret
4885entry:
4886  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %y, i32 0
4887  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
4888  br label %vector.body
4889
4890vector.body:                                      ; preds = %vector.body, %entry
4891  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4892  %0 = getelementptr inbounds i32, ptr %x, i64 %index
4893  %wide.load = load <4 x i32>, ptr %0, align 4
4894  %1 = call <4 x i1> @llvm.vp.icmp.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, metadata !"eq", <4 x i1> %m, i32 %vl)
4895  call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr %0, i32 4, <4 x i1> %1)
4896  %index.next = add nuw i64 %index, 4
4897  %2 = icmp eq i64 %index.next, 1024
4898  br i1 %2, label %for.cond.cleanup, label %vector.body
4899
4900for.cond.cleanup:                                 ; preds = %vector.body
4901  ret void
4902}
4903
; Floating-point analogue of the VP icmp case: the float %y splat is the second
; operand of llvm.vp.fcmp "oeq" with mask %m and explicit vector length %vl.
; The result is the mask of a masked store of zeros.
; Expected assembly: a masked vmfeq.vf with the scalar in fa0, executed under a
; vsetvli that takes its VL from a1, then a switch back to VL = 4 for the
; masked store. As in the icmp case, v0 is saved in v8 and restored on every
; iteration.
4904declare <4 x i1> @llvm.vp.fcmp.v4f32(<4 x float>, <4 x float>, metadata, <4 x i1>, i32)
4905
4906define void @sink_splat_vp_fcmp(ptr nocapture %x, float %y, <4 x i1> %m, i32 zeroext %vl) {
4907; CHECK-LABEL: sink_splat_vp_fcmp:
4908; CHECK:       # %bb.0: # %entry
4909; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
4910; CHECK-NEXT:    vmv1r.v v8, v0
4911; CHECK-NEXT:    lui a2, 1
4912; CHECK-NEXT:    add a2, a0, a2
4913; CHECK-NEXT:    vmv.v.i v9, 0
4914; CHECK-NEXT:  .LBB103_1: # %vector.body
4915; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
4916; CHECK-NEXT:    vle32.v v10, (a0)
4917; CHECK-NEXT:    vmv1r.v v0, v8
4918; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
4919; CHECK-NEXT:    vmfeq.vf v0, v10, fa0, v0.t
4920; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
4921; CHECK-NEXT:    vse32.v v9, (a0), v0.t
4922; CHECK-NEXT:    addi a0, a0, 16
4923; CHECK-NEXT:    bne a0, a2, .LBB103_1
4924; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
4925; CHECK-NEXT:    ret
4926entry:
4927  %broadcast.splatinsert = insertelement <4 x float> poison, float %y, i32 0
4928  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
4929  br label %vector.body
4930
4931vector.body:                                      ; preds = %vector.body, %entry
4932  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4933  %0 = getelementptr inbounds float, ptr %x, i64 %index
4934  %wide.load = load <4 x float>, ptr %0, align 4
4935  %1 = call <4 x i1> @llvm.vp.fcmp.v4f32(<4 x float> %wide.load, <4 x float> %broadcast.splat, metadata !"oeq", <4 x i1> %m, i32 %vl)
4936  call void @llvm.masked.store.v4f32.p0(<4 x float> zeroinitializer, ptr %0, i32 4, <4 x i1> %1)
4937  %index.next = add nuw i64 %index, 4
4938  %2 = icmp eq i64 %index.next, 1024
4939  br i1 %2, label %for.cond.cleanup, label %vector.body
4940
4941for.cond.cleanup:                                 ; preds = %vector.body
4942  ret void
4943}
4944
; VP signed minimum: the %x splat is the second operand of llvm.vp.smin, with
; mask %m and explicit vector length %vl. The result is stored back with an
; ordinary (unmasked) <4 x i32> store.
; Expected assembly: a masked vmin.vx with the scalar in a1 under a vsetvli
; that takes its VL from a2, then a switch back to VL = 4 for the store.
4945declare <4 x i32> @llvm.vp.smin.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
4946
4947define void @sink_splat_vp_min(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
4948; CHECK-LABEL: sink_splat_vp_min:
4949; CHECK:       # %bb.0: # %entry
4950; CHECK-NEXT:    lui a3, 1
4951; CHECK-NEXT:    add a3, a0, a3
4952; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
4953; CHECK-NEXT:  .LBB104_1: # %vector.body
4954; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
4955; CHECK-NEXT:    vle32.v v8, (a0)
4956; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
4957; CHECK-NEXT:    vmin.vx v8, v8, a1, v0.t
4958; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
4959; CHECK-NEXT:    vse32.v v8, (a0)
4960; CHECK-NEXT:    addi a0, a0, 16
4961; CHECK-NEXT:    bne a0, a3, .LBB104_1
4962; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
4963; CHECK-NEXT:    ret
4964entry:
4965  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
4966  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
4967  br label %vector.body
4968
4969vector.body:                                      ; preds = %vector.body, %entry
4970  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
4971  %0 = getelementptr inbounds i32, ptr %a, i64 %index
4972  %wide.load = load <4 x i32>, ptr %0, align 4
4973  %1 = call <4 x i32> @llvm.vp.smin.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
4974  store <4 x i32> %1, ptr %0, align 4
4975  %index.next = add nuw i64 %index, 4
4976  %2 = icmp eq i64 %index.next, 1024
4977  br i1 %2, label %for.cond.cleanup, label %vector.body
4978
4979for.cond.cleanup:                                 ; preds = %vector.body
4980  ret void
4981}
4982
; Commuted form of the VP smin case: the %x splat is the FIRST operand of
; llvm.vp.smin and the loaded vector the second. The expected assembly is the
; same masked vmin.vx with the scalar in a1 as for the non-commuted operand
; order.
4983define void @sink_splat_vp_min_commute(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
4984; CHECK-LABEL: sink_splat_vp_min_commute:
4985; CHECK:       # %bb.0: # %entry
4986; CHECK-NEXT:    lui a3, 1
4987; CHECK-NEXT:    add a3, a0, a3
4988; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
4989; CHECK-NEXT:  .LBB105_1: # %vector.body
4990; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
4991; CHECK-NEXT:    vle32.v v8, (a0)
4992; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
4993; CHECK-NEXT:    vmin.vx v8, v8, a1, v0.t
4994; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
4995; CHECK-NEXT:    vse32.v v8, (a0)
4996; CHECK-NEXT:    addi a0, a0, 16
4997; CHECK-NEXT:    bne a0, a3, .LBB105_1
4998; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
4999; CHECK-NEXT:    ret
5000entry:
5001  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5002  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5003  br label %vector.body
5004
5005vector.body:                                      ; preds = %vector.body, %entry
5006  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5007  %0 = getelementptr inbounds i32, ptr %a, i64 %index
5008  %wide.load = load <4 x i32>, ptr %0, align 4
5009  %1 = call <4 x i32> @llvm.vp.smin.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl)
5010  store <4 x i32> %1, ptr %0, align 4
5011  %index.next = add nuw i64 %index, 4
5012  %2 = icmp eq i64 %index.next, 1024
5013  br i1 %2, label %for.cond.cleanup, label %vector.body
5014
5015for.cond.cleanup:                                 ; preds = %vector.body
5016  ret void
5017}
5018
; VP signed maximum: the %x splat is the second operand of llvm.vp.smax, with
; mask %m and explicit vector length %vl. The result is stored back with an
; ordinary (unmasked) <4 x i32> store.
; Expected assembly: a masked vmax.vx with the scalar in a1 under a vsetvli
; that takes its VL from a2, then a switch back to VL = 4 for the store.
5019declare <4 x i32> @llvm.vp.smax.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
5020
5021define void @sink_splat_vp_max(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
5022; CHECK-LABEL: sink_splat_vp_max:
5023; CHECK:       # %bb.0: # %entry
5024; CHECK-NEXT:    lui a3, 1
5025; CHECK-NEXT:    add a3, a0, a3
5026; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5027; CHECK-NEXT:  .LBB106_1: # %vector.body
5028; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
5029; CHECK-NEXT:    vle32.v v8, (a0)
5030; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
5031; CHECK-NEXT:    vmax.vx v8, v8, a1, v0.t
5032; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5033; CHECK-NEXT:    vse32.v v8, (a0)
5034; CHECK-NEXT:    addi a0, a0, 16
5035; CHECK-NEXT:    bne a0, a3, .LBB106_1
5036; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
5037; CHECK-NEXT:    ret
5038entry:
5039  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5040  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5041  br label %vector.body
5042
5043vector.body:                                      ; preds = %vector.body, %entry
5044  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5045  %0 = getelementptr inbounds i32, ptr %a, i64 %index
5046  %wide.load = load <4 x i32>, ptr %0, align 4
5047  %1 = call <4 x i32> @llvm.vp.smax.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
5048  store <4 x i32> %1, ptr %0, align 4
5049  %index.next = add nuw i64 %index, 4
5050  %2 = icmp eq i64 %index.next, 1024
5051  br i1 %2, label %for.cond.cleanup, label %vector.body
5052
5053for.cond.cleanup:                                 ; preds = %vector.body
5054  ret void
5055}
5056
; Commuted llvm.vp.smax test: the loop-invariant splat of %x is the FIRST
; vector operand. The expected assembly is the same shape as the non-commuted
; variant: %x stays in scalar register a1 and the masked vmax.vx form is used.
5057define void @sink_splat_vp_max_commute(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
5058; CHECK-LABEL: sink_splat_vp_max_commute:
5059; CHECK:       # %bb.0: # %entry
5060; CHECK-NEXT:    lui a3, 1
5061; CHECK-NEXT:    add a3, a0, a3
5062; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5063; CHECK-NEXT:  .LBB107_1: # %vector.body
5064; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
5065; CHECK-NEXT:    vle32.v v8, (a0)
5066; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
5067; CHECK-NEXT:    vmax.vx v8, v8, a1, v0.t
5068; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5069; CHECK-NEXT:    vse32.v v8, (a0)
5070; CHECK-NEXT:    addi a0, a0, 16
5071; CHECK-NEXT:    bne a0, a3, .LBB107_1
5072; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
5073; CHECK-NEXT:    ret
5074entry:
  ; Build the splat of %x once, outside the loop.
5075  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5076  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5077  br label %vector.body
5078
5079vector.body:                                      ; preds = %vector.body, %entry
5080  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5081  %0 = getelementptr inbounds i32, ptr %a, i64 %index
5082  %wide.load = load <4 x i32>, ptr %0, align 4
  ; Splat first, loaded vector second (commuted operand order).
5083  %1 = call <4 x i32> @llvm.vp.smax.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl)
5084  store <4 x i32> %1, ptr %0, align 4
5085  %index.next = add nuw i64 %index, 4
5086  %2 = icmp eq i64 %index.next, 1024
5087  br i1 %2, label %for.cond.cleanup, label %vector.body
5088
5089for.cond.cleanup:                                 ; preds = %vector.body
5090  ret void
5091}
5092
; Commuted llvm.vp.umin test: the loop-invariant splat of %x is the FIRST
; vector operand. The loop processes %a in <4 x i32> chunks until the element
; index reaches 1024. The expected assembly keeps %x in scalar register a1 and
; uses the masked vminu.vx form; no splat vector is materialised.
5093define void @sink_splat_vp_umin_commute(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
5094; CHECK-LABEL: sink_splat_vp_umin_commute:
5095; CHECK:       # %bb.0: # %entry
5096; CHECK-NEXT:    lui a3, 1
5097; CHECK-NEXT:    add a3, a0, a3
5098; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5099; CHECK-NEXT:  .LBB108_1: # %vector.body
5100; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
5101; CHECK-NEXT:    vle32.v v8, (a0)
5102; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
5103; CHECK-NEXT:    vminu.vx v8, v8, a1, v0.t
5104; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5105; CHECK-NEXT:    vse32.v v8, (a0)
5106; CHECK-NEXT:    addi a0, a0, 16
5107; CHECK-NEXT:    bne a0, a3, .LBB108_1
5108; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
5109; CHECK-NEXT:    ret
5110entry:
  ; Build the splat of %x once, outside the loop.
5111  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5112  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5113  br label %vector.body
5114
5115vector.body:                                      ; preds = %vector.body, %entry
5116  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5117  %0 = getelementptr inbounds i32, ptr %a, i64 %index
5118  %wide.load = load <4 x i32>, ptr %0, align 4
  ; Splat first, loaded vector second (commuted operand order).
5119  %1 = call <4 x i32> @llvm.vp.umin.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl)
5120  store <4 x i32> %1, ptr %0, align 4
5121  %index.next = add nuw i64 %index, 4
5122  %2 = icmp eq i64 %index.next, 1024
5123  br i1 %2, label %for.cond.cleanup, label %vector.body
5124
5125for.cond.cleanup:                                 ; preds = %vector.body
5126  ret void
5127}
5128
; Vector-predicated unsigned max on <4 x i32>: two vector operands followed by
; a <4 x i1> mask and an i32 explicit vector length.
5129declare <4 x i32> @llvm.vp.umax.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
5130
; llvm.vp.umax test with the loop-invariant splat of %x as the SECOND vector
; operand. The loop processes %a in <4 x i32> chunks until the element index
; reaches 1024. The expected assembly keeps %x in scalar register a1 and uses
; the masked vmaxu.vx form; no splat vector is materialised.
5131define void @sink_splat_vp_umax(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
5132; CHECK-LABEL: sink_splat_vp_umax:
5133; CHECK:       # %bb.0: # %entry
5134; CHECK-NEXT:    lui a3, 1
5135; CHECK-NEXT:    add a3, a0, a3
5136; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5137; CHECK-NEXT:  .LBB109_1: # %vector.body
5138; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
5139; CHECK-NEXT:    vle32.v v8, (a0)
5140; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
5141; CHECK-NEXT:    vmaxu.vx v8, v8, a1, v0.t
5142; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5143; CHECK-NEXT:    vse32.v v8, (a0)
5144; CHECK-NEXT:    addi a0, a0, 16
5145; CHECK-NEXT:    bne a0, a3, .LBB109_1
5146; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
5147; CHECK-NEXT:    ret
5148entry:
  ; Build the splat of %x once, outside the loop.
5149  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5150  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5151  br label %vector.body
5152
5153vector.body:                                      ; preds = %vector.body, %entry
5154  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5155  %0 = getelementptr inbounds i32, ptr %a, i64 %index
5156  %wide.load = load <4 x i32>, ptr %0, align 4
  ; Loaded vector first, splat second.
5157  %1 = call <4 x i32> @llvm.vp.umax.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
5158  store <4 x i32> %1, ptr %0, align 4
5159  %index.next = add nuw i64 %index, 4
5160  %2 = icmp eq i64 %index.next, 1024
5161  br i1 %2, label %for.cond.cleanup, label %vector.body
5162
5163for.cond.cleanup:                                 ; preds = %vector.body
5164  ret void
5165}
5166
; Commuted llvm.vp.umax test: the loop-invariant splat of %x is the FIRST
; vector operand. The expected assembly is the same shape as the non-commuted
; variant: %x stays in scalar register a1 and the masked vmaxu.vx form is used.
5167define void @sink_splat_vp_umax_commute(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
5168; CHECK-LABEL: sink_splat_vp_umax_commute:
5169; CHECK:       # %bb.0: # %entry
5170; CHECK-NEXT:    lui a3, 1
5171; CHECK-NEXT:    add a3, a0, a3
5172; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5173; CHECK-NEXT:  .LBB110_1: # %vector.body
5174; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
5175; CHECK-NEXT:    vle32.v v8, (a0)
5176; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
5177; CHECK-NEXT:    vmaxu.vx v8, v8, a1, v0.t
5178; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5179; CHECK-NEXT:    vse32.v v8, (a0)
5180; CHECK-NEXT:    addi a0, a0, 16
5181; CHECK-NEXT:    bne a0, a3, .LBB110_1
5182; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
5183; CHECK-NEXT:    ret
5184entry:
  ; Build the splat of %x once, outside the loop.
5185  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5186  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5187  br label %vector.body
5188
5189vector.body:                                      ; preds = %vector.body, %entry
5190  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5191  %0 = getelementptr inbounds i32, ptr %a, i64 %index
5192  %wide.load = load <4 x i32>, ptr %0, align 4
  ; Splat first, loaded vector second (commuted operand order).
5193  %1 = call <4 x i32> @llvm.vp.umax.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl)
5194  store <4 x i32> %1, ptr %0, align 4
5195  %index.next = add nuw i64 %index, 4
5196  %2 = icmp eq i64 %index.next, 1024
5197  br i1 %2, label %for.cond.cleanup, label %vector.body
5198
5199for.cond.cleanup:                                 ; preds = %vector.body
5200  ret void
5201}
5202
; Vector-predicated signed saturating add on <4 x i32>: two vector operands
; followed by a <4 x i1> mask and an i32 explicit vector length.
5203declare <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
5204
; llvm.vp.sadd.sat test with the loop-invariant splat of %x as the SECOND
; vector operand. The loop processes %a in <4 x i32> chunks until the element
; index reaches 1024. The expected assembly keeps %x in scalar register a1 and
; uses the masked vsadd.vx form; no splat vector is materialised.
5205define void @sink_splat_vp_sadd_sat(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
5206; CHECK-LABEL: sink_splat_vp_sadd_sat:
5207; CHECK:       # %bb.0: # %entry
5208; CHECK-NEXT:    lui a3, 1
5209; CHECK-NEXT:    add a3, a0, a3
5210; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5211; CHECK-NEXT:  .LBB111_1: # %vector.body
5212; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
5213; CHECK-NEXT:    vle32.v v8, (a0)
5214; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
5215; CHECK-NEXT:    vsadd.vx v8, v8, a1, v0.t
5216; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5217; CHECK-NEXT:    vse32.v v8, (a0)
5218; CHECK-NEXT:    addi a0, a0, 16
5219; CHECK-NEXT:    bne a0, a3, .LBB111_1
5220; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
5221; CHECK-NEXT:    ret
5222entry:
  ; Build the splat of %x once, outside the loop.
5223  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5224  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5225  br label %vector.body
5226
5227vector.body:                                      ; preds = %vector.body, %entry
5228  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5229  %0 = getelementptr inbounds i32, ptr %a, i64 %index
5230  %wide.load = load <4 x i32>, ptr %0, align 4
  ; Loaded vector first, splat second.
5231  %1 = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
5232  store <4 x i32> %1, ptr %0, align 4
5233  %index.next = add nuw i64 %index, 4
5234  %2 = icmp eq i64 %index.next, 1024
5235  br i1 %2, label %for.cond.cleanup, label %vector.body
5236
5237for.cond.cleanup:                                 ; preds = %vector.body
5238  ret void
5239}
5240
; Commuted llvm.vp.sadd.sat test: the loop-invariant splat of %x is the FIRST
; vector operand. The expected assembly is the same shape as the non-commuted
; variant: %x stays in scalar register a1 and the masked vsadd.vx form is used.
5241define void @sink_splat_vp_sadd_sat_commute(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
5242; CHECK-LABEL: sink_splat_vp_sadd_sat_commute:
5243; CHECK:       # %bb.0: # %entry
5244; CHECK-NEXT:    lui a3, 1
5245; CHECK-NEXT:    add a3, a0, a3
5246; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5247; CHECK-NEXT:  .LBB112_1: # %vector.body
5248; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
5249; CHECK-NEXT:    vle32.v v8, (a0)
5250; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
5251; CHECK-NEXT:    vsadd.vx v8, v8, a1, v0.t
5252; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5253; CHECK-NEXT:    vse32.v v8, (a0)
5254; CHECK-NEXT:    addi a0, a0, 16
5255; CHECK-NEXT:    bne a0, a3, .LBB112_1
5256; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
5257; CHECK-NEXT:    ret
5258entry:
  ; Build the splat of %x once, outside the loop.
5259  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5260  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5261  br label %vector.body
5262
5263vector.body:                                      ; preds = %vector.body, %entry
5264  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5265  %0 = getelementptr inbounds i32, ptr %a, i64 %index
5266  %wide.load = load <4 x i32>, ptr %0, align 4
  ; Splat first, loaded vector second (commuted operand order).
5267  %1 = call <4 x i32> @llvm.vp.sadd.sat.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl)
5268  store <4 x i32> %1, ptr %0, align 4
5269  %index.next = add nuw i64 %index, 4
5270  %2 = icmp eq i64 %index.next, 1024
5271  br i1 %2, label %for.cond.cleanup, label %vector.body
5272
5273for.cond.cleanup:                                 ; preds = %vector.body
5274  ret void
5275}
5276
; Vector-predicated signed saturating subtract on <4 x i32>: two vector
; operands followed by a <4 x i1> mask and an i32 explicit vector length.
5277declare <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
5278
; llvm.vp.ssub.sat test with the loop-invariant splat of %x as the SECOND
; vector operand (the subtrahend). The loop processes %a in <4 x i32> chunks
; until the element index reaches 1024. The expected assembly keeps %x in
; scalar register a1 and uses the masked vssub.vx form; no splat vector is
; materialised.
;
; The induction variable is advanced with `add nuw`, like the neighbouring
; tests. Stepping it with `sub nuw` from 0 would wrap on the first iteration,
; making the exit condition poison.
5279define void @sink_splat_vp_ssub_sat(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
5280; CHECK-LABEL: sink_splat_vp_ssub_sat:
5281; CHECK:       # %bb.0: # %entry
5282; CHECK-NEXT:    lui a3, 1
5283; CHECK-NEXT:    add a3, a0, a3
5284; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5285; CHECK-NEXT:  .LBB113_1: # %vector.body
5286; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
5287; CHECK-NEXT:    vle32.v v8, (a0)
5288; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
5289; CHECK-NEXT:    vssub.vx v8, v8, a1, v0.t
5290; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5291; CHECK-NEXT:    vse32.v v8, (a0)
5292; CHECK-NEXT:    addi a0, a0, 16
5293; CHECK-NEXT:    bne a0, a3, .LBB113_1
5294; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
5295; CHECK-NEXT:    ret
5296entry:
  ; Build the splat of %x once, outside the loop.
5297  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5298  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5299  br label %vector.body
5300
5301vector.body:                                      ; preds = %vector.body, %entry
5302  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5303  %0 = getelementptr inbounds i32, ptr %a, i64 %index
5304  %wide.load = load <4 x i32>, ptr %0, align 4
  ; Loaded vector is the minuend, splat is the subtrahend.
5305  %1 = call <4 x i32> @llvm.vp.ssub.sat.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
5306  store <4 x i32> %1, ptr %0, align 4
  ; Count upwards in steps of 4 elements; exits once 1024 is reached.
5307  %index.next = add nuw i64 %index, 4
5308  %2 = icmp eq i64 %index.next, 1024
5309  br i1 %2, label %for.cond.cleanup, label %vector.body
5310
5311for.cond.cleanup:                                 ; preds = %vector.body
5312  ret void
5313}
5314
; Vector-predicated unsigned saturating add on <4 x i32>: two vector operands
; followed by a <4 x i1> mask and an i32 explicit vector length.
5315declare <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
5316
; llvm.vp.uadd.sat test with the loop-invariant splat of %x as the SECOND
; vector operand. The loop processes %a in <4 x i32> chunks until the element
; index reaches 1024. The expected assembly keeps %x in scalar register a1 and
; uses the masked vsaddu.vx form; no splat vector is materialised.
5317define void @sink_splat_vp_uadd_sat(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
5318; CHECK-LABEL: sink_splat_vp_uadd_sat:
5319; CHECK:       # %bb.0: # %entry
5320; CHECK-NEXT:    lui a3, 1
5321; CHECK-NEXT:    add a3, a0, a3
5322; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5323; CHECK-NEXT:  .LBB114_1: # %vector.body
5324; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
5325; CHECK-NEXT:    vle32.v v8, (a0)
5326; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
5327; CHECK-NEXT:    vsaddu.vx v8, v8, a1, v0.t
5328; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5329; CHECK-NEXT:    vse32.v v8, (a0)
5330; CHECK-NEXT:    addi a0, a0, 16
5331; CHECK-NEXT:    bne a0, a3, .LBB114_1
5332; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
5333; CHECK-NEXT:    ret
5334entry:
  ; Build the splat of %x once, outside the loop.
5335  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5336  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5337  br label %vector.body
5338
5339vector.body:                                      ; preds = %vector.body, %entry
5340  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5341  %0 = getelementptr inbounds i32, ptr %a, i64 %index
5342  %wide.load = load <4 x i32>, ptr %0, align 4
  ; Loaded vector first, splat second.
5343  %1 = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
5344  store <4 x i32> %1, ptr %0, align 4
5345  %index.next = add nuw i64 %index, 4
5346  %2 = icmp eq i64 %index.next, 1024
5347  br i1 %2, label %for.cond.cleanup, label %vector.body
5348
5349for.cond.cleanup:                                 ; preds = %vector.body
5350  ret void
5351}
5352
; Commuted llvm.vp.uadd.sat test: the loop-invariant splat of %x is the FIRST
; vector operand. The expected assembly is the same shape as the non-commuted
; variant: %x stays in scalar register a1 and the masked vsaddu.vx form is
; used.
5353define void @sink_splat_vp_uadd_sat_commute(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
5354; CHECK-LABEL: sink_splat_vp_uadd_sat_commute:
5355; CHECK:       # %bb.0: # %entry
5356; CHECK-NEXT:    lui a3, 1
5357; CHECK-NEXT:    add a3, a0, a3
5358; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5359; CHECK-NEXT:  .LBB115_1: # %vector.body
5360; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
5361; CHECK-NEXT:    vle32.v v8, (a0)
5362; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
5363; CHECK-NEXT:    vsaddu.vx v8, v8, a1, v0.t
5364; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5365; CHECK-NEXT:    vse32.v v8, (a0)
5366; CHECK-NEXT:    addi a0, a0, 16
5367; CHECK-NEXT:    bne a0, a3, .LBB115_1
5368; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
5369; CHECK-NEXT:    ret
5370entry:
  ; Build the splat of %x once, outside the loop.
5371  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5372  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5373  br label %vector.body
5374
5375vector.body:                                      ; preds = %vector.body, %entry
5376  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5377  %0 = getelementptr inbounds i32, ptr %a, i64 %index
5378  %wide.load = load <4 x i32>, ptr %0, align 4
  ; Splat first, loaded vector second (commuted operand order).
5379  %1 = call <4 x i32> @llvm.vp.uadd.sat.v4i32(<4 x i32> %broadcast.splat, <4 x i32> %wide.load, <4 x i1> %m, i32 %vl)
5380  store <4 x i32> %1, ptr %0, align 4
5381  %index.next = add nuw i64 %index, 4
5382  %2 = icmp eq i64 %index.next, 1024
5383  br i1 %2, label %for.cond.cleanup, label %vector.body
5384
5385for.cond.cleanup:                                 ; preds = %vector.body
5386  ret void
5387}
5388
; Vector-predicated unsigned saturating subtract on <4 x i32>: two vector
; operands followed by a <4 x i1> mask and an i32 explicit vector length.
5389declare <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32>, <4 x i32>, <4 x i1>, i32)
5390
; llvm.vp.usub.sat test with the loop-invariant splat of %x as the SECOND
; vector operand (the subtrahend). The loop processes %a in <4 x i32> chunks
; until the element index reaches 1024. The expected assembly keeps %x in
; scalar register a1 and uses the masked vssubu.vx form; no splat vector is
; materialised.
;
; The induction variable is advanced with `add nuw`, like the neighbouring
; tests. Stepping it with `sub nuw` from 0 would wrap on the first iteration,
; making the exit condition poison.
5391define void @sink_splat_vp_usub_sat(ptr nocapture %a, i32 signext %x, <4 x i1> %m, i32 zeroext %vl) {
5392; CHECK-LABEL: sink_splat_vp_usub_sat:
5393; CHECK:       # %bb.0: # %entry
5394; CHECK-NEXT:    lui a3, 1
5395; CHECK-NEXT:    add a3, a0, a3
5396; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5397; CHECK-NEXT:  .LBB116_1: # %vector.body
5398; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
5399; CHECK-NEXT:    vle32.v v8, (a0)
5400; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
5401; CHECK-NEXT:    vssubu.vx v8, v8, a1, v0.t
5402; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5403; CHECK-NEXT:    vse32.v v8, (a0)
5404; CHECK-NEXT:    addi a0, a0, 16
5405; CHECK-NEXT:    bne a0, a3, .LBB116_1
5406; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
5407; CHECK-NEXT:    ret
5408entry:
  ; Build the splat of %x once, outside the loop.
5409  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5410  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5411  br label %vector.body
5412
5413vector.body:                                      ; preds = %vector.body, %entry
5414  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5415  %0 = getelementptr inbounds i32, ptr %a, i64 %index
5416  %wide.load = load <4 x i32>, ptr %0, align 4
  ; Loaded vector is the minuend, splat is the subtrahend.
5417  %1 = call <4 x i32> @llvm.vp.usub.sat.v4i32(<4 x i32> %wide.load, <4 x i32> %broadcast.splat, <4 x i1> %m, i32 %vl)
5418  store <4 x i32> %1, ptr %0, align 4
  ; Count upwards in steps of 4 elements; exits once 1024 is reached.
5419  %index.next = add nuw i64 %index, 4
5420  %2 = icmp eq i64 %index.next, 1024
5421  br i1 %2, label %for.cond.cleanup, label %vector.body
5422
5423for.cond.cleanup:                                 ; preds = %vector.body
5424  ret void
5425}
5426
; Plain vector `select` with the loop-invariant splat of %x as the TRUE value
; (operand 1). Each <4 x i32> chunk of %a is compared against 42 and matching
; lanes are replaced by %x. The expected assembly keeps %x in scalar register
; a1 and uses vmerge.vxm; no splat vector is materialised.
5427define void @sink_splat_select_op1(ptr nocapture %a, i32 signext %x) {
5428; CHECK-LABEL: sink_splat_select_op1:
5429; CHECK:       # %bb.0: # %entry
5430; CHECK-NEXT:    lui a2, 1
5431; CHECK-NEXT:    add a2, a0, a2
5432; CHECK-NEXT:    li a3, 42
5433; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5434; CHECK-NEXT:  .LBB117_1: # %vector.body
5435; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
5436; CHECK-NEXT:    vle32.v v8, (a0)
5437; CHECK-NEXT:    vmseq.vx v0, v8, a3
5438; CHECK-NEXT:    vmerge.vxm v8, v8, a1, v0
5439; CHECK-NEXT:    vse32.v v8, (a0)
5440; CHECK-NEXT:    addi a0, a0, 16
5441; CHECK-NEXT:    bne a0, a2, .LBB117_1
5442; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
5443; CHECK-NEXT:    ret
5444entry:
  ; Build the splat of %x once, outside the loop.
5445  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5446  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5447  br label %vector.body
5448
5449vector.body:                                      ; preds = %vector.body, %entry
5450  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5451  %0 = getelementptr inbounds i32, ptr %a, i64 %index
5452  %load = load <4 x i32>, ptr %0, align 4
5453  %cond = icmp eq <4 x i32> %load, splat (i32 42)
  ; Lanes equal to 42 take the splat value; the others keep the loaded value.
5454  %1 = select <4 x i1> %cond, <4 x i32> %broadcast.splat, <4 x i32> %load
5455  store <4 x i32> %1, ptr %0, align 4
5456  %index.next = add nuw i64 %index, 4
5457  %2 = icmp eq i64 %index.next, 1024
5458  br i1 %2, label %for.cond.cleanup, label %vector.body
5459
5460for.cond.cleanup:                                 ; preds = %vector.body
5461  ret void
5462}
5463
5463
; Plain vector `select` with the loop-invariant splat of %x as the FALSE value
; (operand 2). Unlike the operand-1 case, the expected assembly materialises
; the splat once before the loop with vmv.v.x and merges two vector registers
; with vmerge.vvm inside the loop.
5464define void @sink_splat_select_op2(ptr nocapture %a, i32 signext %x) {
5465; CHECK-LABEL: sink_splat_select_op2:
5466; CHECK:       # %bb.0: # %entry
5467; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5468; CHECK-NEXT:    vmv.v.x v8, a1
5469; CHECK-NEXT:    lui a1, 1
5470; CHECK-NEXT:    add a1, a0, a1
5471; CHECK-NEXT:    li a2, 42
5472; CHECK-NEXT:  .LBB118_1: # %vector.body
5473; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
5474; CHECK-NEXT:    vle32.v v9, (a0)
5475; CHECK-NEXT:    vmseq.vx v0, v9, a2
5476; CHECK-NEXT:    vmerge.vvm v9, v8, v9, v0
5477; CHECK-NEXT:    vse32.v v9, (a0)
5478; CHECK-NEXT:    addi a0, a0, 16
5479; CHECK-NEXT:    bne a0, a1, .LBB118_1
5480; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
5481; CHECK-NEXT:    ret
5482entry:
  ; Build the splat of %x once, outside the loop.
5483  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5484  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5485  br label %vector.body
5486
5487vector.body:                                      ; preds = %vector.body, %entry
5488  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5489  %0 = getelementptr inbounds i32, ptr %a, i64 %index
5490  %load = load <4 x i32>, ptr %0, align 4
5491  %cond = icmp eq <4 x i32> %load, splat (i32 42)
  ; Lanes equal to 42 keep the loaded value; the others take the splat value.
5492  %1 = select <4 x i1> %cond, <4 x i32> %load, <4 x i32> %broadcast.splat
5493  store <4 x i32> %1, ptr %0, align 4
5494  %index.next = add nuw i64 %index, 4
5495  %2 = icmp eq i64 %index.next, 1024
5496  br i1 %2, label %for.cond.cleanup, label %vector.body
5497
5498for.cond.cleanup:                                 ; preds = %vector.body
5499  ret void
5500}
5501
5501
; llvm.vp.select with the loop-invariant splat of %x as the TRUE value
; (operand 1) and %vl as the explicit vector length. The expected assembly
; keeps %x in scalar register a1 and uses vmerge.vxm under a vsetvli driven by
; %vl. %vl carries no extension attribute here, and the expected output
; contains an slli/srli-by-32 pair on it before the loop.
5502define void @sink_splat_vp_select_op1(ptr %a, i32 %x, i32 %vl) {
5503; CHECK-LABEL: sink_splat_vp_select_op1:
5504; CHECK:       # %bb.0: # %entry
5505; CHECK-NEXT:    lui a4, 1
5506; CHECK-NEXT:    li a3, 42
5507; CHECK-NEXT:    slli a5, a2, 32
5508; CHECK-NEXT:    add a2, a0, a4
5509; CHECK-NEXT:    srli a4, a5, 32
5510; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5511; CHECK-NEXT:  .LBB119_1: # %vector.body
5512; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
5513; CHECK-NEXT:    vle32.v v8, (a0)
5514; CHECK-NEXT:    vmseq.vx v0, v8, a3
5515; CHECK-NEXT:    vsetvli zero, a4, e32, m1, ta, ma
5516; CHECK-NEXT:    vmerge.vxm v8, v8, a1, v0
5517; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5518; CHECK-NEXT:    vse32.v v8, (a0)
5519; CHECK-NEXT:    addi a0, a0, 16
5520; CHECK-NEXT:    bne a0, a2, .LBB119_1
5521; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
5522; CHECK-NEXT:    ret
5523entry:
  ; Build the splat of %x once, outside the loop.
5524  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5525  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5526  br label %vector.body
5527
5528vector.body:
5529  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5530  %0 = getelementptr inbounds i32, ptr %a, i64 %index
5531  %load = load <4 x i32>, ptr %0, align 4
5532  %cond = icmp eq <4 x i32> %load, splat (i32 42)
  ; Splat is the value selected where %cond is true.
5533  %1 = call <4 x i32> @llvm.vp.select(<4 x i1> %cond, <4 x i32> %broadcast.splat, <4 x i32> %load, i32 %vl)
5534  store <4 x i32> %1, ptr %0, align 4
5535  %index.next = add nuw i64 %index, 4
5536  %2 = icmp eq i64 %index.next, 1024
5537  br i1 %2, label %for.cond.cleanup, label %vector.body
5538
5539for.cond.cleanup:
5540  ret void
5541}
5542
; llvm.vp.select with the loop-invariant splat of %x as the FALSE value
; (operand 2) and %vl as the explicit vector length. As in the plain-select
; operand-2 case, the expected assembly materialises the splat once before the
; loop with vmv.v.x and uses vmerge.vvm inside the loop, under a vsetvli
; driven by %vl.
5543define void @sink_splat_vp_select_op2(ptr %a, i32 %x, i32 %vl) {
5544; CHECK-LABEL: sink_splat_vp_select_op2:
5545; CHECK:       # %bb.0: # %entry
5546; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5547; CHECK-NEXT:    vmv.v.x v8, a1
5548; CHECK-NEXT:    lui a3, 1
5549; CHECK-NEXT:    li a1, 42
5550; CHECK-NEXT:    slli a4, a2, 32
5551; CHECK-NEXT:    add a2, a0, a3
5552; CHECK-NEXT:    srli a3, a4, 32
5553; CHECK-NEXT:  .LBB120_1: # %vector.body
5554; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
5555; CHECK-NEXT:    vle32.v v9, (a0)
5556; CHECK-NEXT:    vmseq.vx v0, v9, a1
5557; CHECK-NEXT:    vsetvli zero, a3, e32, m1, ta, ma
5558; CHECK-NEXT:    vmerge.vvm v9, v8, v9, v0
5559; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5560; CHECK-NEXT:    vse32.v v9, (a0)
5561; CHECK-NEXT:    addi a0, a0, 16
5562; CHECK-NEXT:    bne a0, a2, .LBB120_1
5563; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
5564; CHECK-NEXT:    ret
5565entry:
  ; Build the splat of %x once, outside the loop.
5566  %broadcast.splatinsert = insertelement <4 x i32> poison, i32 %x, i32 0
5567  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> poison, <4 x i32> zeroinitializer
5568  br label %vector.body
5569
5570vector.body:
5571  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5572  %0 = getelementptr inbounds i32, ptr %a, i64 %index
5573  %load = load <4 x i32>, ptr %0, align 4
5574  %cond = icmp eq <4 x i32> %load, splat (i32 42)
  ; Splat is the value selected where %cond is false.
5575  %1 = call <4 x i32> @llvm.vp.select(<4 x i1> %cond, <4 x i32> %load, <4 x i32> %broadcast.splat, i32 %vl)
5576  store <4 x i32> %1, ptr %0, align 4
5577  %index.next = add nuw i64 %index, 4
5578  %2 = icmp eq i64 %index.next, 1024
5579  br i1 %2, label %for.cond.cleanup, label %vector.body
5580
5581for.cond.cleanup:
5582  ret void
5583}
5584
; llvm.fmuladd test with the loop-invariant splat of float %x as the SECOND
; multiplicand. Each iteration loads <4 x float> from %a and %b, computes
; a * splat(x) + b, and stores the result back to %a, until the element index
; reaches 1024. The expected assembly keeps %x in fa0 and uses vfmacc.vf; no
; splat vector is materialised.
5585define void @sink_splat_fmuladd(ptr %a, ptr %b, float %x) {
5586; CHECK-LABEL: sink_splat_fmuladd:
5587; CHECK:       # %bb.0: # %entry
5588; CHECK-NEXT:    lui a2, 1
5589; CHECK-NEXT:    add a2, a1, a2
5590; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5591; CHECK-NEXT:  .LBB121_1: # %vector.body
5592; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
5593; CHECK-NEXT:    vle32.v v8, (a0)
5594; CHECK-NEXT:    vle32.v v9, (a1)
5595; CHECK-NEXT:    addi a1, a1, 16
5596; CHECK-NEXT:    vfmacc.vf v9, fa0, v8
5597; CHECK-NEXT:    vse32.v v9, (a0)
5598; CHECK-NEXT:    addi a0, a0, 16
5599; CHECK-NEXT:    bne a1, a2, .LBB121_1
5600; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
5601; CHECK-NEXT:    ret
5602entry:
  ; Build the splat of %x once, outside the loop.
5603  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
5604  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
5605  br label %vector.body
5606
5607vector.body:
5608  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5609  %0 = getelementptr inbounds float, ptr %a, i64 %index
5610  %wide.load = load <4 x float>, ptr %0, align 4
5611  %1 = getelementptr inbounds float, ptr %b, i64 %index
5612  %wide.load12 = load <4 x float>, ptr %1, align 4
  ; Multiplicands: data from %a, then the splat; addend: data from %b.
5613  %2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x float> %wide.load12)
5614  store <4 x float> %2, ptr %0, align 4
5615  %index.next = add nuw i64 %index, 4
5616  %3 = icmp eq i64 %index.next, 1024
5617  br i1 %3, label %for.cond.cleanup, label %vector.body
5618
5619for.cond.cleanup:
5620  ret void
5621}
5622
; Commuted llvm.fmuladd test: the loop-invariant splat of float %x is the
; FIRST multiplicand. The expected assembly is the same shape as the
; non-commuted variant: %x stays in fa0 and vfmacc.vf is used.
5623define void @sink_splat_fmuladd_commute(ptr %a, ptr %b, float %x) {
5624; CHECK-LABEL: sink_splat_fmuladd_commute:
5625; CHECK:       # %bb.0: # %entry
5626; CHECK-NEXT:    lui a2, 1
5627; CHECK-NEXT:    add a2, a1, a2
5628; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5629; CHECK-NEXT:  .LBB122_1: # %vector.body
5630; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
5631; CHECK-NEXT:    vle32.v v8, (a0)
5632; CHECK-NEXT:    vle32.v v9, (a1)
5633; CHECK-NEXT:    addi a1, a1, 16
5634; CHECK-NEXT:    vfmacc.vf v9, fa0, v8
5635; CHECK-NEXT:    vse32.v v9, (a0)
5636; CHECK-NEXT:    addi a0, a0, 16
5637; CHECK-NEXT:    bne a1, a2, .LBB122_1
5638; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
5639; CHECK-NEXT:    ret
5640entry:
  ; Build the splat of %x once, outside the loop.
5641  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
5642  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
5643  br label %vector.body
5644
5645vector.body:
5646  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5647  %0 = getelementptr inbounds float, ptr %a, i64 %index
5648  %wide.load = load <4 x float>, ptr %0, align 4
5649  %1 = getelementptr inbounds float, ptr %b, i64 %index
5650  %wide.load12 = load <4 x float>, ptr %1, align 4
  ; Multiplicands: the splat, then data from %a; addend: data from %b.
5651  %2 = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x float> %wide.load12)
5652  store <4 x float> %2, ptr %0, align 4
5653  %index.next = add nuw i64 %index, 4
5654  %3 = icmp eq i64 %index.next, 1024
5655  br i1 %3, label %for.cond.cleanup, label %vector.body
5656
5657for.cond.cleanup:
5658  ret void
5659}
5660
; llvm.vp.fmuladd test with the loop-invariant splat of float %x as the SECOND
; multiplicand, predicated by mask %m and explicit vector length %vl. The
; expected assembly keeps %x in fa0 and uses the masked vfmadd.vf form under a
; vsetvli driven by %vl; no splat vector is materialised. %vl carries no
; extension attribute here, and the expected output contains an
; slli/srli-by-32 pair on it before the loop.
5661define void @sink_splat_vp_fmuladd(ptr %a, ptr %b, float %x, <4 x i1> %m, i32 %vl) {
5662; CHECK-LABEL: sink_splat_vp_fmuladd:
5663; CHECK:       # %bb.0: # %entry
5664; CHECK-NEXT:    lui a3, 1
5665; CHECK-NEXT:    slli a4, a2, 32
5666; CHECK-NEXT:    add a2, a1, a3
5667; CHECK-NEXT:    srli a3, a4, 32
5668; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5669; CHECK-NEXT:  .LBB123_1: # %vector.body
5670; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
5671; CHECK-NEXT:    vle32.v v8, (a0)
5672; CHECK-NEXT:    vle32.v v9, (a1)
5673; CHECK-NEXT:    addi a1, a1, 16
5674; CHECK-NEXT:    vsetvli zero, a3, e32, m1, ta, ma
5675; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
5676; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5677; CHECK-NEXT:    vse32.v v8, (a0)
5678; CHECK-NEXT:    addi a0, a0, 16
5679; CHECK-NEXT:    bne a1, a2, .LBB123_1
5680; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
5681; CHECK-NEXT:    ret
5682entry:
  ; Build the splat of %x once, outside the loop.
5683  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
5684  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
5685  br label %vector.body
5686
5687vector.body:
5688  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5689  %0 = getelementptr inbounds float, ptr %a, i64 %index
5690  %wide.load = load <4 x float>, ptr %0, align 4
5691  %1 = getelementptr inbounds float, ptr %b, i64 %index
5692  %wide.load12 = load <4 x float>, ptr %1, align 4
  ; Multiplicands: data from %a, then the splat; addend: data from %b.
5693  %2 = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %wide.load, <4 x float> %broadcast.splat, <4 x float> %wide.load12, <4 x i1> %m, i32 %vl)
5694  store <4 x float> %2, ptr %0, align 4
5695  %index.next = add nuw i64 %index, 4
5696  %3 = icmp eq i64 %index.next, 1024
5697  br i1 %3, label %for.cond.cleanup, label %vector.body
5698
5699for.cond.cleanup:
5700  ret void
5701}
5702
; Commuted llvm.vp.fmuladd test: the loop-invariant splat of float %x is the
; FIRST multiplicand, predicated by mask %m and explicit vector length %vl.
; The expected assembly is the same shape as the non-commuted variant: %x
; stays in fa0 and the masked vfmadd.vf form is used.
5703define void @sink_splat_vp_fmuladd_commute(ptr %a, ptr %b, float %x, <4 x i1> %m, i32 %vl) {
5704; CHECK-LABEL: sink_splat_vp_fmuladd_commute:
5705; CHECK:       # %bb.0: # %entry
5706; CHECK-NEXT:    lui a3, 1
5707; CHECK-NEXT:    slli a4, a2, 32
5708; CHECK-NEXT:    add a2, a1, a3
5709; CHECK-NEXT:    srli a3, a4, 32
5710; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5711; CHECK-NEXT:  .LBB124_1: # %vector.body
5712; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
5713; CHECK-NEXT:    vle32.v v8, (a0)
5714; CHECK-NEXT:    vle32.v v9, (a1)
5715; CHECK-NEXT:    addi a1, a1, 16
5716; CHECK-NEXT:    vsetvli zero, a3, e32, m1, ta, ma
5717; CHECK-NEXT:    vfmadd.vf v8, fa0, v9, v0.t
5718; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5719; CHECK-NEXT:    vse32.v v8, (a0)
5720; CHECK-NEXT:    addi a0, a0, 16
5721; CHECK-NEXT:    bne a1, a2, .LBB124_1
5722; CHECK-NEXT:  # %bb.2: # %for.cond.cleanup
5723; CHECK-NEXT:    ret
5724entry:
  ; Build the splat of %x once, outside the loop.
5725  %broadcast.splatinsert = insertelement <4 x float> poison, float %x, i32 0
5726  %broadcast.splat = shufflevector <4 x float> %broadcast.splatinsert, <4 x float> poison, <4 x i32> zeroinitializer
5727  br label %vector.body
5728
5729vector.body:
5730  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
5731  %0 = getelementptr inbounds float, ptr %a, i64 %index
5732  %wide.load = load <4 x float>, ptr %0, align 4
5733  %1 = getelementptr inbounds float, ptr %b, i64 %index
5734  %wide.load12 = load <4 x float>, ptr %1, align 4
  ; Multiplicands: the splat, then data from %a; addend: data from %b.
5735  %2 = call <4 x float> @llvm.vp.fmuladd.v4f32(<4 x float> %broadcast.splat, <4 x float> %wide.load, <4 x float> %wide.load12, <4 x i1> %m, i32 %vl)
5736  store <4 x float> %2, ptr %0, align 4
5737  %index.next = add nuw i64 %index, 4
5738  %3 = icmp eq i64 %index.next, 1024
5739  br i1 %3, label %for.cond.cleanup, label %vector.body
5740
5741for.cond.cleanup:
5742  ret void
5743}
5744