xref: /llvm-project/llvm/test/CodeGen/RISCV/rvv/pr95865.ll (revision 9122c5235ec85ce0c0ad337e862b006e7b349d84)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -mtriple=riscv64 -mattr=+v,+c < %s | FileCheck %s
3
4; This previously crashed when spilling a GPR because when we removed a dead
5; ADDI we weren't removing it from the LIS instruction map. Needs +c to trigger.
6
7define i32 @main(i1 %arg.1, i64 %arg.2, i1 %arg.3, i64 %arg.4, i1 %arg.5, <vscale x 4 x i1> %arg.6, i64 %arg.7, i1 %arg.8, i64 %arg.9, i32 %arg.10) vscale_range(2,2) {
; Overview of @main: five nested loops (%for.cond1.preheader.i down to
; %vector.body.i) wrapped around six masked scatters, followed by an indirect
; call and a small vector-compare tail. The four outer loop exits are driven
; directly by i1 arguments, and vscale_range(2,2) fixes vscale at exactly 2.
; NOTE(review): the null pointers and constant masks look like the output of
; a test-case reducer rather than meaningful inputs; confirm before
; simplifying. The assertions below are autogenerated; regenerate them with
; the script named in the NOTE line instead of editing them by hand.
8; CHECK-LABEL: main:
9; CHECK:       # %bb.0: # %entry
10; CHECK-NEXT:    addi sp, sp, -112
11; CHECK-NEXT:    .cfi_def_cfa_offset 112
12; CHECK-NEXT:    sd ra, 104(sp) # 8-byte Folded Spill
13; CHECK-NEXT:    sd s0, 96(sp) # 8-byte Folded Spill
14; CHECK-NEXT:    sd s1, 88(sp) # 8-byte Folded Spill
15; CHECK-NEXT:    sd s2, 80(sp) # 8-byte Folded Spill
16; CHECK-NEXT:    sd s3, 72(sp) # 8-byte Folded Spill
17; CHECK-NEXT:    sd s4, 64(sp) # 8-byte Folded Spill
18; CHECK-NEXT:    sd s5, 56(sp) # 8-byte Folded Spill
19; CHECK-NEXT:    sd s6, 48(sp) # 8-byte Folded Spill
20; CHECK-NEXT:    sd s7, 40(sp) # 8-byte Folded Spill
21; CHECK-NEXT:    sd s8, 32(sp) # 8-byte Folded Spill
22; CHECK-NEXT:    sd s9, 24(sp) # 8-byte Folded Spill
23; CHECK-NEXT:    sd s10, 16(sp) # 8-byte Folded Spill
24; CHECK-NEXT:    sd s11, 8(sp) # 8-byte Folded Spill
25; CHECK-NEXT:    .cfi_offset ra, -8
26; CHECK-NEXT:    .cfi_offset s0, -16
27; CHECK-NEXT:    .cfi_offset s1, -24
28; CHECK-NEXT:    .cfi_offset s2, -32
29; CHECK-NEXT:    .cfi_offset s3, -40
30; CHECK-NEXT:    .cfi_offset s4, -48
31; CHECK-NEXT:    .cfi_offset s5, -56
32; CHECK-NEXT:    .cfi_offset s6, -64
33; CHECK-NEXT:    .cfi_offset s7, -72
34; CHECK-NEXT:    .cfi_offset s8, -80
35; CHECK-NEXT:    .cfi_offset s9, -88
36; CHECK-NEXT:    .cfi_offset s10, -96
37; CHECK-NEXT:    .cfi_offset s11, -104
38; CHECK-NEXT:    li a6, 0
39; CHECK-NEXT:    li s2, 8
40; CHECK-NEXT:    li t0, 12
41; CHECK-NEXT:    li s0, 4
42; CHECK-NEXT:    li t1, 20
43; CHECK-NEXT:    ld a1, 112(sp)
44; CHECK-NEXT:    sd a1, 0(sp) # 8-byte Folded Spill
45; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
46; CHECK-NEXT:    vmv.v.i v8, 0
47; CHECK-NEXT:    andi t3, a4, 1
48; CHECK-NEXT:    li t2, 4
49; CHECK-NEXT:  .LBB0_1: # %for.cond1.preheader.i
50; CHECK-NEXT:    # =>This Loop Header: Depth=1
51; CHECK-NEXT:    # Child Loop BB0_2 Depth 2
52; CHECK-NEXT:    # Child Loop BB0_3 Depth 3
53; CHECK-NEXT:    # Child Loop BB0_4 Depth 4
54; CHECK-NEXT:    # Child Loop BB0_5 Depth 5
55; CHECK-NEXT:    mv t4, t1
56; CHECK-NEXT:    mv t5, t2
57; CHECK-NEXT:    mv t6, t0
58; CHECK-NEXT:    mv a7, s2
59; CHECK-NEXT:    mv s4, a6
60; CHECK-NEXT:  .LBB0_2: # %for.cond5.preheader.i
61; CHECK-NEXT:    # Parent Loop BB0_1 Depth=1
62; CHECK-NEXT:    # => This Loop Header: Depth=2
63; CHECK-NEXT:    # Child Loop BB0_3 Depth 3
64; CHECK-NEXT:    # Child Loop BB0_4 Depth 4
65; CHECK-NEXT:    # Child Loop BB0_5 Depth 5
66; CHECK-NEXT:    mv s5, t4
67; CHECK-NEXT:    mv s6, t5
68; CHECK-NEXT:    mv s7, t6
69; CHECK-NEXT:    mv s3, a7
70; CHECK-NEXT:    mv s9, s4
71; CHECK-NEXT:  .LBB0_3: # %for.cond9.preheader.i
72; CHECK-NEXT:    # Parent Loop BB0_1 Depth=1
73; CHECK-NEXT:    # Parent Loop BB0_2 Depth=2
74; CHECK-NEXT:    # => This Loop Header: Depth=3
75; CHECK-NEXT:    # Child Loop BB0_4 Depth 4
76; CHECK-NEXT:    # Child Loop BB0_5 Depth 5
77; CHECK-NEXT:    mv s11, s5
78; CHECK-NEXT:    mv a3, s6
79; CHECK-NEXT:    mv ra, s7
80; CHECK-NEXT:    mv s8, s3
81; CHECK-NEXT:    mv s1, s9
82; CHECK-NEXT:  .LBB0_4: # %vector.ph.i
83; CHECK-NEXT:    # Parent Loop BB0_1 Depth=1
84; CHECK-NEXT:    # Parent Loop BB0_2 Depth=2
85; CHECK-NEXT:    # Parent Loop BB0_3 Depth=3
86; CHECK-NEXT:    # => This Loop Header: Depth=4
87; CHECK-NEXT:    # Child Loop BB0_5 Depth 5
88; CHECK-NEXT:    li a1, 0
89; CHECK-NEXT:  .LBB0_5: # %vector.body.i
90; CHECK-NEXT:    # Parent Loop BB0_1 Depth=1
91; CHECK-NEXT:    # Parent Loop BB0_2 Depth=2
92; CHECK-NEXT:    # Parent Loop BB0_3 Depth=3
93; CHECK-NEXT:    # Parent Loop BB0_4 Depth=4
94; CHECK-NEXT:    # => This Inner Loop Header: Depth=5
95; CHECK-NEXT:    addi a5, a1, 4
96; CHECK-NEXT:    add a4, s8, a1
97; CHECK-NEXT:    add a1, a1, a3
98; CHECK-NEXT:    vse32.v v8, (a4), v0.t
99; CHECK-NEXT:    vse32.v v8, (a1), v0.t
100; CHECK-NEXT:    mv a1, a5
101; CHECK-NEXT:    bne a5, s0, .LBB0_5
102; CHECK-NEXT:  # %bb.6: # %for.cond.cleanup15.i
103; CHECK-NEXT:    # in Loop: Header=BB0_4 Depth=4
104; CHECK-NEXT:    addi s1, s1, 4
105; CHECK-NEXT:    addi s8, s8, 4
106; CHECK-NEXT:    addi ra, ra, 4
107; CHECK-NEXT:    addi a3, a3, 4
108; CHECK-NEXT:    andi s10, a0, 1
109; CHECK-NEXT:    addi s11, s11, 4
110; CHECK-NEXT:    beqz s10, .LBB0_4
111; CHECK-NEXT:  # %bb.7: # %for.cond.cleanup11.i
112; CHECK-NEXT:    # in Loop: Header=BB0_3 Depth=3
113; CHECK-NEXT:    addi s9, s9, 4
114; CHECK-NEXT:    addi s3, s3, 4
115; CHECK-NEXT:    addi s7, s7, 4
116; CHECK-NEXT:    addi s6, s6, 4
117; CHECK-NEXT:    andi a1, a2, 1
118; CHECK-NEXT:    addi s5, s5, 4
119; CHECK-NEXT:    beqz a1, .LBB0_3
120; CHECK-NEXT:  # %bb.8: # %for.cond.cleanup7.i
121; CHECK-NEXT:    # in Loop: Header=BB0_2 Depth=2
122; CHECK-NEXT:    addi s4, s4, 4
123; CHECK-NEXT:    addi a7, a7, 4
124; CHECK-NEXT:    addi t6, t6, 4
125; CHECK-NEXT:    addi t5, t5, 4
126; CHECK-NEXT:    addi t4, t4, 4
127; CHECK-NEXT:    beqz t3, .LBB0_2
128; CHECK-NEXT:  # %bb.9: # %for.cond.cleanup3.i
129; CHECK-NEXT:    # in Loop: Header=BB0_1 Depth=1
130; CHECK-NEXT:    addi a6, a6, 4
131; CHECK-NEXT:    addi s2, s2, 4
132; CHECK-NEXT:    addi t0, t0, 4
133; CHECK-NEXT:    addi t2, t2, 4
134; CHECK-NEXT:    addi t1, t1, 4
135; CHECK-NEXT:    beqz a1, .LBB0_1
136; CHECK-NEXT:  # %bb.10: # %l.exit
137; CHECK-NEXT:    li a0, 0
138; CHECK-NEXT:    jalr a0
139; CHECK-NEXT:    beqz s10, .LBB0_12
140; CHECK-NEXT:  .LBB0_11: # %for.body7.us.14
141; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
142; CHECK-NEXT:    j .LBB0_11
143; CHECK-NEXT:  .LBB0_12: # %for.body7.us.19
144; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
145; CHECK-NEXT:    ld a0, 0(sp) # 8-byte Folded Reload
146; CHECK-NEXT:    vmv.s.x v8, a0
147; CHECK-NEXT:    vmv.v.i v16, 0
148; CHECK-NEXT:    vsetivli zero, 2, e32, m1, tu, ma
149; CHECK-NEXT:    vslideup.vi v16, v8, 1
150; CHECK-NEXT:    vsetvli a0, zero, e32, m8, ta, ma
151; CHECK-NEXT:    vmsne.vi v8, v16, 0
152; CHECK-NEXT:    vmv.x.s a0, v8
153; CHECK-NEXT:    snez a0, a0
154; CHECK-NEXT:    sb a0, 0(zero)
155; CHECK-NEXT:    li a0, 0
156; CHECK-NEXT:    ld ra, 104(sp) # 8-byte Folded Reload
157; CHECK-NEXT:    ld s0, 96(sp) # 8-byte Folded Reload
158; CHECK-NEXT:    ld s1, 88(sp) # 8-byte Folded Reload
159; CHECK-NEXT:    ld s2, 80(sp) # 8-byte Folded Reload
160; CHECK-NEXT:    ld s3, 72(sp) # 8-byte Folded Reload
161; CHECK-NEXT:    ld s4, 64(sp) # 8-byte Folded Reload
162; CHECK-NEXT:    ld s5, 56(sp) # 8-byte Folded Reload
163; CHECK-NEXT:    ld s6, 48(sp) # 8-byte Folded Reload
164; CHECK-NEXT:    ld s7, 40(sp) # 8-byte Folded Reload
165; CHECK-NEXT:    ld s8, 32(sp) # 8-byte Folded Reload
166; CHECK-NEXT:    ld s9, 24(sp) # 8-byte Folded Reload
167; CHECK-NEXT:    ld s10, 16(sp) # 8-byte Folded Reload
168; CHECK-NEXT:    ld s11, 8(sp) # 8-byte Folded Reload
169; CHECK-NEXT:    .cfi_restore ra
170; CHECK-NEXT:    .cfi_restore s0
171; CHECK-NEXT:    .cfi_restore s1
172; CHECK-NEXT:    .cfi_restore s2
173; CHECK-NEXT:    .cfi_restore s3
174; CHECK-NEXT:    .cfi_restore s4
175; CHECK-NEXT:    .cfi_restore s5
176; CHECK-NEXT:    .cfi_restore s6
177; CHECK-NEXT:    .cfi_restore s7
178; CHECK-NEXT:    .cfi_restore s8
179; CHECK-NEXT:    .cfi_restore s9
180; CHECK-NEXT:    .cfi_restore s10
181; CHECK-NEXT:    .cfi_restore s11
182; CHECK-NEXT:    addi sp, sp, 112
183; CHECK-NEXT:    .cfi_def_cfa_offset 0
184; CHECK-NEXT:    ret
185entry:
  ; Step vector; it seeds the %vec.ind.i phi in %vector.body.i.
186  %0 = tail call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
187  br label %for.cond1.preheader.i
188
189for.cond1.preheader.i:                            ; preds = %for.cond.cleanup3.i, %entry
  ; Loop level 1 (outermost). The counter starts at 0 and is advanced by 1 in
  ; %for.cond.cleanup3.i.
190  %arg.21 = phi i64 [ 0, %entry ], [ %indvars.iv.next74.i, %for.cond.cleanup3.i ]
191  br label %for.cond5.preheader.i
192
193for.cond5.preheader.i:                            ; preds = %for.cond.cleanup7.i, %for.cond1.preheader.i
  ; Loop level 2. %1 accumulates the level-1 and level-2 counters.
194  %arg.42 = phi i64 [ 0, %for.cond1.preheader.i ], [ %indvars.iv.next70.i, %for.cond.cleanup7.i ]
195  %1 = add i64 %arg.42, %arg.21
196  br label %for.cond9.preheader.i
197
198for.cond.cleanup3.i:                              ; preds = %for.cond.cleanup7.i
  ; Level-1 latch: leaves the whole nest for %l.exit when %arg.3 is true.
199  %indvars.iv.next74.i = add i64 %arg.21, 1
200  br i1 %arg.3, label %l.exit, label %for.cond1.preheader.i
201
202for.cond9.preheader.i:                            ; preds = %for.cond.cleanup11.i, %for.cond5.preheader.i
  ; Loop level 3. %2 adds the level-3 counter to the running sum %1.
203  %arg.74 = phi i64 [ 0, %for.cond5.preheader.i ], [ %indvars.iv.next66.i, %for.cond.cleanup11.i ]
204  %2 = add i64 %1, %arg.74
205  br label %vector.ph.i
206
207for.cond.cleanup7.i:                              ; preds = %for.cond.cleanup11.i
  ; Level-2 latch: exits to the level-1 latch when %arg.5 is true.
208  %indvars.iv.next70.i = add i64 %arg.42, 1
209  br i1 %arg.5, label %for.cond.cleanup3.i, label %for.cond5.preheader.i
210
211vector.ph.i:                                      ; preds = %for.cond.cleanup15.i, %for.cond9.preheader.i
  ; Loop level 4. %3 is the sum of all four outer counters; it is inserted
  ; into lane 0 and then broadcast to every lane by the all-zero shuffle mask.
212  %arg.96 = phi i64 [ 0, %for.cond9.preheader.i ], [ %indvars.iv.next62.i, %for.cond.cleanup15.i ]
213  %3 = add i64 %2, %arg.96
214  %broadcast.splatinsert.i = insertelement <vscale x 4 x i64> zeroinitializer, i64 %3, i64 0
215  %broadcast.splat.i = shufflevector <vscale x 4 x i64> %broadcast.splatinsert.i, <vscale x 4 x i64> zeroinitializer, <vscale x 4 x i32> zeroinitializer
216  br label %vector.body.i
217
218vector.body.i:                                    ; preds = %vector.body.i, %vector.ph.i
  ; Loop level 5 (innermost). %4 is the induction vector plus the splat of %3.
  ; Each scatter below stores zeroinitializer through a vector of pointers
  ; formed by indexing a [6 x i32] array based at null.
219  %index.i = phi i64 [ 0, %vector.ph.i ], [ %index.next.i, %vector.body.i ]
220  %vec.ind.i = phi <vscale x 4 x i64> [ %0, %vector.ph.i ], [ %6, %vector.body.i ]
221  %4 = add <vscale x 4 x i64> %vec.ind.i, %broadcast.splat.i
222  %5 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %4
  ; Mask operand is zeroinitializer, i.e. every lane is disabled.
223  tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %5, i32 4, <vscale x 4 x i1> zeroinitializer)
  ; %6 (induction vector + splat of 1) is both the next value of %vec.ind.i
  ; and the index vector of the second scatter, whose mask is also all-false.
224  %6 = add <vscale x 4 x i64> %vec.ind.i, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
225  %7 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %6
226  tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %7, i32 4, <vscale x 4 x i1> zeroinitializer)
  ; Index %4 + 2, masked by %arg.6. This and the %4 + 1 scatter further down
  ; are the only two scatters whose mask is not the constant all-false mask.
  ; NOTE(review): the two v0.t-masked vse32.v stores in the expected assembly
  ; presumably originate from these two scatters; confirm.
227  %arg.100 = add <vscale x 4 x i64> %4, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 2, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
228  %arg.101 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %arg.100
229  tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %arg.101, i32 4, <vscale x 4 x i1> %arg.6)
  ; Index %4 + 3, all-false mask.
230  %arg.102 = add <vscale x 4 x i64> %4, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 3, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
231  %arg.103 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %arg.102
232  tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %arg.103, i32 4, <vscale x 4 x i1> zeroinitializer)
  ; Index %4 + 1, masked by %arg.6.
233  %arg.104 = add <vscale x 4 x i64> %4, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
234  %arg.105 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %arg.104
235  tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %arg.105, i32 4, <vscale x 4 x i1> %arg.6)
  ; Index %4 + 5, all-false mask.
236  %arg.106 = add <vscale x 4 x i64> %4, shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 5, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
237  %arg.107 = getelementptr [6 x i32], ptr null, i64 0, <vscale x 4 x i64> %arg.106
238  tail call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> %arg.107, i32 4, <vscale x 4 x i1> zeroinitializer)
  ; The exit test compares the pre-increment %index.i with 0. Since %index.i
  ; is 0 when entered from %vector.ph.i, the test is already true on the
  ; first iteration.
239  %index.next.i = add i64 %index.i, 1
240  %arg.108 = icmp eq i64 %index.i, 0
241  br i1 %arg.108, label %for.cond.cleanup15.i, label %vector.body.i
242
243for.cond.cleanup11.i:                             ; preds = %for.cond.cleanup15.i
  ; Level-3 latch: exits when %arg.3 is true (the same flag that controls the
  ; level-1 exit).
244  %indvars.iv.next66.i = add i64 %arg.74, 1
245  br i1 %arg.3, label %for.cond.cleanup7.i, label %for.cond9.preheader.i
246
247for.cond.cleanup15.i:                             ; preds = %vector.body.i
  ; Level-4 latch: exits when %arg.1 is true.
248  %indvars.iv.next62.i = add i64 %arg.96, 1
249  br i1 %arg.1, label %for.cond.cleanup11.i, label %vector.ph.i
250
251l.exit:                                           ; preds = %for.cond.cleanup3.i
  ; Indirect call through a null function pointer, then dispatch on %arg.1
  ; (the same flag that controls the level-4 exit).
252  tail call void null()
253  br i1 %arg.1, label %for.body7.us.14, label %for.body7.us.19
254
255for.body7.us.14:                                  ; preds = %for.body7.us.14, %l.exit
  ; Self-loop with no exit edge.
256  br label %for.body7.us.14
257
258for.body7.us.19:                                  ; preds = %l.exit
  ; Put %arg.10 into lane 1 of an otherwise all-zero <32 x i32>, compare every
  ; lane against zero, pack the 32 i1 results into an i32, and store
  ; (packed != 0) as a single byte to address null before returning 0.
259  %arg.109 = insertelement <32 x i32> zeroinitializer, i32 %arg.10, i64 1
260  %8 = icmp ne <32 x i32> %arg.109, zeroinitializer
261  %9 = bitcast <32 x i1> %8 to i32
262  %op.rdx13 = icmp ne i32 %9, 0
263  %op.rdx = zext i1 %op.rdx13 to i8
264  store i8 %op.rdx, ptr null, align 1
265  ret i32 0
266}
267