xref: /llvm-project/llvm/test/CodeGen/RISCV/memset-pattern.ll (revision 298127dcbe2ecd1f3c49c2109ac96654778f20be)
1*298127dcSAlex Bradbury; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2*298127dcSAlex Bradbury; RUN: llc < %s -mtriple=riscv32 -mattr=+m \
3*298127dcSAlex Bradbury; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32
4*298127dcSAlex Bradbury; RUN: llc < %s -mtriple=riscv64 -mattr=+m \
5*298127dcSAlex Bradbury; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64
6*298127dcSAlex Bradbury; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+unaligned-scalar-mem \
7*298127dcSAlex Bradbury; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST
8*298127dcSAlex Bradbury; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+unaligned-scalar-mem \
9*298127dcSAlex Bradbury; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST
10*298127dcSAlex Bradbury
11*298127dcSAlex Bradbury; TODO: Due to the initial naive lowering implementation of memset.pattern in
12*298127dcSAlex Bradbury; PreISelIntrinsicLowering, the generated code is suboptimal (e.g. a store loop is emitted even for a constant count of 1).
13*298127dcSAlex Bradbury
; memset_1: writes a single repetition (count operand i64 1) of the 128-bit
; pattern %value to %a, which the call marks as 8-byte aligned (ptr align 8).
; The trailing i1 0 operand is the intrinsic's volatile flag per the LLVM
; LangRef, so the stores are non-volatile.
; Only the shared -BOTH check prefixes are used below, i.e. the expected code
; is the same with and without +unaligned-scalar-mem. Even for this constant
; count of one, the expected code is still a store loop (.LBB0_1).
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
14*298127dcSAlex Bradburydefine void @memset_1(ptr %a, i128 %value) nounwind {
15*298127dcSAlex Bradbury; RV32-BOTH-LABEL: memset_1:
16*298127dcSAlex Bradbury; RV32-BOTH:       # %bb.0: # %loadstoreloop.preheader
17*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    li a2, 0
18*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    lw a3, 0(a1)
19*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    lw a4, 4(a1)
20*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    lw a5, 8(a1)
21*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    lw a1, 12(a1)
22*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    li a6, 0
23*298127dcSAlex Bradbury; RV32-BOTH-NEXT:  .LBB0_1: # %loadstoreloop
24*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    # =>This Inner Loop Header: Depth=1
25*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    slli a7, a2, 4
26*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    addi a2, a2, 1
27*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    add a7, a0, a7
28*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    seqz t0, a2
29*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    add a6, a6, t0
30*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    or t0, a2, a6
31*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    sw a3, 0(a7)
32*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    sw a4, 4(a7)
33*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    sw a5, 8(a7)
34*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    sw a1, 12(a7)
35*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    beqz t0, .LBB0_1
36*298127dcSAlex Bradbury; RV32-BOTH-NEXT:  # %bb.2: # %split
37*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    ret
38*298127dcSAlex Bradbury;
39*298127dcSAlex Bradbury; RV64-BOTH-LABEL: memset_1:
40*298127dcSAlex Bradbury; RV64-BOTH:       # %bb.0: # %loadstoreloop.preheader
41*298127dcSAlex Bradbury; RV64-BOTH-NEXT:    addi a3, a0, 16
42*298127dcSAlex Bradbury; RV64-BOTH-NEXT:  .LBB0_1: # %loadstoreloop
43*298127dcSAlex Bradbury; RV64-BOTH-NEXT:    # =>This Inner Loop Header: Depth=1
44*298127dcSAlex Bradbury; RV64-BOTH-NEXT:    sd a1, 0(a0)
45*298127dcSAlex Bradbury; RV64-BOTH-NEXT:    sd a2, 8(a0)
46*298127dcSAlex Bradbury; RV64-BOTH-NEXT:    addi a0, a0, 16
47*298127dcSAlex Bradbury; RV64-BOTH-NEXT:    bne a0, a3, .LBB0_1
48*298127dcSAlex Bradbury; RV64-BOTH-NEXT:  # %bb.2: # %split
49*298127dcSAlex Bradbury; RV64-BOTH-NEXT:    ret
50*298127dcSAlex Bradbury  tail call void @llvm.experimental.memset.pattern(ptr align 8 %a, i128 %value, i64 1, i1 0)
51*298127dcSAlex Bradbury  ret void
52*298127dcSAlex Bradbury}
53*298127dcSAlex Bradbury
; memset_1_noalign: same single-repetition (count operand i64 1) pattern store
; as memset_1, but the destination %a carries no alignment attribute.
; Because the expected code now depends on +unaligned-scalar-mem, the four
; llc configurations are checked separately instead of via the -BOTH prefixes:
;   * without +unaligned-scalar-mem (the plain RV32 and RV64 prefixes) every
;     byte of the pattern is written with its own sb, the shifted bytes are
;     precomputed with srli before the loop, and callee-saved s-registers are
;     spilled and reloaded around it;
;   * with +unaligned-scalar-mem (the -FAST prefixes) the expected code uses
;     sw/sd stores and is the same instruction sequence as for memset_1, apart
;     from the .LBB1_1 label number.
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
54*298127dcSAlex Bradburydefine void @memset_1_noalign(ptr %a, i128 %value) nounwind {
55*298127dcSAlex Bradbury; RV32-LABEL: memset_1_noalign:
56*298127dcSAlex Bradbury; RV32:       # %bb.0: # %loadstoreloop.preheader
57*298127dcSAlex Bradbury; RV32-NEXT:    addi sp, sp, -32
58*298127dcSAlex Bradbury; RV32-NEXT:    sw s0, 28(sp) # 4-byte Folded Spill
59*298127dcSAlex Bradbury; RV32-NEXT:    sw s1, 24(sp) # 4-byte Folded Spill
60*298127dcSAlex Bradbury; RV32-NEXT:    sw s2, 20(sp) # 4-byte Folded Spill
61*298127dcSAlex Bradbury; RV32-NEXT:    sw s3, 16(sp) # 4-byte Folded Spill
62*298127dcSAlex Bradbury; RV32-NEXT:    sw s4, 12(sp) # 4-byte Folded Spill
63*298127dcSAlex Bradbury; RV32-NEXT:    sw s5, 8(sp) # 4-byte Folded Spill
64*298127dcSAlex Bradbury; RV32-NEXT:    li a2, 0
65*298127dcSAlex Bradbury; RV32-NEXT:    li a3, 0
66*298127dcSAlex Bradbury; RV32-NEXT:    lw a4, 4(a1)
67*298127dcSAlex Bradbury; RV32-NEXT:    lw a5, 0(a1)
68*298127dcSAlex Bradbury; RV32-NEXT:    lw a6, 8(a1)
69*298127dcSAlex Bradbury; RV32-NEXT:    lw a1, 12(a1)
70*298127dcSAlex Bradbury; RV32-NEXT:    srli a7, a4, 24
71*298127dcSAlex Bradbury; RV32-NEXT:    srli t0, a4, 16
72*298127dcSAlex Bradbury; RV32-NEXT:    srli t1, a4, 8
73*298127dcSAlex Bradbury; RV32-NEXT:    srli t2, a5, 24
74*298127dcSAlex Bradbury; RV32-NEXT:    srli t3, a5, 16
75*298127dcSAlex Bradbury; RV32-NEXT:    srli t4, a5, 8
76*298127dcSAlex Bradbury; RV32-NEXT:    srli t5, a6, 24
77*298127dcSAlex Bradbury; RV32-NEXT:    srli t6, a6, 16
78*298127dcSAlex Bradbury; RV32-NEXT:    srli s0, a6, 8
79*298127dcSAlex Bradbury; RV32-NEXT:    srli s1, a1, 24
80*298127dcSAlex Bradbury; RV32-NEXT:    srli s2, a1, 16
81*298127dcSAlex Bradbury; RV32-NEXT:    srli s3, a1, 8
82*298127dcSAlex Bradbury; RV32-NEXT:  .LBB1_1: # %loadstoreloop
83*298127dcSAlex Bradbury; RV32-NEXT:    # =>This Inner Loop Header: Depth=1
84*298127dcSAlex Bradbury; RV32-NEXT:    slli s4, a2, 4
85*298127dcSAlex Bradbury; RV32-NEXT:    addi a2, a2, 1
86*298127dcSAlex Bradbury; RV32-NEXT:    add s4, a0, s4
87*298127dcSAlex Bradbury; RV32-NEXT:    seqz s5, a2
88*298127dcSAlex Bradbury; RV32-NEXT:    sb a4, 4(s4)
89*298127dcSAlex Bradbury; RV32-NEXT:    sb t1, 5(s4)
90*298127dcSAlex Bradbury; RV32-NEXT:    sb t0, 6(s4)
91*298127dcSAlex Bradbury; RV32-NEXT:    sb a7, 7(s4)
92*298127dcSAlex Bradbury; RV32-NEXT:    sb a5, 0(s4)
93*298127dcSAlex Bradbury; RV32-NEXT:    sb t4, 1(s4)
94*298127dcSAlex Bradbury; RV32-NEXT:    sb t3, 2(s4)
95*298127dcSAlex Bradbury; RV32-NEXT:    sb t2, 3(s4)
96*298127dcSAlex Bradbury; RV32-NEXT:    sb a6, 8(s4)
97*298127dcSAlex Bradbury; RV32-NEXT:    sb s0, 9(s4)
98*298127dcSAlex Bradbury; RV32-NEXT:    sb t6, 10(s4)
99*298127dcSAlex Bradbury; RV32-NEXT:    sb t5, 11(s4)
100*298127dcSAlex Bradbury; RV32-NEXT:    add a3, a3, s5
101*298127dcSAlex Bradbury; RV32-NEXT:    or s5, a2, a3
102*298127dcSAlex Bradbury; RV32-NEXT:    sb a1, 12(s4)
103*298127dcSAlex Bradbury; RV32-NEXT:    sb s3, 13(s4)
104*298127dcSAlex Bradbury; RV32-NEXT:    sb s2, 14(s4)
105*298127dcSAlex Bradbury; RV32-NEXT:    sb s1, 15(s4)
106*298127dcSAlex Bradbury; RV32-NEXT:    beqz s5, .LBB1_1
107*298127dcSAlex Bradbury; RV32-NEXT:  # %bb.2: # %split
108*298127dcSAlex Bradbury; RV32-NEXT:    lw s0, 28(sp) # 4-byte Folded Reload
109*298127dcSAlex Bradbury; RV32-NEXT:    lw s1, 24(sp) # 4-byte Folded Reload
110*298127dcSAlex Bradbury; RV32-NEXT:    lw s2, 20(sp) # 4-byte Folded Reload
111*298127dcSAlex Bradbury; RV32-NEXT:    lw s3, 16(sp) # 4-byte Folded Reload
112*298127dcSAlex Bradbury; RV32-NEXT:    lw s4, 12(sp) # 4-byte Folded Reload
113*298127dcSAlex Bradbury; RV32-NEXT:    lw s5, 8(sp) # 4-byte Folded Reload
114*298127dcSAlex Bradbury; RV32-NEXT:    addi sp, sp, 32
115*298127dcSAlex Bradbury; RV32-NEXT:    ret
116*298127dcSAlex Bradbury;
117*298127dcSAlex Bradbury; RV64-LABEL: memset_1_noalign:
118*298127dcSAlex Bradbury; RV64:       # %bb.0: # %loadstoreloop.preheader
119*298127dcSAlex Bradbury; RV64-NEXT:    addi sp, sp, -32
120*298127dcSAlex Bradbury; RV64-NEXT:    sd s0, 24(sp) # 8-byte Folded Spill
121*298127dcSAlex Bradbury; RV64-NEXT:    sd s1, 16(sp) # 8-byte Folded Spill
122*298127dcSAlex Bradbury; RV64-NEXT:    sd s2, 8(sp) # 8-byte Folded Spill
123*298127dcSAlex Bradbury; RV64-NEXT:    addi a3, a0, 16
124*298127dcSAlex Bradbury; RV64-NEXT:    srli a4, a1, 56
125*298127dcSAlex Bradbury; RV64-NEXT:    srli a5, a1, 48
126*298127dcSAlex Bradbury; RV64-NEXT:    srli a6, a1, 40
127*298127dcSAlex Bradbury; RV64-NEXT:    srli a7, a1, 32
128*298127dcSAlex Bradbury; RV64-NEXT:    srli t0, a1, 24
129*298127dcSAlex Bradbury; RV64-NEXT:    srli t1, a1, 16
130*298127dcSAlex Bradbury; RV64-NEXT:    srli t2, a1, 8
131*298127dcSAlex Bradbury; RV64-NEXT:    srli t3, a2, 56
132*298127dcSAlex Bradbury; RV64-NEXT:    srli t4, a2, 48
133*298127dcSAlex Bradbury; RV64-NEXT:    srli t5, a2, 40
134*298127dcSAlex Bradbury; RV64-NEXT:    srli t6, a2, 32
135*298127dcSAlex Bradbury; RV64-NEXT:    srli s0, a2, 24
136*298127dcSAlex Bradbury; RV64-NEXT:    srli s1, a2, 16
137*298127dcSAlex Bradbury; RV64-NEXT:    srli s2, a2, 8
138*298127dcSAlex Bradbury; RV64-NEXT:  .LBB1_1: # %loadstoreloop
139*298127dcSAlex Bradbury; RV64-NEXT:    # =>This Inner Loop Header: Depth=1
140*298127dcSAlex Bradbury; RV64-NEXT:    sb a7, 4(a0)
141*298127dcSAlex Bradbury; RV64-NEXT:    sb a6, 5(a0)
142*298127dcSAlex Bradbury; RV64-NEXT:    sb a5, 6(a0)
143*298127dcSAlex Bradbury; RV64-NEXT:    sb a4, 7(a0)
144*298127dcSAlex Bradbury; RV64-NEXT:    sb a1, 0(a0)
145*298127dcSAlex Bradbury; RV64-NEXT:    sb t2, 1(a0)
146*298127dcSAlex Bradbury; RV64-NEXT:    sb t1, 2(a0)
147*298127dcSAlex Bradbury; RV64-NEXT:    sb t0, 3(a0)
148*298127dcSAlex Bradbury; RV64-NEXT:    sb t6, 12(a0)
149*298127dcSAlex Bradbury; RV64-NEXT:    sb t5, 13(a0)
150*298127dcSAlex Bradbury; RV64-NEXT:    sb t4, 14(a0)
151*298127dcSAlex Bradbury; RV64-NEXT:    sb t3, 15(a0)
152*298127dcSAlex Bradbury; RV64-NEXT:    sb a2, 8(a0)
153*298127dcSAlex Bradbury; RV64-NEXT:    sb s2, 9(a0)
154*298127dcSAlex Bradbury; RV64-NEXT:    sb s1, 10(a0)
155*298127dcSAlex Bradbury; RV64-NEXT:    sb s0, 11(a0)
156*298127dcSAlex Bradbury; RV64-NEXT:    addi a0, a0, 16
157*298127dcSAlex Bradbury; RV64-NEXT:    bne a0, a3, .LBB1_1
158*298127dcSAlex Bradbury; RV64-NEXT:  # %bb.2: # %split
159*298127dcSAlex Bradbury; RV64-NEXT:    ld s0, 24(sp) # 8-byte Folded Reload
160*298127dcSAlex Bradbury; RV64-NEXT:    ld s1, 16(sp) # 8-byte Folded Reload
161*298127dcSAlex Bradbury; RV64-NEXT:    ld s2, 8(sp) # 8-byte Folded Reload
162*298127dcSAlex Bradbury; RV64-NEXT:    addi sp, sp, 32
163*298127dcSAlex Bradbury; RV64-NEXT:    ret
164*298127dcSAlex Bradbury;
165*298127dcSAlex Bradbury; RV32-FAST-LABEL: memset_1_noalign:
166*298127dcSAlex Bradbury; RV32-FAST:       # %bb.0: # %loadstoreloop.preheader
167*298127dcSAlex Bradbury; RV32-FAST-NEXT:    li a2, 0
168*298127dcSAlex Bradbury; RV32-FAST-NEXT:    lw a3, 0(a1)
169*298127dcSAlex Bradbury; RV32-FAST-NEXT:    lw a4, 4(a1)
170*298127dcSAlex Bradbury; RV32-FAST-NEXT:    lw a5, 8(a1)
171*298127dcSAlex Bradbury; RV32-FAST-NEXT:    lw a1, 12(a1)
172*298127dcSAlex Bradbury; RV32-FAST-NEXT:    li a6, 0
173*298127dcSAlex Bradbury; RV32-FAST-NEXT:  .LBB1_1: # %loadstoreloop
174*298127dcSAlex Bradbury; RV32-FAST-NEXT:    # =>This Inner Loop Header: Depth=1
175*298127dcSAlex Bradbury; RV32-FAST-NEXT:    slli a7, a2, 4
176*298127dcSAlex Bradbury; RV32-FAST-NEXT:    addi a2, a2, 1
177*298127dcSAlex Bradbury; RV32-FAST-NEXT:    add a7, a0, a7
178*298127dcSAlex Bradbury; RV32-FAST-NEXT:    seqz t0, a2
179*298127dcSAlex Bradbury; RV32-FAST-NEXT:    add a6, a6, t0
180*298127dcSAlex Bradbury; RV32-FAST-NEXT:    or t0, a2, a6
181*298127dcSAlex Bradbury; RV32-FAST-NEXT:    sw a3, 0(a7)
182*298127dcSAlex Bradbury; RV32-FAST-NEXT:    sw a4, 4(a7)
183*298127dcSAlex Bradbury; RV32-FAST-NEXT:    sw a5, 8(a7)
184*298127dcSAlex Bradbury; RV32-FAST-NEXT:    sw a1, 12(a7)
185*298127dcSAlex Bradbury; RV32-FAST-NEXT:    beqz t0, .LBB1_1
186*298127dcSAlex Bradbury; RV32-FAST-NEXT:  # %bb.2: # %split
187*298127dcSAlex Bradbury; RV32-FAST-NEXT:    ret
188*298127dcSAlex Bradbury;
189*298127dcSAlex Bradbury; RV64-FAST-LABEL: memset_1_noalign:
190*298127dcSAlex Bradbury; RV64-FAST:       # %bb.0: # %loadstoreloop.preheader
191*298127dcSAlex Bradbury; RV64-FAST-NEXT:    addi a3, a0, 16
192*298127dcSAlex Bradbury; RV64-FAST-NEXT:  .LBB1_1: # %loadstoreloop
193*298127dcSAlex Bradbury; RV64-FAST-NEXT:    # =>This Inner Loop Header: Depth=1
194*298127dcSAlex Bradbury; RV64-FAST-NEXT:    sd a1, 0(a0)
195*298127dcSAlex Bradbury; RV64-FAST-NEXT:    sd a2, 8(a0)
196*298127dcSAlex Bradbury; RV64-FAST-NEXT:    addi a0, a0, 16
197*298127dcSAlex Bradbury; RV64-FAST-NEXT:    bne a0, a3, .LBB1_1
198*298127dcSAlex Bradbury; RV64-FAST-NEXT:  # %bb.2: # %split
199*298127dcSAlex Bradbury; RV64-FAST-NEXT:    ret
200*298127dcSAlex Bradbury  tail call void @llvm.experimental.memset.pattern(ptr %a, i128 %value, i64 1, i1 0)
201*298127dcSAlex Bradbury  ret void
202*298127dcSAlex Bradbury}
203*298127dcSAlex Bradbury
; memset_4: writes four repetitions (count operand i64 4) of the 128-bit
; pattern %value to the 8-byte-aligned destination %a.
; Only the shared -BOTH check prefixes are used, i.e. the expected code is the
; same with and without +unaligned-scalar-mem.
;   * RV64: the loop end pointer is a0 + 64 (four 16-byte pattern copies).
;   * RV32: the 64-bit iteration counter is kept in a2 (low) / a6 (high) and
;     the loop continues while the high word is zero and the low word is
;     below 4 (seqz on a6 combined with sltiu a2, 4).
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
204*298127dcSAlex Bradburydefine void @memset_4(ptr %a, i128 %value) nounwind {
205*298127dcSAlex Bradbury; RV32-BOTH-LABEL: memset_4:
206*298127dcSAlex Bradbury; RV32-BOTH:       # %bb.0: # %loadstoreloop.preheader
207*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    li a2, 0
208*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    lw a3, 0(a1)
209*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    lw a4, 4(a1)
210*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    lw a5, 8(a1)
211*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    lw a1, 12(a1)
212*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    li a6, 0
213*298127dcSAlex Bradbury; RV32-BOTH-NEXT:  .LBB2_1: # %loadstoreloop
214*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    # =>This Inner Loop Header: Depth=1
215*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    slli a7, a2, 4
216*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    addi a2, a2, 1
217*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    seqz t0, a2
218*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    sltiu t1, a2, 4
219*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    add a6, a6, t0
220*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    seqz t0, a6
221*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    and t0, t0, t1
222*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    add a7, a0, a7
223*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    sw a3, 0(a7)
224*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    sw a4, 4(a7)
225*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    sw a5, 8(a7)
226*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    sw a1, 12(a7)
227*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    bnez t0, .LBB2_1
228*298127dcSAlex Bradbury; RV32-BOTH-NEXT:  # %bb.2: # %split
229*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    ret
230*298127dcSAlex Bradbury;
231*298127dcSAlex Bradbury; RV64-BOTH-LABEL: memset_4:
232*298127dcSAlex Bradbury; RV64-BOTH:       # %bb.0: # %loadstoreloop.preheader
233*298127dcSAlex Bradbury; RV64-BOTH-NEXT:    addi a3, a0, 64
234*298127dcSAlex Bradbury; RV64-BOTH-NEXT:  .LBB2_1: # %loadstoreloop
235*298127dcSAlex Bradbury; RV64-BOTH-NEXT:    # =>This Inner Loop Header: Depth=1
236*298127dcSAlex Bradbury; RV64-BOTH-NEXT:    sd a1, 0(a0)
237*298127dcSAlex Bradbury; RV64-BOTH-NEXT:    sd a2, 8(a0)
238*298127dcSAlex Bradbury; RV64-BOTH-NEXT:    addi a0, a0, 16
239*298127dcSAlex Bradbury; RV64-BOTH-NEXT:    bne a0, a3, .LBB2_1
240*298127dcSAlex Bradbury; RV64-BOTH-NEXT:  # %bb.2: # %split
241*298127dcSAlex Bradbury; RV64-BOTH-NEXT:    ret
242*298127dcSAlex Bradbury  tail call void @llvm.experimental.memset.pattern(ptr align 8 %a, i128 %value, i64 4, i1 0)
243*298127dcSAlex Bradbury  ret void
244*298127dcSAlex Bradbury}
245*298127dcSAlex Bradbury
; memset_x: writes %x repetitions of the 128-bit pattern %value to the
; 8-byte-aligned destination %a, where the count %x is a runtime i64 argument.
; Only the shared -BOTH check prefixes are used, i.e. the expected code is the
; same with and without +unaligned-scalar-mem.
; Unlike the constant-count tests, the expected code first branches straight
; to %split when the count is zero (RV64: beqz a3; RV32: or of the two count
; halves a2/a3 followed by beqz), so no store is executed for %x == 0.
; The loop then compares the running counter against %x with unsigned
; comparisons (bltu on RV64; sltu on the high and low halves on RV32).
; The assertion lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
246*298127dcSAlex Bradburydefine void @memset_x(ptr %a, i128 %value, i64 %x) nounwind {
247*298127dcSAlex Bradbury; RV32-BOTH-LABEL: memset_x:
248*298127dcSAlex Bradbury; RV32-BOTH:       # %bb.0:
249*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    or a4, a2, a3
250*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    beqz a4, .LBB3_5
251*298127dcSAlex Bradbury; RV32-BOTH-NEXT:  # %bb.1: # %loadstoreloop.preheader
252*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    li a4, 0
253*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    lw a5, 0(a1)
254*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    lw a6, 4(a1)
255*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    lw a7, 8(a1)
256*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    lw a1, 12(a1)
257*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    li t0, 0
258*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    j .LBB3_3
259*298127dcSAlex Bradbury; RV32-BOTH-NEXT:  .LBB3_2: # %loadstoreloop
260*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    # in Loop: Header=BB3_3 Depth=1
261*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    sltu t1, t0, a3
262*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    beqz t1, .LBB3_5
263*298127dcSAlex Bradbury; RV32-BOTH-NEXT:  .LBB3_3: # %loadstoreloop
264*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    # =>This Inner Loop Header: Depth=1
265*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    slli t1, a4, 4
266*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    addi a4, a4, 1
267*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    seqz t2, a4
268*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    add t0, t0, t2
269*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    add t1, a0, t1
270*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    sw a5, 0(t1)
271*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    sw a6, 4(t1)
272*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    sw a7, 8(t1)
273*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    sw a1, 12(t1)
274*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    bne t0, a3, .LBB3_2
275*298127dcSAlex Bradbury; RV32-BOTH-NEXT:  # %bb.4: # in Loop: Header=BB3_3 Depth=1
276*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    sltu t1, a4, a2
277*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    bnez t1, .LBB3_3
278*298127dcSAlex Bradbury; RV32-BOTH-NEXT:  .LBB3_5: # %split
279*298127dcSAlex Bradbury; RV32-BOTH-NEXT:    ret
280*298127dcSAlex Bradbury;
281*298127dcSAlex Bradbury; RV64-BOTH-LABEL: memset_x:
282*298127dcSAlex Bradbury; RV64-BOTH:       # %bb.0:
283*298127dcSAlex Bradbury; RV64-BOTH-NEXT:    beqz a3, .LBB3_3
284*298127dcSAlex Bradbury; RV64-BOTH-NEXT:  # %bb.1: # %loadstoreloop.preheader
285*298127dcSAlex Bradbury; RV64-BOTH-NEXT:    li a4, 0
286*298127dcSAlex Bradbury; RV64-BOTH-NEXT:  .LBB3_2: # %loadstoreloop
287*298127dcSAlex Bradbury; RV64-BOTH-NEXT:    # =>This Inner Loop Header: Depth=1
288*298127dcSAlex Bradbury; RV64-BOTH-NEXT:    sd a1, 0(a0)
289*298127dcSAlex Bradbury; RV64-BOTH-NEXT:    sd a2, 8(a0)
290*298127dcSAlex Bradbury; RV64-BOTH-NEXT:    addi a4, a4, 1
291*298127dcSAlex Bradbury; RV64-BOTH-NEXT:    addi a0, a0, 16
292*298127dcSAlex Bradbury; RV64-BOTH-NEXT:    bltu a4, a3, .LBB3_2
293*298127dcSAlex Bradbury; RV64-BOTH-NEXT:  .LBB3_3: # %split
294*298127dcSAlex Bradbury; RV64-BOTH-NEXT:    ret
295*298127dcSAlex Bradbury  tail call void @llvm.experimental.memset.pattern(ptr align 8 %a, i128 %value, i64 %x, i1 0)
296*298127dcSAlex Bradbury  ret void
297*298127dcSAlex Bradbury}
298