; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=riscv32 -mattr=+m \
; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32
; RUN: llc < %s -mtriple=riscv64 -mattr=+m \
; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64
; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+unaligned-scalar-mem \
; RUN:   | FileCheck %s --check-prefixes=RV32-BOTH,RV32-FAST
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+unaligned-scalar-mem \
; RUN:   | FileCheck %s --check-prefixes=RV64-BOTH,RV64-FAST

; TODO: The initial lowering of memset.pattern in PreISelIntrinsicLowering is
; naive, so the generated code is currently poor.

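; A compile-time pattern count of 1 with an align 8 destination: the pattern
; is stored with full-width sw (RV32) or sd (RV64) stores. Note that the
; naive lowering still emits a loop even for a constant count of 1.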
define void @memset_1(ptr %a, i128 %value) nounwind {
; RV32-BOTH-LABEL: memset_1:
; RV32-BOTH:       # %bb.0: # %loadstoreloop.preheader
; RV32-BOTH-NEXT:    li a2, 0
; RV32-BOTH-NEXT:    lw a3, 0(a1)
; RV32-BOTH-NEXT:    lw a4, 4(a1)
; RV32-BOTH-NEXT:    lw a5, 8(a1)
; RV32-BOTH-NEXT:    lw a1, 12(a1)
; RV32-BOTH-NEXT:    li a6, 0
; RV32-BOTH-NEXT:  .LBB0_1: # %loadstoreloop
; RV32-BOTH-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32-BOTH-NEXT:    slli a7, a2, 4
; RV32-BOTH-NEXT:    addi a2, a2, 1
; RV32-BOTH-NEXT:    add a7, a0, a7
; RV32-BOTH-NEXT:    seqz t0, a2
; RV32-BOTH-NEXT:    add a6, a6, t0
; RV32-BOTH-NEXT:    or t0, a2, a6
; RV32-BOTH-NEXT:    sw a3, 0(a7)
; RV32-BOTH-NEXT:    sw a4, 4(a7)
; RV32-BOTH-NEXT:    sw a5, 8(a7)
; RV32-BOTH-NEXT:    sw a1, 12(a7)
; RV32-BOTH-NEXT:    beqz t0, .LBB0_1
; RV32-BOTH-NEXT:  # %bb.2: # %split
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: memset_1:
; RV64-BOTH:       # %bb.0: # %loadstoreloop.preheader
; RV64-BOTH-NEXT:    addi a3, a0, 16
; RV64-BOTH-NEXT:  .LBB0_1: # %loadstoreloop
; RV64-BOTH-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64-BOTH-NEXT:    sd a1, 0(a0)
; RV64-BOTH-NEXT:    sd a2, 8(a0)
; RV64-BOTH-NEXT:    addi a0, a0, 16
; RV64-BOTH-NEXT:    bne a0, a3, .LBB0_1
; RV64-BOTH-NEXT:  # %bb.2: # %split
; RV64-BOTH-NEXT:    ret
  tail call void @llvm.experimental.memset.pattern(ptr align 8 %a, i128 %value, i64 1, i1 0)
  ret void
}

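; As above, but with no alignment on the destination pointer: without
; +unaligned-scalar-mem the pattern must be stored one byte at a time (sb),
; while the -FAST configurations still use full-width sw/sd stores.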
define void @memset_1_noalign(ptr %a, i128 %value) nounwind {
; RV32-LABEL: memset_1_noalign:
; RV32:       # %bb.0: # %loadstoreloop.preheader
; RV32-NEXT:    addi sp, sp, -32
; RV32-NEXT:    sw s0, 28(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s1, 24(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s2, 20(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s3, 16(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s4, 12(sp) # 4-byte Folded Spill
; RV32-NEXT:    sw s5, 8(sp) # 4-byte Folded Spill
; RV32-NEXT:    li a2, 0
; RV32-NEXT:    li a3, 0
; RV32-NEXT:    lw a4, 4(a1)
; RV32-NEXT:    lw a5, 0(a1)
; RV32-NEXT:    lw a6, 8(a1)
; RV32-NEXT:    lw a1, 12(a1)
; RV32-NEXT:    srli a7, a4, 24
; RV32-NEXT:    srli t0, a4, 16
; RV32-NEXT:    srli t1, a4, 8
; RV32-NEXT:    srli t2, a5, 24
; RV32-NEXT:    srli t3, a5, 16
; RV32-NEXT:    srli t4, a5, 8
; RV32-NEXT:    srli t5, a6, 24
; RV32-NEXT:    srli t6, a6, 16
; RV32-NEXT:    srli s0, a6, 8
; RV32-NEXT:    srli s1, a1, 24
; RV32-NEXT:    srli s2, a1, 16
; RV32-NEXT:    srli s3, a1, 8
; RV32-NEXT:  .LBB1_1: # %loadstoreloop
; RV32-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32-NEXT:    slli s4, a2, 4
; RV32-NEXT:    addi a2, a2, 1
; RV32-NEXT:    add s4, a0, s4
; RV32-NEXT:    seqz s5, a2
; RV32-NEXT:    sb a4, 4(s4)
; RV32-NEXT:    sb t1, 5(s4)
; RV32-NEXT:    sb t0, 6(s4)
; RV32-NEXT:    sb a7, 7(s4)
; RV32-NEXT:    sb a5, 0(s4)
; RV32-NEXT:    sb t4, 1(s4)
; RV32-NEXT:    sb t3, 2(s4)
; RV32-NEXT:    sb t2, 3(s4)
; RV32-NEXT:    sb a6, 8(s4)
; RV32-NEXT:    sb s0, 9(s4)
; RV32-NEXT:    sb t6, 10(s4)
; RV32-NEXT:    sb t5, 11(s4)
; RV32-NEXT:    add a3, a3, s5
; RV32-NEXT:    or s5, a2, a3
; RV32-NEXT:    sb a1, 12(s4)
; RV32-NEXT:    sb s3, 13(s4)
; RV32-NEXT:    sb s2, 14(s4)
; RV32-NEXT:    sb s1, 15(s4)
; RV32-NEXT:    beqz s5, .LBB1_1
; RV32-NEXT:  # %bb.2: # %split
; RV32-NEXT:    lw s0, 28(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s1, 24(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s2, 20(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s3, 16(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s4, 12(sp) # 4-byte Folded Reload
; RV32-NEXT:    lw s5, 8(sp) # 4-byte Folded Reload
; RV32-NEXT:    addi sp, sp, 32
; RV32-NEXT:    ret
;
; RV64-LABEL: memset_1_noalign:
; RV64:       # %bb.0: # %loadstoreloop.preheader
; RV64-NEXT:    addi sp, sp, -32
; RV64-NEXT:    sd s0, 24(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s1, 16(sp) # 8-byte Folded Spill
; RV64-NEXT:    sd s2, 8(sp) # 8-byte Folded Spill
; RV64-NEXT:    addi a3, a0, 16
; RV64-NEXT:    srli a4, a1, 56
; RV64-NEXT:    srli a5, a1, 48
; RV64-NEXT:    srli a6, a1, 40
; RV64-NEXT:    srli a7, a1, 32
; RV64-NEXT:    srli t0, a1, 24
; RV64-NEXT:    srli t1, a1, 16
; RV64-NEXT:    srli t2, a1, 8
; RV64-NEXT:    srli t3, a2, 56
; RV64-NEXT:    srli t4, a2, 48
; RV64-NEXT:    srli t5, a2, 40
; RV64-NEXT:    srli t6, a2, 32
; RV64-NEXT:    srli s0, a2, 24
; RV64-NEXT:    srli s1, a2, 16
; RV64-NEXT:    srli s2, a2, 8
; RV64-NEXT:  .LBB1_1: # %loadstoreloop
; RV64-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64-NEXT:    sb a7, 4(a0)
; RV64-NEXT:    sb a6, 5(a0)
; RV64-NEXT:    sb a5, 6(a0)
; RV64-NEXT:    sb a4, 7(a0)
; RV64-NEXT:    sb a1, 0(a0)
; RV64-NEXT:    sb t2, 1(a0)
; RV64-NEXT:    sb t1, 2(a0)
; RV64-NEXT:    sb t0, 3(a0)
; RV64-NEXT:    sb t6, 12(a0)
; RV64-NEXT:    sb t5, 13(a0)
; RV64-NEXT:    sb t4, 14(a0)
; RV64-NEXT:    sb t3, 15(a0)
; RV64-NEXT:    sb a2, 8(a0)
; RV64-NEXT:    sb s2, 9(a0)
; RV64-NEXT:    sb s1, 10(a0)
; RV64-NEXT:    sb s0, 11(a0)
; RV64-NEXT:    addi a0, a0, 16
; RV64-NEXT:    bne a0, a3, .LBB1_1
; RV64-NEXT:  # %bb.2: # %split
; RV64-NEXT:    ld s0, 24(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s1, 16(sp) # 8-byte Folded Reload
; RV64-NEXT:    ld s2, 8(sp) # 8-byte Folded Reload
; RV64-NEXT:    addi sp, sp, 32
; RV64-NEXT:    ret
;
; RV32-FAST-LABEL: memset_1_noalign:
; RV32-FAST:       # %bb.0: # %loadstoreloop.preheader
; RV32-FAST-NEXT:    li a2, 0
; RV32-FAST-NEXT:    lw a3, 0(a1)
; RV32-FAST-NEXT:    lw a4, 4(a1)
; RV32-FAST-NEXT:    lw a5, 8(a1)
; RV32-FAST-NEXT:    lw a1, 12(a1)
; RV32-FAST-NEXT:    li a6, 0
; RV32-FAST-NEXT:  .LBB1_1: # %loadstoreloop
; RV32-FAST-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32-FAST-NEXT:    slli a7, a2, 4
; RV32-FAST-NEXT:    addi a2, a2, 1
; RV32-FAST-NEXT:    add a7, a0, a7
; RV32-FAST-NEXT:    seqz t0, a2
; RV32-FAST-NEXT:    add a6, a6, t0
; RV32-FAST-NEXT:    or t0, a2, a6
; RV32-FAST-NEXT:    sw a3, 0(a7)
; RV32-FAST-NEXT:    sw a4, 4(a7)
; RV32-FAST-NEXT:    sw a5, 8(a7)
; RV32-FAST-NEXT:    sw a1, 12(a7)
; RV32-FAST-NEXT:    beqz t0, .LBB1_1
; RV32-FAST-NEXT:  # %bb.2: # %split
; RV32-FAST-NEXT:    ret
;
; RV64-FAST-LABEL: memset_1_noalign:
; RV64-FAST:       # %bb.0: # %loadstoreloop.preheader
; RV64-FAST-NEXT:    addi a3, a0, 16
; RV64-FAST-NEXT:  .LBB1_1: # %loadstoreloop
; RV64-FAST-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64-FAST-NEXT:    sd a1, 0(a0)
; RV64-FAST-NEXT:    sd a2, 8(a0)
; RV64-FAST-NEXT:    addi a0, a0, 16
; RV64-FAST-NEXT:    bne a0, a3, .LBB1_1
; RV64-FAST-NEXT:  # %bb.2: # %split
; RV64-FAST-NEXT:    ret
  tail call void @llvm.experimental.memset.pattern(ptr %a, i128 %value, i64 1, i1 0)
  ret void
}

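; A constant pattern count of 4. RV64 forms the end pointer up front
; (addi a3, a0, 64); RV32 must carry the i64 trip count in a register pair,
; hence the seqz/sltiu exit-condition computation in the loop.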
define void @memset_4(ptr %a, i128 %value) nounwind {
; RV32-BOTH-LABEL: memset_4:
; RV32-BOTH:       # %bb.0: # %loadstoreloop.preheader
; RV32-BOTH-NEXT:    li a2, 0
; RV32-BOTH-NEXT:    lw a3, 0(a1)
; RV32-BOTH-NEXT:    lw a4, 4(a1)
; RV32-BOTH-NEXT:    lw a5, 8(a1)
; RV32-BOTH-NEXT:    lw a1, 12(a1)
; RV32-BOTH-NEXT:    li a6, 0
; RV32-BOTH-NEXT:  .LBB2_1: # %loadstoreloop
; RV32-BOTH-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32-BOTH-NEXT:    slli a7, a2, 4
; RV32-BOTH-NEXT:    addi a2, a2, 1
; RV32-BOTH-NEXT:    seqz t0, a2
; RV32-BOTH-NEXT:    sltiu t1, a2, 4
; RV32-BOTH-NEXT:    add a6, a6, t0
; RV32-BOTH-NEXT:    seqz t0, a6
; RV32-BOTH-NEXT:    and t0, t0, t1
; RV32-BOTH-NEXT:    add a7, a0, a7
; RV32-BOTH-NEXT:    sw a3, 0(a7)
; RV32-BOTH-NEXT:    sw a4, 4(a7)
; RV32-BOTH-NEXT:    sw a5, 8(a7)
; RV32-BOTH-NEXT:    sw a1, 12(a7)
; RV32-BOTH-NEXT:    bnez t0, .LBB2_1
; RV32-BOTH-NEXT:  # %bb.2: # %split
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: memset_4:
; RV64-BOTH:       # %bb.0: # %loadstoreloop.preheader
; RV64-BOTH-NEXT:    addi a3, a0, 64
; RV64-BOTH-NEXT:  .LBB2_1: # %loadstoreloop
; RV64-BOTH-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64-BOTH-NEXT:    sd a1, 0(a0)
; RV64-BOTH-NEXT:    sd a2, 8(a0)
; RV64-BOTH-NEXT:    addi a0, a0, 16
; RV64-BOTH-NEXT:    bne a0, a3, .LBB2_1
; RV64-BOTH-NEXT:  # %bb.2: # %split
; RV64-BOTH-NEXT:    ret
  tail call void @llvm.experimental.memset.pattern(ptr align 8 %a, i128 %value, i64 4, i1 0)
  ret void
}

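; A runtime pattern count %x, which may be zero, so the store loop is guarded
; by an up-front zero check on the count (or+beqz on the i64 register pair
; for RV32, a single beqz for RV64).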
define void @memset_x(ptr %a, i128 %value, i64 %x) nounwind {
; RV32-BOTH-LABEL: memset_x:
; RV32-BOTH:       # %bb.0:
; RV32-BOTH-NEXT:    or a4, a2, a3
; RV32-BOTH-NEXT:    beqz a4, .LBB3_5
; RV32-BOTH-NEXT:  # %bb.1: # %loadstoreloop.preheader
; RV32-BOTH-NEXT:    li a4, 0
; RV32-BOTH-NEXT:    lw a5, 0(a1)
; RV32-BOTH-NEXT:    lw a6, 4(a1)
; RV32-BOTH-NEXT:    lw a7, 8(a1)
; RV32-BOTH-NEXT:    lw a1, 12(a1)
; RV32-BOTH-NEXT:    li t0, 0
; RV32-BOTH-NEXT:    j .LBB3_3
; RV32-BOTH-NEXT:  .LBB3_2: # %loadstoreloop
; RV32-BOTH-NEXT:    # in Loop: Header=BB3_3 Depth=1
; RV32-BOTH-NEXT:    sltu t1, t0, a3
; RV32-BOTH-NEXT:    beqz t1, .LBB3_5
; RV32-BOTH-NEXT:  .LBB3_3: # %loadstoreloop
; RV32-BOTH-NEXT:    # =>This Inner Loop Header: Depth=1
; RV32-BOTH-NEXT:    slli t1, a4, 4
; RV32-BOTH-NEXT:    addi a4, a4, 1
; RV32-BOTH-NEXT:    seqz t2, a4
; RV32-BOTH-NEXT:    add t0, t0, t2
; RV32-BOTH-NEXT:    add t1, a0, t1
; RV32-BOTH-NEXT:    sw a5, 0(t1)
; RV32-BOTH-NEXT:    sw a6, 4(t1)
; RV32-BOTH-NEXT:    sw a7, 8(t1)
; RV32-BOTH-NEXT:    sw a1, 12(t1)
; RV32-BOTH-NEXT:    bne t0, a3, .LBB3_2
; RV32-BOTH-NEXT:  # %bb.4: # in Loop: Header=BB3_3 Depth=1
; RV32-BOTH-NEXT:    sltu t1, a4, a2
; RV32-BOTH-NEXT:    bnez t1, .LBB3_3
; RV32-BOTH-NEXT:  .LBB3_5: # %split
; RV32-BOTH-NEXT:    ret
;
; RV64-BOTH-LABEL: memset_x:
; RV64-BOTH:       # %bb.0:
; RV64-BOTH-NEXT:    beqz a3, .LBB3_3
; RV64-BOTH-NEXT:  # %bb.1: # %loadstoreloop.preheader
; RV64-BOTH-NEXT:    li a4, 0
; RV64-BOTH-NEXT:  .LBB3_2: # %loadstoreloop
; RV64-BOTH-NEXT:    # =>This Inner Loop Header: Depth=1
; RV64-BOTH-NEXT:    sd a1, 0(a0)
; RV64-BOTH-NEXT:    sd a2, 8(a0)
; RV64-BOTH-NEXT:    addi a4, a4, 1
; RV64-BOTH-NEXT:    addi a0, a0, 16
; RV64-BOTH-NEXT:    bltu a4, a3, .LBB3_2
; RV64-BOTH-NEXT:  .LBB3_3: # %split
; RV64-BOTH-NEXT:    ret
  tail call void @llvm.experimental.memset.pattern(ptr align 8 %a, i128 %value, i64 %x, i1 0)
  ret void
}