xref: /llvm-project/llvm/test/CodeGen/RISCV/nontemporal.ll (revision 97982a8c605fac7c86d02e641a6cd7898b3ca343)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=riscv64 -mattr=+zihintntl,+f,+d,+zfh < %s | FileCheck %s -check-prefix=CHECK-RV64
3; RUN: llc -mtriple=riscv32 -mattr=+zihintntl,+f,+d,+zfh < %s | FileCheck %s -check-prefix=CHECK-RV32
4; RUN: llc -mtriple=riscv64 -mattr=+zihintntl,+f,+d,+zfh,+c < %s | FileCheck %s -check-prefix=CHECK-RV64C
5; RUN: llc -mtriple=riscv32 -mattr=+zihintntl,+f,+d,+zfh,+c < %s | FileCheck %s -check-prefix=CHECK-RV32C
6; RUN: llc -mtriple=riscv64 -mattr=+zihintntl,+f,+d,+zfh,+v < %s | FileCheck %s -check-prefix=CHECK-RV64V
7; RUN: llc -mtriple=riscv32 -mattr=+zihintntl,+f,+d,+zfh,+v < %s | FileCheck %s -check-prefix=CHECK-RV32V
8
; Nontemporal load of an i64.
; Every configuration expects a nontemporal-locality hint directly in front of
; each memory access: `ntl.all`, or `c.ntl.all` in the runs that add +c.
; The riscv32 runs read the value as two `lw` words (offsets 0 and 4), each
; with its own hint, and then move the word from offset 0 into a0.
define i64 @test_nontemporal_load_i64(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_load_i64:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    ld a0, 0(a0)
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_load_i64:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a2, 0(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a1, 4(a0)
; CHECK-RV32-NEXT:    mv a0, a2
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_load_i64:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    ld a0, 0(a0)
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_load_i64:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a2, 0(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a1, 4(a0)
; CHECK-RV32C-NEXT:    mv a0, a2
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_load_i64:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    ld a0, 0(a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_load_i64:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    lw a2, 0(a0)
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    lw a1, 4(a0)
; CHECK-RV32V-NEXT:    mv a0, a2
; CHECK-RV32V-NEXT:    ret

  ; The !nontemporal attachment is the only thing distinguishing this load
  ; from an ordinary one.
  %val = load i64, ptr %p, !nontemporal !0
  ret i64 %val
}
58
; Nontemporal load of an i32.
; All six configurations expect a single `lw` preceded by one hint
; (`ntl.all`, or `c.ntl.all` in the runs that add +c).
define i32 @test_nontemporal_load_i32(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_load_i32:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    lw a0, 0(a0)
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_load_i32:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a0, 0(a0)
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_load_i32:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    lw a0, 0(a0)
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_load_i32:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a0, 0(a0)
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_load_i32:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    lw a0, 0(a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_load_i32:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    lw a0, 0(a0)
; CHECK-RV32V-NEXT:    ret

  %val = load i32, ptr %p, !nontemporal !0
  ret i32 %val
}
99
; Nontemporal load of an i16.
; All six configurations expect a single `lh` preceded by one hint
; (`ntl.all`, or `c.ntl.all` in the runs that add +c).
define i16 @test_nontemporal_load_i16(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_load_i16:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    lh a0, 0(a0)
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_load_i16:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lh a0, 0(a0)
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_load_i16:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    lh a0, 0(a0)
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_load_i16:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lh a0, 0(a0)
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_load_i16:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    lh a0, 0(a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_load_i16:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    lh a0, 0(a0)
; CHECK-RV32V-NEXT:    ret

  %val = load i16, ptr %p, !nontemporal !0
  ret i16 %val
}
140
; Nontemporal load of an i8.
; All six configurations expect a single `lbu` preceded by one hint
; (`ntl.all`, or `c.ntl.all` in the runs that add +c).
define i8 @test_nontemporal_load_i8(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_load_i8:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    lbu a0, 0(a0)
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_load_i8:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lbu a0, 0(a0)
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_load_i8:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    lbu a0, 0(a0)
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_load_i8:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lbu a0, 0(a0)
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_load_i8:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    lbu a0, 0(a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_load_i8:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    lbu a0, 0(a0)
; CHECK-RV32V-NEXT:    ret

  %val = load i8, ptr %p, !nontemporal !0
  ret i8 %val
}
181
; Two nontemporal half loads from the same base pointer, summed with fadd.
; The second load reads element 3 of %p, which the checks show as byte
; offset 6. Each `flh` is expected to carry its own hint (`ntl.all`, or
; `c.ntl.all` in the runs that add +c); the `fadd.h` itself gets none.
define half @test_nontemporal_load_half(ptr %p) nounwind {
; CHECK-RV64-LABEL: test_nontemporal_load_half:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    flh fa5, 0(a0)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    flh fa4, 6(a0)
; CHECK-RV64-NEXT:    fadd.h fa0, fa5, fa4
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_load_half:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    flh fa5, 0(a0)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    flh fa4, 6(a0)
; CHECK-RV32-NEXT:    fadd.h fa0, fa5, fa4
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_load_half:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    flh fa5, 0(a0)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    flh fa4, 6(a0)
; CHECK-RV64C-NEXT:    fadd.h fa0, fa5, fa4
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_load_half:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    flh fa5, 0(a0)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    flh fa4, 6(a0)
; CHECK-RV32C-NEXT:    fadd.h fa0, fa5, fa4
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_load_half:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    flh fa5, 0(a0)
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    flh fa4, 6(a0)
; CHECK-RV64V-NEXT:    fadd.h fa0, fa5, fa4
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_load_half:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    flh fa5, 0(a0)
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    flh fa4, 6(a0)
; CHECK-RV32V-NEXT:    fadd.h fa0, fa5, fa4
; CHECK-RV32V-NEXT:    ret

  ; First operand: element 0.
  %first = load half, ptr %p, !nontemporal !0
  ; Second operand: element 3, also loaded nontemporally.
  %fourth.addr = getelementptr half, ptr %p, i32 3
  %fourth = load half, ptr %fourth.addr, !nontemporal !0
  %sum = fadd half %first, %fourth
  ret half %sum
}
243
; Nontemporal load of a float.
; All six configurations expect a single `flw` into fa0 preceded by one hint
; (`ntl.all`, or `c.ntl.all` in the runs that add +c).
define float @test_nontemporal_load_float(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_load_float:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    flw fa0, 0(a0)
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_load_float:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    flw fa0, 0(a0)
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_load_float:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    flw fa0, 0(a0)
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_load_float:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    flw fa0, 0(a0)
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_load_float:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    flw fa0, 0(a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_load_float:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    flw fa0, 0(a0)
; CHECK-RV32V-NEXT:    ret

  %val = load float, ptr %p, !nontemporal !0
  ret float %val
}
284
; Nontemporal load of a double.
; All six configurations, riscv32 included, expect a single `fld` into fa0
; preceded by one hint (`ntl.all`, or `c.ntl.all` in the runs that add +c).
define double @test_nontemporal_load_double(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_load_double:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    fld fa0, 0(a0)
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_load_double:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    fld fa0, 0(a0)
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_load_double:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    fld fa0, 0(a0)
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_load_double:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    fld fa0, 0(a0)
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_load_double:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    fld fa0, 0(a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_load_double:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    fld fa0, 0(a0)
; CHECK-RV32V-NEXT:    ret

  %val = load double, ptr %p, !nontemporal !0
  ret double %val
}
325
; Nontemporal load of a <16 x i8> vector.
; Without +v the checks show scalar code: hinted `ld`/`lw` loads from a1
; followed by plain (unhinted) stores through a0 -- presumably the result is
; returned through a buffer passed in a0; confirm against the calling
; convention. With +v a single hinted `vle8.v` is expected after `vsetivli`.
define <16 x i8> @test_nontemporal_load_v16i8(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_load_v16i8:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    ld a2, 0(a1)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    ld a1, 8(a1)
; CHECK-RV64-NEXT:    sd a2, 0(a0)
; CHECK-RV64-NEXT:    sd a1, 8(a0)
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_load_v16i8:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a2, 0(a1)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a3, 4(a1)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a4, 8(a1)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a1, 12(a1)
; CHECK-RV32-NEXT:    sw a2, 0(a0)
; CHECK-RV32-NEXT:    sw a3, 4(a0)
; CHECK-RV32-NEXT:    sw a4, 8(a0)
; CHECK-RV32-NEXT:    sw a1, 12(a0)
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_load_v16i8:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    ld a2, 0(a1)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    ld a1, 8(a1)
; CHECK-RV64C-NEXT:    sd a2, 0(a0)
; CHECK-RV64C-NEXT:    sd a1, 8(a0)
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_load_v16i8:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a2, 0(a1)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a3, 4(a1)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a4, 8(a1)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a1, 12(a1)
; CHECK-RV32C-NEXT:    sw a2, 0(a0)
; CHECK-RV32C-NEXT:    sw a3, 4(a0)
; CHECK-RV32C-NEXT:    sw a4, 8(a0)
; CHECK-RV32C-NEXT:    sw a1, 12(a0)
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_load_v16i8:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    vle8.v v8, (a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_load_v16i8:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    vle8.v v8, (a0)
; CHECK-RV32V-NEXT:    ret

  %vec = load <16 x i8>, ptr %p, !nontemporal !0
  ret <16 x i8> %vec
}
396
; Nontemporal load of an <8 x i16> vector.
; Without +v the checks show scalar code: hinted `ld`/`lw` loads from a1
; followed by plain (unhinted) stores through a0 -- presumably the result is
; returned through a buffer passed in a0; confirm against the calling
; convention. With +v a single hinted `vle16.v` is expected after `vsetivli`.
define <8 x i16> @test_nontemporal_load_v8i16(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_load_v8i16:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    ld a2, 0(a1)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    ld a1, 8(a1)
; CHECK-RV64-NEXT:    sd a2, 0(a0)
; CHECK-RV64-NEXT:    sd a1, 8(a0)
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_load_v8i16:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a2, 0(a1)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a3, 4(a1)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a4, 8(a1)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a1, 12(a1)
; CHECK-RV32-NEXT:    sw a2, 0(a0)
; CHECK-RV32-NEXT:    sw a3, 4(a0)
; CHECK-RV32-NEXT:    sw a4, 8(a0)
; CHECK-RV32-NEXT:    sw a1, 12(a0)
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_load_v8i16:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    ld a2, 0(a1)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    ld a1, 8(a1)
; CHECK-RV64C-NEXT:    sd a2, 0(a0)
; CHECK-RV64C-NEXT:    sd a1, 8(a0)
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_load_v8i16:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a2, 0(a1)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a3, 4(a1)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a4, 8(a1)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a1, 12(a1)
; CHECK-RV32C-NEXT:    sw a2, 0(a0)
; CHECK-RV32C-NEXT:    sw a3, 4(a0)
; CHECK-RV32C-NEXT:    sw a4, 8(a0)
; CHECK-RV32C-NEXT:    sw a1, 12(a0)
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_load_v8i16:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    vle16.v v8, (a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_load_v8i16:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    vle16.v v8, (a0)
; CHECK-RV32V-NEXT:    ret

  %vec = load <8 x i16>, ptr %p, !nontemporal !0
  ret <8 x i16> %vec
}
467
; Nontemporal load of a <4 x i32> vector.
; Without +v the checks show scalar code: hinted `ld`/`lw` loads from a1
; followed by plain (unhinted) stores through a0 -- presumably the result is
; returned through a buffer passed in a0; confirm against the calling
; convention. With +v a single hinted `vle32.v` is expected after `vsetivli`.
define <4 x i32> @test_nontemporal_load_v4i32(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_load_v4i32:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    ld a2, 0(a1)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    ld a1, 8(a1)
; CHECK-RV64-NEXT:    sd a2, 0(a0)
; CHECK-RV64-NEXT:    sd a1, 8(a0)
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_load_v4i32:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a2, 0(a1)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a3, 4(a1)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a4, 8(a1)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a1, 12(a1)
; CHECK-RV32-NEXT:    sw a2, 0(a0)
; CHECK-RV32-NEXT:    sw a3, 4(a0)
; CHECK-RV32-NEXT:    sw a4, 8(a0)
; CHECK-RV32-NEXT:    sw a1, 12(a0)
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_load_v4i32:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    ld a2, 0(a1)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    ld a1, 8(a1)
; CHECK-RV64C-NEXT:    sd a2, 0(a0)
; CHECK-RV64C-NEXT:    sd a1, 8(a0)
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_load_v4i32:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a2, 0(a1)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a3, 4(a1)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a4, 8(a1)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a1, 12(a1)
; CHECK-RV32C-NEXT:    sw a2, 0(a0)
; CHECK-RV32C-NEXT:    sw a3, 4(a0)
; CHECK-RV32C-NEXT:    sw a4, 8(a0)
; CHECK-RV32C-NEXT:    sw a1, 12(a0)
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_load_v4i32:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    vle32.v v8, (a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_load_v4i32:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    vle32.v v8, (a0)
; CHECK-RV32V-NEXT:    ret

  %vec = load <4 x i32>, ptr %p, !nontemporal !0
  ret <4 x i32> %vec
}
538
; Nontemporal load of a <2 x i64> vector.
; Unlike the other vector loads in this file, the riscv64 runs without +v
; expect the two elements to come back in a0/a1: two hinted `ld` from a0, then
; `mv a0, a2`. The riscv32 runs without +v expect four hinted `lw` from a1 and
; four plain `sw` through a0. With +v a single hinted `vle64.v` is expected.
define <2 x i64> @test_nontemporal_load_v2i64(ptr %p) {
; CHECK-RV64-LABEL: test_nontemporal_load_v2i64:
; CHECK-RV64:       # %bb.0:
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    ld a2, 0(a0)
; CHECK-RV64-NEXT:    ntl.all
; CHECK-RV64-NEXT:    ld a1, 8(a0)
; CHECK-RV64-NEXT:    mv a0, a2
; CHECK-RV64-NEXT:    ret
;
; CHECK-RV32-LABEL: test_nontemporal_load_v2i64:
; CHECK-RV32:       # %bb.0:
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a2, 0(a1)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a3, 4(a1)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a4, 8(a1)
; CHECK-RV32-NEXT:    ntl.all
; CHECK-RV32-NEXT:    lw a1, 12(a1)
; CHECK-RV32-NEXT:    sw a2, 0(a0)
; CHECK-RV32-NEXT:    sw a3, 4(a0)
; CHECK-RV32-NEXT:    sw a4, 8(a0)
; CHECK-RV32-NEXT:    sw a1, 12(a0)
; CHECK-RV32-NEXT:    ret
;
; CHECK-RV64C-LABEL: test_nontemporal_load_v2i64:
; CHECK-RV64C:       # %bb.0:
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    ld a2, 0(a0)
; CHECK-RV64C-NEXT:    c.ntl.all
; CHECK-RV64C-NEXT:    ld a1, 8(a0)
; CHECK-RV64C-NEXT:    mv a0, a2
; CHECK-RV64C-NEXT:    ret
;
; CHECK-RV32C-LABEL: test_nontemporal_load_v2i64:
; CHECK-RV32C:       # %bb.0:
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a2, 0(a1)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a3, 4(a1)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a4, 8(a1)
; CHECK-RV32C-NEXT:    c.ntl.all
; CHECK-RV32C-NEXT:    lw a1, 12(a1)
; CHECK-RV32C-NEXT:    sw a2, 0(a0)
; CHECK-RV32C-NEXT:    sw a3, 4(a0)
; CHECK-RV32C-NEXT:    sw a4, 8(a0)
; CHECK-RV32C-NEXT:    sw a1, 12(a0)
; CHECK-RV32C-NEXT:    ret
;
; CHECK-RV64V-LABEL: test_nontemporal_load_v2i64:
; CHECK-RV64V:       # %bb.0:
; CHECK-RV64V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-RV64V-NEXT:    ntl.all
; CHECK-RV64V-NEXT:    vle64.v v8, (a0)
; CHECK-RV64V-NEXT:    ret
;
; CHECK-RV32V-LABEL: test_nontemporal_load_v2i64:
; CHECK-RV32V:       # %bb.0:
; CHECK-RV32V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-RV32V-NEXT:    ntl.all
; CHECK-RV32V-NEXT:    vle64.v v8, (a0)
; CHECK-RV32V-NEXT:    ret

  %vec = load <2 x i64>, ptr %p, !nontemporal !0
  ret <2 x i64> %vec
}
607
; Nontemporal store of an i64.
; The riscv64 runs expect one `sd` preceded by a hint (`ntl.all`, or
; `c.ntl.all` in the run that adds +c). The riscv32 runs expect the value to be
; written as two `sw` (a1 to offset 0, a2 to offset 4), each with its own hint.
608define void @test_nontemporal_store_i64(ptr %p, i64 %v) {
609; CHECK-RV64-LABEL: test_nontemporal_store_i64:
610; CHECK-RV64:       # %bb.0:
611; CHECK-RV64-NEXT:    ntl.all
612; CHECK-RV64-NEXT:    sd a1, 0(a0)
613; CHECK-RV64-NEXT:    ret
614;
615; CHECK-RV32-LABEL: test_nontemporal_store_i64:
616; CHECK-RV32:       # %bb.0:
617; CHECK-RV32-NEXT:    ntl.all
618; CHECK-RV32-NEXT:    sw a1, 0(a0)
619; CHECK-RV32-NEXT:    ntl.all
620; CHECK-RV32-NEXT:    sw a2, 4(a0)
621; CHECK-RV32-NEXT:    ret
622;
623; CHECK-RV64C-LABEL: test_nontemporal_store_i64:
624; CHECK-RV64C:       # %bb.0:
625; CHECK-RV64C-NEXT:    c.ntl.all
626; CHECK-RV64C-NEXT:    sd a1, 0(a0)
627; CHECK-RV64C-NEXT:    ret
628;
629; CHECK-RV32C-LABEL: test_nontemporal_store_i64:
630; CHECK-RV32C:       # %bb.0:
631; CHECK-RV32C-NEXT:    c.ntl.all
632; CHECK-RV32C-NEXT:    sw a1, 0(a0)
633; CHECK-RV32C-NEXT:    c.ntl.all
634; CHECK-RV32C-NEXT:    sw a2, 4(a0)
635; CHECK-RV32C-NEXT:    ret
636;
637; CHECK-RV64V-LABEL: test_nontemporal_store_i64:
638; CHECK-RV64V:       # %bb.0:
639; CHECK-RV64V-NEXT:    ntl.all
640; CHECK-RV64V-NEXT:    sd a1, 0(a0)
641; CHECK-RV64V-NEXT:    ret
642;
643; CHECK-RV32V-LABEL: test_nontemporal_store_i64:
644; CHECK-RV32V:       # %bb.0:
645; CHECK-RV32V-NEXT:    ntl.all
646; CHECK-RV32V-NEXT:    sw a1, 0(a0)
647; CHECK-RV32V-NEXT:    ntl.all
648; CHECK-RV32V-NEXT:    sw a2, 4(a0)
649; CHECK-RV32V-NEXT:    ret
650
651  store i64 %v, ptr %p, !nontemporal !0
652  ret void
653}
654
; Nontemporal store of an i32.
; All six configurations expect a single `sw` of a1 preceded by one hint
; (`ntl.all`, or `c.ntl.all` in the runs that add +c).
655define void @test_nontemporal_store_i32(ptr %p, i32 %v) {
656; CHECK-RV64-LABEL: test_nontemporal_store_i32:
657; CHECK-RV64:       # %bb.0:
658; CHECK-RV64-NEXT:    ntl.all
659; CHECK-RV64-NEXT:    sw a1, 0(a0)
660; CHECK-RV64-NEXT:    ret
661;
662; CHECK-RV32-LABEL: test_nontemporal_store_i32:
663; CHECK-RV32:       # %bb.0:
664; CHECK-RV32-NEXT:    ntl.all
665; CHECK-RV32-NEXT:    sw a1, 0(a0)
666; CHECK-RV32-NEXT:    ret
667;
668; CHECK-RV64C-LABEL: test_nontemporal_store_i32:
669; CHECK-RV64C:       # %bb.0:
670; CHECK-RV64C-NEXT:    c.ntl.all
671; CHECK-RV64C-NEXT:    sw a1, 0(a0)
672; CHECK-RV64C-NEXT:    ret
673;
674; CHECK-RV32C-LABEL: test_nontemporal_store_i32:
675; CHECK-RV32C:       # %bb.0:
676; CHECK-RV32C-NEXT:    c.ntl.all
677; CHECK-RV32C-NEXT:    sw a1, 0(a0)
678; CHECK-RV32C-NEXT:    ret
679;
680; CHECK-RV64V-LABEL: test_nontemporal_store_i32:
681; CHECK-RV64V:       # %bb.0:
682; CHECK-RV64V-NEXT:    ntl.all
683; CHECK-RV64V-NEXT:    sw a1, 0(a0)
684; CHECK-RV64V-NEXT:    ret
685;
686; CHECK-RV32V-LABEL: test_nontemporal_store_i32:
687; CHECK-RV32V:       # %bb.0:
688; CHECK-RV32V-NEXT:    ntl.all
689; CHECK-RV32V-NEXT:    sw a1, 0(a0)
690; CHECK-RV32V-NEXT:    ret
691
692  store i32 %v, ptr %p, !nontemporal !0
693  ret void
694}
695
; Nontemporal store of an i16.
; All six configurations expect a single `sh` of a1 preceded by one hint
; (`ntl.all`, or `c.ntl.all` in the runs that add +c).
696define void @test_nontemporal_store_i16(ptr %p, i16 %v) {
697; CHECK-RV64-LABEL: test_nontemporal_store_i16:
698; CHECK-RV64:       # %bb.0:
699; CHECK-RV64-NEXT:    ntl.all
700; CHECK-RV64-NEXT:    sh a1, 0(a0)
701; CHECK-RV64-NEXT:    ret
702;
703; CHECK-RV32-LABEL: test_nontemporal_store_i16:
704; CHECK-RV32:       # %bb.0:
705; CHECK-RV32-NEXT:    ntl.all
706; CHECK-RV32-NEXT:    sh a1, 0(a0)
707; CHECK-RV32-NEXT:    ret
708;
709; CHECK-RV64C-LABEL: test_nontemporal_store_i16:
710; CHECK-RV64C:       # %bb.0:
711; CHECK-RV64C-NEXT:    c.ntl.all
712; CHECK-RV64C-NEXT:    sh a1, 0(a0)
713; CHECK-RV64C-NEXT:    ret
714;
715; CHECK-RV32C-LABEL: test_nontemporal_store_i16:
716; CHECK-RV32C:       # %bb.0:
717; CHECK-RV32C-NEXT:    c.ntl.all
718; CHECK-RV32C-NEXT:    sh a1, 0(a0)
719; CHECK-RV32C-NEXT:    ret
720;
721; CHECK-RV64V-LABEL: test_nontemporal_store_i16:
722; CHECK-RV64V:       # %bb.0:
723; CHECK-RV64V-NEXT:    ntl.all
724; CHECK-RV64V-NEXT:    sh a1, 0(a0)
725; CHECK-RV64V-NEXT:    ret
726;
727; CHECK-RV32V-LABEL: test_nontemporal_store_i16:
728; CHECK-RV32V:       # %bb.0:
729; CHECK-RV32V-NEXT:    ntl.all
730; CHECK-RV32V-NEXT:    sh a1, 0(a0)
731; CHECK-RV32V-NEXT:    ret
732
733  store i16 %v, ptr %p, !nontemporal !0
734  ret void
735}
736
; Nontemporal store of an i8.
; All six configurations expect a single `sb` of a1 preceded by one hint
; (`ntl.all`, or `c.ntl.all` in the runs that add +c).
737define void @test_nontemporal_store_i8(ptr %p, i8 %v) {
738; CHECK-RV64-LABEL: test_nontemporal_store_i8:
739; CHECK-RV64:       # %bb.0:
740; CHECK-RV64-NEXT:    ntl.all
741; CHECK-RV64-NEXT:    sb a1, 0(a0)
742; CHECK-RV64-NEXT:    ret
743;
744; CHECK-RV32-LABEL: test_nontemporal_store_i8:
745; CHECK-RV32:       # %bb.0:
746; CHECK-RV32-NEXT:    ntl.all
747; CHECK-RV32-NEXT:    sb a1, 0(a0)
748; CHECK-RV32-NEXT:    ret
749;
750; CHECK-RV64C-LABEL: test_nontemporal_store_i8:
751; CHECK-RV64C:       # %bb.0:
752; CHECK-RV64C-NEXT:    c.ntl.all
753; CHECK-RV64C-NEXT:    sb a1, 0(a0)
754; CHECK-RV64C-NEXT:    ret
755;
756; CHECK-RV32C-LABEL: test_nontemporal_store_i8:
757; CHECK-RV32C:       # %bb.0:
758; CHECK-RV32C-NEXT:    c.ntl.all
759; CHECK-RV32C-NEXT:    sb a1, 0(a0)
760; CHECK-RV32C-NEXT:    ret
761;
762; CHECK-RV64V-LABEL: test_nontemporal_store_i8:
763; CHECK-RV64V:       # %bb.0:
764; CHECK-RV64V-NEXT:    ntl.all
765; CHECK-RV64V-NEXT:    sb a1, 0(a0)
766; CHECK-RV64V-NEXT:    ret
767;
768; CHECK-RV32V-LABEL: test_nontemporal_store_i8:
769; CHECK-RV32V:       # %bb.0:
770; CHECK-RV32V-NEXT:    ntl.all
771; CHECK-RV32V-NEXT:    sb a1, 0(a0)
772; CHECK-RV32V-NEXT:    ret
773
774  store i8 %v, ptr %p, !nontemporal !0
775  ret void
776}
777
; Nontemporal store of a half.
; All six configurations expect a single `fsh` of fa0 preceded by one hint
; (`ntl.all`, or `c.ntl.all` in the runs that add +c).
778define void @test_nontemporal_store_half(ptr %p, half %v) {
779; CHECK-RV64-LABEL: test_nontemporal_store_half:
780; CHECK-RV64:       # %bb.0:
781; CHECK-RV64-NEXT:    ntl.all
782; CHECK-RV64-NEXT:    fsh fa0, 0(a0)
783; CHECK-RV64-NEXT:    ret
784;
785; CHECK-RV32-LABEL: test_nontemporal_store_half:
786; CHECK-RV32:       # %bb.0:
787; CHECK-RV32-NEXT:    ntl.all
788; CHECK-RV32-NEXT:    fsh fa0, 0(a0)
789; CHECK-RV32-NEXT:    ret
790;
791; CHECK-RV64C-LABEL: test_nontemporal_store_half:
792; CHECK-RV64C:       # %bb.0:
793; CHECK-RV64C-NEXT:    c.ntl.all
794; CHECK-RV64C-NEXT:    fsh fa0, 0(a0)
795; CHECK-RV64C-NEXT:    ret
796;
797; CHECK-RV32C-LABEL: test_nontemporal_store_half:
798; CHECK-RV32C:       # %bb.0:
799; CHECK-RV32C-NEXT:    c.ntl.all
800; CHECK-RV32C-NEXT:    fsh fa0, 0(a0)
801; CHECK-RV32C-NEXT:    ret
802;
803; CHECK-RV64V-LABEL: test_nontemporal_store_half:
804; CHECK-RV64V:       # %bb.0:
805; CHECK-RV64V-NEXT:    ntl.all
806; CHECK-RV64V-NEXT:    fsh fa0, 0(a0)
807; CHECK-RV64V-NEXT:    ret
808;
809; CHECK-RV32V-LABEL: test_nontemporal_store_half:
810; CHECK-RV32V:       # %bb.0:
811; CHECK-RV32V-NEXT:    ntl.all
812; CHECK-RV32V-NEXT:    fsh fa0, 0(a0)
813; CHECK-RV32V-NEXT:    ret
814
815  store half %v, ptr %p, !nontemporal !0
816  ret void
817}
818
; Nontemporal store of a float.
; All six configurations expect a single `fsw` of fa0 preceded by one hint
; (`ntl.all`, or `c.ntl.all` in the runs that add +c).
819define void @test_nontemporal_store_float(ptr %p, float %v) {
820; CHECK-RV64-LABEL: test_nontemporal_store_float:
821; CHECK-RV64:       # %bb.0:
822; CHECK-RV64-NEXT:    ntl.all
823; CHECK-RV64-NEXT:    fsw fa0, 0(a0)
824; CHECK-RV64-NEXT:    ret
825;
826; CHECK-RV32-LABEL: test_nontemporal_store_float:
827; CHECK-RV32:       # %bb.0:
828; CHECK-RV32-NEXT:    ntl.all
829; CHECK-RV32-NEXT:    fsw fa0, 0(a0)
830; CHECK-RV32-NEXT:    ret
831;
832; CHECK-RV64C-LABEL: test_nontemporal_store_float:
833; CHECK-RV64C:       # %bb.0:
834; CHECK-RV64C-NEXT:    c.ntl.all
835; CHECK-RV64C-NEXT:    fsw fa0, 0(a0)
836; CHECK-RV64C-NEXT:    ret
837;
838; CHECK-RV32C-LABEL: test_nontemporal_store_float:
839; CHECK-RV32C:       # %bb.0:
840; CHECK-RV32C-NEXT:    c.ntl.all
841; CHECK-RV32C-NEXT:    fsw fa0, 0(a0)
842; CHECK-RV32C-NEXT:    ret
843;
844; CHECK-RV64V-LABEL: test_nontemporal_store_float:
845; CHECK-RV64V:       # %bb.0:
846; CHECK-RV64V-NEXT:    ntl.all
847; CHECK-RV64V-NEXT:    fsw fa0, 0(a0)
848; CHECK-RV64V-NEXT:    ret
849;
850; CHECK-RV32V-LABEL: test_nontemporal_store_float:
851; CHECK-RV32V:       # %bb.0:
852; CHECK-RV32V-NEXT:    ntl.all
853; CHECK-RV32V-NEXT:    fsw fa0, 0(a0)
854; CHECK-RV32V-NEXT:    ret
855
856  store float %v, ptr %p, !nontemporal !0
857  ret void
858}
859
; Nontemporal store of a double.
; All six configurations, riscv32 included, expect a single `fsd` of fa0
; preceded by one hint (`ntl.all`, or `c.ntl.all` in the runs that add +c).
860define void @test_nontemporal_store_double(ptr %p, double %v) {
861; CHECK-RV64-LABEL: test_nontemporal_store_double:
862; CHECK-RV64:       # %bb.0:
863; CHECK-RV64-NEXT:    ntl.all
864; CHECK-RV64-NEXT:    fsd fa0, 0(a0)
865; CHECK-RV64-NEXT:    ret
866;
867; CHECK-RV32-LABEL: test_nontemporal_store_double:
868; CHECK-RV32:       # %bb.0:
869; CHECK-RV32-NEXT:    ntl.all
870; CHECK-RV32-NEXT:    fsd fa0, 0(a0)
871; CHECK-RV32-NEXT:    ret
872;
873; CHECK-RV64C-LABEL: test_nontemporal_store_double:
874; CHECK-RV64C:       # %bb.0:
875; CHECK-RV64C-NEXT:    c.ntl.all
876; CHECK-RV64C-NEXT:    fsd fa0, 0(a0)
877; CHECK-RV64C-NEXT:    ret
878;
879; CHECK-RV32C-LABEL: test_nontemporal_store_double:
880; CHECK-RV32C:       # %bb.0:
881; CHECK-RV32C-NEXT:    c.ntl.all
882; CHECK-RV32C-NEXT:    fsd fa0, 0(a0)
883; CHECK-RV32C-NEXT:    ret
884;
885; CHECK-RV64V-LABEL: test_nontemporal_store_double:
886; CHECK-RV64V:       # %bb.0:
887; CHECK-RV64V-NEXT:    ntl.all
888; CHECK-RV64V-NEXT:    fsd fa0, 0(a0)
889; CHECK-RV64V-NEXT:    ret
890;
891; CHECK-RV32V-LABEL: test_nontemporal_store_double:
892; CHECK-RV32V:       # %bb.0:
893; CHECK-RV32V-NEXT:    ntl.all
894; CHECK-RV32V-NEXT:    fsd fa0, 0(a0)
895; CHECK-RV32V-NEXT:    ret
896
897  store double %v, ptr %p, !nontemporal !0
898  ret void
899}
900
901define void @test_nontemporal_store_v16i8(ptr %p, <16 x i8> %v) {
902; CHECK-RV64-LABEL: test_nontemporal_store_v16i8:
903; CHECK-RV64:       # %bb.0:
904; CHECK-RV64-NEXT:    addi sp, sp, -16
905; CHECK-RV64-NEXT:    .cfi_def_cfa_offset 16
906; CHECK-RV64-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
907; CHECK-RV64-NEXT:    sd s1, 0(sp) # 8-byte Folded Spill
908; CHECK-RV64-NEXT:    .cfi_offset s0, -8
909; CHECK-RV64-NEXT:    .cfi_offset s1, -16
910; CHECK-RV64-NEXT:    lbu a2, 0(a1)
911; CHECK-RV64-NEXT:    lbu a3, 8(a1)
912; CHECK-RV64-NEXT:    lbu a4, 16(a1)
913; CHECK-RV64-NEXT:    lbu a5, 24(a1)
914; CHECK-RV64-NEXT:    lbu a6, 32(a1)
915; CHECK-RV64-NEXT:    lbu a7, 40(a1)
916; CHECK-RV64-NEXT:    lbu t0, 48(a1)
917; CHECK-RV64-NEXT:    lbu t1, 56(a1)
918; CHECK-RV64-NEXT:    lbu t2, 64(a1)
919; CHECK-RV64-NEXT:    lbu t3, 72(a1)
920; CHECK-RV64-NEXT:    lbu t4, 80(a1)
921; CHECK-RV64-NEXT:    lbu t5, 88(a1)
922; CHECK-RV64-NEXT:    lbu t6, 96(a1)
923; CHECK-RV64-NEXT:    lbu s0, 104(a1)
924; CHECK-RV64-NEXT:    lbu s1, 112(a1)
925; CHECK-RV64-NEXT:    lbu a1, 120(a1)
926; CHECK-RV64-NEXT:    ntl.all
927; CHECK-RV64-NEXT:    sb t6, 12(a0)
928; CHECK-RV64-NEXT:    ntl.all
929; CHECK-RV64-NEXT:    sb s0, 13(a0)
930; CHECK-RV64-NEXT:    ntl.all
931; CHECK-RV64-NEXT:    sb s1, 14(a0)
932; CHECK-RV64-NEXT:    ntl.all
933; CHECK-RV64-NEXT:    sb a1, 15(a0)
934; CHECK-RV64-NEXT:    ntl.all
935; CHECK-RV64-NEXT:    sb t2, 8(a0)
936; CHECK-RV64-NEXT:    ntl.all
937; CHECK-RV64-NEXT:    sb t3, 9(a0)
938; CHECK-RV64-NEXT:    ntl.all
939; CHECK-RV64-NEXT:    sb t4, 10(a0)
940; CHECK-RV64-NEXT:    ntl.all
941; CHECK-RV64-NEXT:    sb t5, 11(a0)
942; CHECK-RV64-NEXT:    ntl.all
943; CHECK-RV64-NEXT:    sb a6, 4(a0)
944; CHECK-RV64-NEXT:    ntl.all
945; CHECK-RV64-NEXT:    sb a7, 5(a0)
946; CHECK-RV64-NEXT:    ntl.all
947; CHECK-RV64-NEXT:    sb t0, 6(a0)
948; CHECK-RV64-NEXT:    ntl.all
949; CHECK-RV64-NEXT:    sb t1, 7(a0)
950; CHECK-RV64-NEXT:    ntl.all
951; CHECK-RV64-NEXT:    sb a2, 0(a0)
952; CHECK-RV64-NEXT:    ntl.all
953; CHECK-RV64-NEXT:    sb a3, 1(a0)
954; CHECK-RV64-NEXT:    ntl.all
955; CHECK-RV64-NEXT:    sb a4, 2(a0)
956; CHECK-RV64-NEXT:    ntl.all
957; CHECK-RV64-NEXT:    sb a5, 3(a0)
958; CHECK-RV64-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
959; CHECK-RV64-NEXT:    ld s1, 0(sp) # 8-byte Folded Reload
960; CHECK-RV64-NEXT:    .cfi_restore s0
961; CHECK-RV64-NEXT:    .cfi_restore s1
962; CHECK-RV64-NEXT:    addi sp, sp, 16
963; CHECK-RV64-NEXT:    .cfi_def_cfa_offset 0
964; CHECK-RV64-NEXT:    ret
965;
966; CHECK-RV32-LABEL: test_nontemporal_store_v16i8:
967; CHECK-RV32:       # %bb.0:
968; CHECK-RV32-NEXT:    addi sp, sp, -16
969; CHECK-RV32-NEXT:    .cfi_def_cfa_offset 16
970; CHECK-RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
971; CHECK-RV32-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
972; CHECK-RV32-NEXT:    .cfi_offset s0, -4
973; CHECK-RV32-NEXT:    .cfi_offset s1, -8
974; CHECK-RV32-NEXT:    lbu a2, 0(a1)
975; CHECK-RV32-NEXT:    lbu a3, 4(a1)
976; CHECK-RV32-NEXT:    lbu a4, 8(a1)
977; CHECK-RV32-NEXT:    lbu a5, 12(a1)
978; CHECK-RV32-NEXT:    lbu a6, 16(a1)
979; CHECK-RV32-NEXT:    lbu a7, 20(a1)
980; CHECK-RV32-NEXT:    lbu t0, 24(a1)
981; CHECK-RV32-NEXT:    lbu t1, 28(a1)
982; CHECK-RV32-NEXT:    lbu t2, 32(a1)
983; CHECK-RV32-NEXT:    lbu t3, 36(a1)
984; CHECK-RV32-NEXT:    lbu t4, 40(a1)
985; CHECK-RV32-NEXT:    lbu t5, 44(a1)
986; CHECK-RV32-NEXT:    lbu t6, 48(a1)
987; CHECK-RV32-NEXT:    lbu s0, 52(a1)
988; CHECK-RV32-NEXT:    lbu s1, 56(a1)
989; CHECK-RV32-NEXT:    lbu a1, 60(a1)
990; CHECK-RV32-NEXT:    ntl.all
991; CHECK-RV32-NEXT:    sb t6, 12(a0)
992; CHECK-RV32-NEXT:    ntl.all
993; CHECK-RV32-NEXT:    sb s0, 13(a0)
994; CHECK-RV32-NEXT:    ntl.all
995; CHECK-RV32-NEXT:    sb s1, 14(a0)
996; CHECK-RV32-NEXT:    ntl.all
997; CHECK-RV32-NEXT:    sb a1, 15(a0)
998; CHECK-RV32-NEXT:    ntl.all
999; CHECK-RV32-NEXT:    sb t2, 8(a0)
1000; CHECK-RV32-NEXT:    ntl.all
1001; CHECK-RV32-NEXT:    sb t3, 9(a0)
1002; CHECK-RV32-NEXT:    ntl.all
1003; CHECK-RV32-NEXT:    sb t4, 10(a0)
1004; CHECK-RV32-NEXT:    ntl.all
1005; CHECK-RV32-NEXT:    sb t5, 11(a0)
1006; CHECK-RV32-NEXT:    ntl.all
1007; CHECK-RV32-NEXT:    sb a6, 4(a0)
1008; CHECK-RV32-NEXT:    ntl.all
1009; CHECK-RV32-NEXT:    sb a7, 5(a0)
1010; CHECK-RV32-NEXT:    ntl.all
1011; CHECK-RV32-NEXT:    sb t0, 6(a0)
1012; CHECK-RV32-NEXT:    ntl.all
1013; CHECK-RV32-NEXT:    sb t1, 7(a0)
1014; CHECK-RV32-NEXT:    ntl.all
1015; CHECK-RV32-NEXT:    sb a2, 0(a0)
1016; CHECK-RV32-NEXT:    ntl.all
1017; CHECK-RV32-NEXT:    sb a3, 1(a0)
1018; CHECK-RV32-NEXT:    ntl.all
1019; CHECK-RV32-NEXT:    sb a4, 2(a0)
1020; CHECK-RV32-NEXT:    ntl.all
1021; CHECK-RV32-NEXT:    sb a5, 3(a0)
1022; CHECK-RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
1023; CHECK-RV32-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
1024; CHECK-RV32-NEXT:    .cfi_restore s0
1025; CHECK-RV32-NEXT:    .cfi_restore s1
1026; CHECK-RV32-NEXT:    addi sp, sp, 16
1027; CHECK-RV32-NEXT:    .cfi_def_cfa_offset 0
1028; CHECK-RV32-NEXT:    ret
1029;
1030; CHECK-RV64C-LABEL: test_nontemporal_store_v16i8:
1031; CHECK-RV64C:       # %bb.0:
1032; CHECK-RV64C-NEXT:    addi sp, sp, -16
1033; CHECK-RV64C-NEXT:    .cfi_def_cfa_offset 16
1034; CHECK-RV64C-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
1035; CHECK-RV64C-NEXT:    sd s1, 0(sp) # 8-byte Folded Spill
1036; CHECK-RV64C-NEXT:    .cfi_offset s0, -8
1037; CHECK-RV64C-NEXT:    .cfi_offset s1, -16
1038; CHECK-RV64C-NEXT:    lbu a6, 0(a1)
1039; CHECK-RV64C-NEXT:    lbu a7, 8(a1)
1040; CHECK-RV64C-NEXT:    lbu t0, 16(a1)
1041; CHECK-RV64C-NEXT:    lbu t1, 24(a1)
1042; CHECK-RV64C-NEXT:    lbu t2, 32(a1)
1043; CHECK-RV64C-NEXT:    lbu t3, 40(a1)
1044; CHECK-RV64C-NEXT:    lbu t4, 48(a1)
1045; CHECK-RV64C-NEXT:    lbu t5, 56(a1)
1046; CHECK-RV64C-NEXT:    lbu t6, 64(a1)
1047; CHECK-RV64C-NEXT:    lbu a3, 72(a1)
1048; CHECK-RV64C-NEXT:    lbu a4, 80(a1)
1049; CHECK-RV64C-NEXT:    lbu a5, 88(a1)
1050; CHECK-RV64C-NEXT:    lbu a2, 96(a1)
1051; CHECK-RV64C-NEXT:    lbu s0, 104(a1)
1052; CHECK-RV64C-NEXT:    lbu s1, 112(a1)
1053; CHECK-RV64C-NEXT:    lbu a1, 120(a1)
1054; CHECK-RV64C-NEXT:    c.ntl.all
1055; CHECK-RV64C-NEXT:    sb a2, 12(a0)
1056; CHECK-RV64C-NEXT:    c.ntl.all
1057; CHECK-RV64C-NEXT:    sb s0, 13(a0)
1058; CHECK-RV64C-NEXT:    c.ntl.all
1059; CHECK-RV64C-NEXT:    sb s1, 14(a0)
1060; CHECK-RV64C-NEXT:    c.ntl.all
1061; CHECK-RV64C-NEXT:    sb a1, 15(a0)
1062; CHECK-RV64C-NEXT:    c.ntl.all
1063; CHECK-RV64C-NEXT:    sb t6, 8(a0)
1064; CHECK-RV64C-NEXT:    c.ntl.all
1065; CHECK-RV64C-NEXT:    sb a3, 9(a0)
1066; CHECK-RV64C-NEXT:    c.ntl.all
1067; CHECK-RV64C-NEXT:    sb a4, 10(a0)
1068; CHECK-RV64C-NEXT:    c.ntl.all
1069; CHECK-RV64C-NEXT:    sb a5, 11(a0)
1070; CHECK-RV64C-NEXT:    c.ntl.all
1071; CHECK-RV64C-NEXT:    sb t2, 4(a0)
1072; CHECK-RV64C-NEXT:    c.ntl.all
1073; CHECK-RV64C-NEXT:    sb t3, 5(a0)
1074; CHECK-RV64C-NEXT:    c.ntl.all
1075; CHECK-RV64C-NEXT:    sb t4, 6(a0)
1076; CHECK-RV64C-NEXT:    c.ntl.all
1077; CHECK-RV64C-NEXT:    sb t5, 7(a0)
1078; CHECK-RV64C-NEXT:    c.ntl.all
1079; CHECK-RV64C-NEXT:    sb a6, 0(a0)
1080; CHECK-RV64C-NEXT:    c.ntl.all
1081; CHECK-RV64C-NEXT:    sb a7, 1(a0)
1082; CHECK-RV64C-NEXT:    c.ntl.all
1083; CHECK-RV64C-NEXT:    sb t0, 2(a0)
1084; CHECK-RV64C-NEXT:    c.ntl.all
1085; CHECK-RV64C-NEXT:    sb t1, 3(a0)
1086; CHECK-RV64C-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
1087; CHECK-RV64C-NEXT:    ld s1, 0(sp) # 8-byte Folded Reload
1088; CHECK-RV64C-NEXT:    .cfi_restore s0
1089; CHECK-RV64C-NEXT:    .cfi_restore s1
1090; CHECK-RV64C-NEXT:    addi sp, sp, 16
1091; CHECK-RV64C-NEXT:    .cfi_def_cfa_offset 0
1092; CHECK-RV64C-NEXT:    ret
1093;
1094; CHECK-RV32C-LABEL: test_nontemporal_store_v16i8:
1095; CHECK-RV32C:       # %bb.0:
1096; CHECK-RV32C-NEXT:    addi sp, sp, -16
1097; CHECK-RV32C-NEXT:    .cfi_def_cfa_offset 16
1098; CHECK-RV32C-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
1099; CHECK-RV32C-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
1100; CHECK-RV32C-NEXT:    .cfi_offset s0, -4
1101; CHECK-RV32C-NEXT:    .cfi_offset s1, -8
1102; CHECK-RV32C-NEXT:    lbu a6, 0(a1)
1103; CHECK-RV32C-NEXT:    lbu a7, 4(a1)
1104; CHECK-RV32C-NEXT:    lbu t0, 8(a1)
1105; CHECK-RV32C-NEXT:    lbu t1, 12(a1)
1106; CHECK-RV32C-NEXT:    lbu t2, 16(a1)
1107; CHECK-RV32C-NEXT:    lbu t3, 20(a1)
1108; CHECK-RV32C-NEXT:    lbu t4, 24(a1)
1109; CHECK-RV32C-NEXT:    lbu t5, 28(a1)
1110; CHECK-RV32C-NEXT:    lbu t6, 32(a1)
1111; CHECK-RV32C-NEXT:    lbu a3, 36(a1)
1112; CHECK-RV32C-NEXT:    lbu a4, 40(a1)
1113; CHECK-RV32C-NEXT:    lbu a5, 44(a1)
1114; CHECK-RV32C-NEXT:    lbu a2, 48(a1)
1115; CHECK-RV32C-NEXT:    lbu s0, 52(a1)
1116; CHECK-RV32C-NEXT:    lbu s1, 56(a1)
1117; CHECK-RV32C-NEXT:    lbu a1, 60(a1)
1118; CHECK-RV32C-NEXT:    c.ntl.all
1119; CHECK-RV32C-NEXT:    sb a2, 12(a0)
1120; CHECK-RV32C-NEXT:    c.ntl.all
1121; CHECK-RV32C-NEXT:    sb s0, 13(a0)
1122; CHECK-RV32C-NEXT:    c.ntl.all
1123; CHECK-RV32C-NEXT:    sb s1, 14(a0)
1124; CHECK-RV32C-NEXT:    c.ntl.all
1125; CHECK-RV32C-NEXT:    sb a1, 15(a0)
1126; CHECK-RV32C-NEXT:    c.ntl.all
1127; CHECK-RV32C-NEXT:    sb t6, 8(a0)
1128; CHECK-RV32C-NEXT:    c.ntl.all
1129; CHECK-RV32C-NEXT:    sb a3, 9(a0)
1130; CHECK-RV32C-NEXT:    c.ntl.all
1131; CHECK-RV32C-NEXT:    sb a4, 10(a0)
1132; CHECK-RV32C-NEXT:    c.ntl.all
1133; CHECK-RV32C-NEXT:    sb a5, 11(a0)
1134; CHECK-RV32C-NEXT:    c.ntl.all
1135; CHECK-RV32C-NEXT:    sb t2, 4(a0)
1136; CHECK-RV32C-NEXT:    c.ntl.all
1137; CHECK-RV32C-NEXT:    sb t3, 5(a0)
1138; CHECK-RV32C-NEXT:    c.ntl.all
1139; CHECK-RV32C-NEXT:    sb t4, 6(a0)
1140; CHECK-RV32C-NEXT:    c.ntl.all
1141; CHECK-RV32C-NEXT:    sb t5, 7(a0)
1142; CHECK-RV32C-NEXT:    c.ntl.all
1143; CHECK-RV32C-NEXT:    sb a6, 0(a0)
1144; CHECK-RV32C-NEXT:    c.ntl.all
1145; CHECK-RV32C-NEXT:    sb a7, 1(a0)
1146; CHECK-RV32C-NEXT:    c.ntl.all
1147; CHECK-RV32C-NEXT:    sb t0, 2(a0)
1148; CHECK-RV32C-NEXT:    c.ntl.all
1149; CHECK-RV32C-NEXT:    sb t1, 3(a0)
1150; CHECK-RV32C-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
1151; CHECK-RV32C-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
1152; CHECK-RV32C-NEXT:    .cfi_restore s0
1153; CHECK-RV32C-NEXT:    .cfi_restore s1
1154; CHECK-RV32C-NEXT:    addi sp, sp, 16
1155; CHECK-RV32C-NEXT:    .cfi_def_cfa_offset 0
1156; CHECK-RV32C-NEXT:    ret
1157;
1158; CHECK-RV64V-LABEL: test_nontemporal_store_v16i8:
1159; CHECK-RV64V:       # %bb.0:
1160; CHECK-RV64V-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
1161; CHECK-RV64V-NEXT:    ntl.all
1162; CHECK-RV64V-NEXT:    vse8.v v8, (a0)
1163; CHECK-RV64V-NEXT:    ret
1164;
1165; CHECK-RV32V-LABEL: test_nontemporal_store_v16i8:
1166; CHECK-RV32V:       # %bb.0:
1167; CHECK-RV32V-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
1168; CHECK-RV32V-NEXT:    ntl.all
1169; CHECK-RV32V-NEXT:    vse8.v v8, (a0)
1170; CHECK-RV32V-NEXT:    ret
1171  store <16 x i8> %v, ptr %p, !nontemporal !0
1172  ret void
1173}
1174
; Store of <8 x i16> tagged with !nontemporal only (no domain metadata).
; In the runs without +v the checks expect eight scalar sh stores, each directly
; preceded by its own ntl.all hint (c.ntl.all in the +c runs). In the +v runs a
; single ntl.all precedes one vse16.v.
1175define void @test_nontemporal_store_v8i16(ptr %p, <8 x i16> %v) {
1176; CHECK-RV64-LABEL: test_nontemporal_store_v8i16:
1177; CHECK-RV64:       # %bb.0:
1178; CHECK-RV64-NEXT:    lh a2, 0(a1)
1179; CHECK-RV64-NEXT:    lh a3, 8(a1)
1180; CHECK-RV64-NEXT:    lh a4, 16(a1)
1181; CHECK-RV64-NEXT:    lh a5, 24(a1)
1182; CHECK-RV64-NEXT:    lh a6, 32(a1)
1183; CHECK-RV64-NEXT:    lh a7, 40(a1)
1184; CHECK-RV64-NEXT:    lh t0, 48(a1)
1185; CHECK-RV64-NEXT:    lh a1, 56(a1)
1186; CHECK-RV64-NEXT:    ntl.all
1187; CHECK-RV64-NEXT:    sh a6, 8(a0)
1188; CHECK-RV64-NEXT:    ntl.all
1189; CHECK-RV64-NEXT:    sh a7, 10(a0)
1190; CHECK-RV64-NEXT:    ntl.all
1191; CHECK-RV64-NEXT:    sh t0, 12(a0)
1192; CHECK-RV64-NEXT:    ntl.all
1193; CHECK-RV64-NEXT:    sh a1, 14(a0)
1194; CHECK-RV64-NEXT:    ntl.all
1195; CHECK-RV64-NEXT:    sh a2, 0(a0)
1196; CHECK-RV64-NEXT:    ntl.all
1197; CHECK-RV64-NEXT:    sh a3, 2(a0)
1198; CHECK-RV64-NEXT:    ntl.all
1199; CHECK-RV64-NEXT:    sh a4, 4(a0)
1200; CHECK-RV64-NEXT:    ntl.all
1201; CHECK-RV64-NEXT:    sh a5, 6(a0)
1202; CHECK-RV64-NEXT:    ret
1203;
1204; CHECK-RV32-LABEL: test_nontemporal_store_v8i16:
1205; CHECK-RV32:       # %bb.0:
1206; CHECK-RV32-NEXT:    lh a2, 0(a1)
1207; CHECK-RV32-NEXT:    lh a3, 4(a1)
1208; CHECK-RV32-NEXT:    lh a4, 8(a1)
1209; CHECK-RV32-NEXT:    lh a5, 12(a1)
1210; CHECK-RV32-NEXT:    lh a6, 16(a1)
1211; CHECK-RV32-NEXT:    lh a7, 20(a1)
1212; CHECK-RV32-NEXT:    lh t0, 24(a1)
1213; CHECK-RV32-NEXT:    lh a1, 28(a1)
1214; CHECK-RV32-NEXT:    ntl.all
1215; CHECK-RV32-NEXT:    sh a6, 8(a0)
1216; CHECK-RV32-NEXT:    ntl.all
1217; CHECK-RV32-NEXT:    sh a7, 10(a0)
1218; CHECK-RV32-NEXT:    ntl.all
1219; CHECK-RV32-NEXT:    sh t0, 12(a0)
1220; CHECK-RV32-NEXT:    ntl.all
1221; CHECK-RV32-NEXT:    sh a1, 14(a0)
1222; CHECK-RV32-NEXT:    ntl.all
1223; CHECK-RV32-NEXT:    sh a2, 0(a0)
1224; CHECK-RV32-NEXT:    ntl.all
1225; CHECK-RV32-NEXT:    sh a3, 2(a0)
1226; CHECK-RV32-NEXT:    ntl.all
1227; CHECK-RV32-NEXT:    sh a4, 4(a0)
1228; CHECK-RV32-NEXT:    ntl.all
1229; CHECK-RV32-NEXT:    sh a5, 6(a0)
1230; CHECK-RV32-NEXT:    ret
1231;
1232; CHECK-RV64C-LABEL: test_nontemporal_store_v8i16:
1233; CHECK-RV64C:       # %bb.0:
1234; CHECK-RV64C-NEXT:    lh a6, 0(a1)
1235; CHECK-RV64C-NEXT:    lh a7, 8(a1)
1236; CHECK-RV64C-NEXT:    lh t0, 16(a1)
1237; CHECK-RV64C-NEXT:    lh a5, 24(a1)
1238; CHECK-RV64C-NEXT:    lh a2, 32(a1)
1239; CHECK-RV64C-NEXT:    lh a3, 40(a1)
1240; CHECK-RV64C-NEXT:    lh a4, 48(a1)
1241; CHECK-RV64C-NEXT:    lh a1, 56(a1)
1242; CHECK-RV64C-NEXT:    c.ntl.all
1243; CHECK-RV64C-NEXT:    sh a2, 8(a0)
1244; CHECK-RV64C-NEXT:    c.ntl.all
1245; CHECK-RV64C-NEXT:    sh a3, 10(a0)
1246; CHECK-RV64C-NEXT:    c.ntl.all
1247; CHECK-RV64C-NEXT:    sh a4, 12(a0)
1248; CHECK-RV64C-NEXT:    c.ntl.all
1249; CHECK-RV64C-NEXT:    sh a1, 14(a0)
1250; CHECK-RV64C-NEXT:    c.ntl.all
1251; CHECK-RV64C-NEXT:    sh a6, 0(a0)
1252; CHECK-RV64C-NEXT:    c.ntl.all
1253; CHECK-RV64C-NEXT:    sh a7, 2(a0)
1254; CHECK-RV64C-NEXT:    c.ntl.all
1255; CHECK-RV64C-NEXT:    sh t0, 4(a0)
1256; CHECK-RV64C-NEXT:    c.ntl.all
1257; CHECK-RV64C-NEXT:    sh a5, 6(a0)
1258; CHECK-RV64C-NEXT:    ret
1259;
1260; CHECK-RV32C-LABEL: test_nontemporal_store_v8i16:
1261; CHECK-RV32C:       # %bb.0:
1262; CHECK-RV32C-NEXT:    lh a6, 0(a1)
1263; CHECK-RV32C-NEXT:    lh a7, 4(a1)
1264; CHECK-RV32C-NEXT:    lh t0, 8(a1)
1265; CHECK-RV32C-NEXT:    lh a5, 12(a1)
1266; CHECK-RV32C-NEXT:    lh a2, 16(a1)
1267; CHECK-RV32C-NEXT:    lh a3, 20(a1)
1268; CHECK-RV32C-NEXT:    lh a4, 24(a1)
1269; CHECK-RV32C-NEXT:    lh a1, 28(a1)
1270; CHECK-RV32C-NEXT:    c.ntl.all
1271; CHECK-RV32C-NEXT:    sh a2, 8(a0)
1272; CHECK-RV32C-NEXT:    c.ntl.all
1273; CHECK-RV32C-NEXT:    sh a3, 10(a0)
1274; CHECK-RV32C-NEXT:    c.ntl.all
1275; CHECK-RV32C-NEXT:    sh a4, 12(a0)
1276; CHECK-RV32C-NEXT:    c.ntl.all
1277; CHECK-RV32C-NEXT:    sh a1, 14(a0)
1278; CHECK-RV32C-NEXT:    c.ntl.all
1279; CHECK-RV32C-NEXT:    sh a6, 0(a0)
1280; CHECK-RV32C-NEXT:    c.ntl.all
1281; CHECK-RV32C-NEXT:    sh a7, 2(a0)
1282; CHECK-RV32C-NEXT:    c.ntl.all
1283; CHECK-RV32C-NEXT:    sh t0, 4(a0)
1284; CHECK-RV32C-NEXT:    c.ntl.all
1285; CHECK-RV32C-NEXT:    sh a5, 6(a0)
1286; CHECK-RV32C-NEXT:    ret
1287;
1288; CHECK-RV64V-LABEL: test_nontemporal_store_v8i16:
1289; CHECK-RV64V:       # %bb.0:
1290; CHECK-RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
1291; CHECK-RV64V-NEXT:    ntl.all
1292; CHECK-RV64V-NEXT:    vse16.v v8, (a0)
1293; CHECK-RV64V-NEXT:    ret
1294;
1295; CHECK-RV32V-LABEL: test_nontemporal_store_v8i16:
1296; CHECK-RV32V:       # %bb.0:
1297; CHECK-RV32V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
1298; CHECK-RV32V-NEXT:    ntl.all
1299; CHECK-RV32V-NEXT:    vse16.v v8, (a0)
1300; CHECK-RV32V-NEXT:    ret
1301  store <8 x i16> %v, ptr %p, !nontemporal !0
1302  ret void
1303}
1304
; Store of <4 x i32> tagged with !nontemporal only (no domain metadata).
; In the runs without +v the checks expect four scalar sw stores, each directly
; preceded by its own ntl.all hint (c.ntl.all in the +c runs). In the +v runs a
; single ntl.all precedes one vse32.v.
1305define void @test_nontemporal_store_v4i32(ptr %p, <4 x i32> %v) {
1306; CHECK-RV64-LABEL: test_nontemporal_store_v4i32:
1307; CHECK-RV64:       # %bb.0:
1308; CHECK-RV64-NEXT:    lw a2, 0(a1)
1309; CHECK-RV64-NEXT:    lw a3, 8(a1)
1310; CHECK-RV64-NEXT:    lw a4, 16(a1)
1311; CHECK-RV64-NEXT:    lw a1, 24(a1)
1312; CHECK-RV64-NEXT:    ntl.all
1313; CHECK-RV64-NEXT:    sw a2, 0(a0)
1314; CHECK-RV64-NEXT:    ntl.all
1315; CHECK-RV64-NEXT:    sw a3, 4(a0)
1316; CHECK-RV64-NEXT:    ntl.all
1317; CHECK-RV64-NEXT:    sw a4, 8(a0)
1318; CHECK-RV64-NEXT:    ntl.all
1319; CHECK-RV64-NEXT:    sw a1, 12(a0)
1320; CHECK-RV64-NEXT:    ret
1321;
1322; CHECK-RV32-LABEL: test_nontemporal_store_v4i32:
1323; CHECK-RV32:       # %bb.0:
1324; CHECK-RV32-NEXT:    lw a2, 0(a1)
1325; CHECK-RV32-NEXT:    lw a3, 4(a1)
1326; CHECK-RV32-NEXT:    lw a4, 8(a1)
1327; CHECK-RV32-NEXT:    lw a1, 12(a1)
1328; CHECK-RV32-NEXT:    ntl.all
1329; CHECK-RV32-NEXT:    sw a2, 0(a0)
1330; CHECK-RV32-NEXT:    ntl.all
1331; CHECK-RV32-NEXT:    sw a3, 4(a0)
1332; CHECK-RV32-NEXT:    ntl.all
1333; CHECK-RV32-NEXT:    sw a4, 8(a0)
1334; CHECK-RV32-NEXT:    ntl.all
1335; CHECK-RV32-NEXT:    sw a1, 12(a0)
1336; CHECK-RV32-NEXT:    ret
1337;
1338; CHECK-RV64C-LABEL: test_nontemporal_store_v4i32:
1339; CHECK-RV64C:       # %bb.0:
1340; CHECK-RV64C-NEXT:    lw a2, 0(a1)
1341; CHECK-RV64C-NEXT:    lw a3, 8(a1)
1342; CHECK-RV64C-NEXT:    lw a4, 16(a1)
1343; CHECK-RV64C-NEXT:    lw a1, 24(a1)
1344; CHECK-RV64C-NEXT:    c.ntl.all
1345; CHECK-RV64C-NEXT:    sw a2, 0(a0)
1346; CHECK-RV64C-NEXT:    c.ntl.all
1347; CHECK-RV64C-NEXT:    sw a3, 4(a0)
1348; CHECK-RV64C-NEXT:    c.ntl.all
1349; CHECK-RV64C-NEXT:    sw a4, 8(a0)
1350; CHECK-RV64C-NEXT:    c.ntl.all
1351; CHECK-RV64C-NEXT:    sw a1, 12(a0)
1352; CHECK-RV64C-NEXT:    ret
1353;
1354; CHECK-RV32C-LABEL: test_nontemporal_store_v4i32:
1355; CHECK-RV32C:       # %bb.0:
1356; CHECK-RV32C-NEXT:    lw a2, 0(a1)
1357; CHECK-RV32C-NEXT:    lw a3, 4(a1)
1358; CHECK-RV32C-NEXT:    lw a4, 8(a1)
1359; CHECK-RV32C-NEXT:    lw a1, 12(a1)
1360; CHECK-RV32C-NEXT:    c.ntl.all
1361; CHECK-RV32C-NEXT:    sw a2, 0(a0)
1362; CHECK-RV32C-NEXT:    c.ntl.all
1363; CHECK-RV32C-NEXT:    sw a3, 4(a0)
1364; CHECK-RV32C-NEXT:    c.ntl.all
1365; CHECK-RV32C-NEXT:    sw a4, 8(a0)
1366; CHECK-RV32C-NEXT:    c.ntl.all
1367; CHECK-RV32C-NEXT:    sw a1, 12(a0)
1368; CHECK-RV32C-NEXT:    ret
1369;
1370; CHECK-RV64V-LABEL: test_nontemporal_store_v4i32:
1371; CHECK-RV64V:       # %bb.0:
1372; CHECK-RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
1373; CHECK-RV64V-NEXT:    ntl.all
1374; CHECK-RV64V-NEXT:    vse32.v v8, (a0)
1375; CHECK-RV64V-NEXT:    ret
1376;
1377; CHECK-RV32V-LABEL: test_nontemporal_store_v4i32:
1378; CHECK-RV32V:       # %bb.0:
1379; CHECK-RV32V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
1380; CHECK-RV32V-NEXT:    ntl.all
1381; CHECK-RV32V-NEXT:    vse32.v v8, (a0)
1382; CHECK-RV32V-NEXT:    ret
1383  store <4 x i32> %v, ptr %p, !nontemporal !0
1384  ret void
1385}
1386
; Store of <2 x i64> tagged with !nontemporal only (no domain metadata).
; RV64/RV64C: the checks expect two sd stores (from a1 and a2), each with its
; own ntl.all / c.ntl.all hint.
; RV32/RV32C: the checks expect four lw loads through a1 followed by four sw
; stores, each store preceded by its own hint.
; RV64V/RV32V: a single ntl.all precedes one vse64.v.
1387define void @test_nontemporal_store_v2i64(ptr %p, <2 x i64> %v) {
1388; CHECK-RV64-LABEL: test_nontemporal_store_v2i64:
1389; CHECK-RV64:       # %bb.0:
1390; CHECK-RV64-NEXT:    ntl.all
1391; CHECK-RV64-NEXT:    sd a1, 0(a0)
1392; CHECK-RV64-NEXT:    ntl.all
1393; CHECK-RV64-NEXT:    sd a2, 8(a0)
1394; CHECK-RV64-NEXT:    ret
1395;
1396; CHECK-RV32-LABEL: test_nontemporal_store_v2i64:
1397; CHECK-RV32:       # %bb.0:
1398; CHECK-RV32-NEXT:    lw a2, 0(a1)
1399; CHECK-RV32-NEXT:    lw a3, 4(a1)
1400; CHECK-RV32-NEXT:    lw a4, 8(a1)
1401; CHECK-RV32-NEXT:    lw a1, 12(a1)
1402; CHECK-RV32-NEXT:    ntl.all
1403; CHECK-RV32-NEXT:    sw a2, 0(a0)
1404; CHECK-RV32-NEXT:    ntl.all
1405; CHECK-RV32-NEXT:    sw a3, 4(a0)
1406; CHECK-RV32-NEXT:    ntl.all
1407; CHECK-RV32-NEXT:    sw a4, 8(a0)
1408; CHECK-RV32-NEXT:    ntl.all
1409; CHECK-RV32-NEXT:    sw a1, 12(a0)
1410; CHECK-RV32-NEXT:    ret
1411;
1412; CHECK-RV64C-LABEL: test_nontemporal_store_v2i64:
1413; CHECK-RV64C:       # %bb.0:
1414; CHECK-RV64C-NEXT:    c.ntl.all
1415; CHECK-RV64C-NEXT:    sd a1, 0(a0)
1416; CHECK-RV64C-NEXT:    c.ntl.all
1417; CHECK-RV64C-NEXT:    sd a2, 8(a0)
1418; CHECK-RV64C-NEXT:    ret
1419;
1420; CHECK-RV32C-LABEL: test_nontemporal_store_v2i64:
1421; CHECK-RV32C:       # %bb.0:
1422; CHECK-RV32C-NEXT:    lw a2, 0(a1)
1423; CHECK-RV32C-NEXT:    lw a3, 4(a1)
1424; CHECK-RV32C-NEXT:    lw a4, 8(a1)
1425; CHECK-RV32C-NEXT:    lw a1, 12(a1)
1426; CHECK-RV32C-NEXT:    c.ntl.all
1427; CHECK-RV32C-NEXT:    sw a2, 0(a0)
1428; CHECK-RV32C-NEXT:    c.ntl.all
1429; CHECK-RV32C-NEXT:    sw a3, 4(a0)
1430; CHECK-RV32C-NEXT:    c.ntl.all
1431; CHECK-RV32C-NEXT:    sw a4, 8(a0)
1432; CHECK-RV32C-NEXT:    c.ntl.all
1433; CHECK-RV32C-NEXT:    sw a1, 12(a0)
1434; CHECK-RV32C-NEXT:    ret
1435;
1436; CHECK-RV64V-LABEL: test_nontemporal_store_v2i64:
1437; CHECK-RV64V:       # %bb.0:
1438; CHECK-RV64V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
1439; CHECK-RV64V-NEXT:    ntl.all
1440; CHECK-RV64V-NEXT:    vse64.v v8, (a0)
1441; CHECK-RV64V-NEXT:    ret
1442;
1443; CHECK-RV32V-LABEL: test_nontemporal_store_v2i64:
1444; CHECK-RV32V:       # %bb.0:
1445; CHECK-RV32V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
1446; CHECK-RV32V-NEXT:    ntl.all
1447; CHECK-RV32V-NEXT:    vse64.v v8, (a0)
1448; CHECK-RV32V-NEXT:    ret
1449  store <2 x i64> %v, ptr %p, !nontemporal !0
1450  ret void
1451}
1452
; i64 load tagged with !nontemporal plus !riscv-nontemporal-domain !1 (!1 is
; defined elsewhere in the file). The checks expect the ntl.p1 hint (c.ntl.p1 in
; the +c runs).
; 64-bit runs: one hint followed by a single ld.
; 32-bit runs (including RV32V): two lw loads, each with its own hint, then a mv
; to place the low word in a0.
1453define i64 @test_nontemporal_P1_load_i64(ptr %p) {
1454; CHECK-RV64-LABEL: test_nontemporal_P1_load_i64:
1455; CHECK-RV64:       # %bb.0:
1456; CHECK-RV64-NEXT:    ntl.p1
1457; CHECK-RV64-NEXT:    ld a0, 0(a0)
1458; CHECK-RV64-NEXT:    ret
1459;
1460; CHECK-RV32-LABEL: test_nontemporal_P1_load_i64:
1461; CHECK-RV32:       # %bb.0:
1462; CHECK-RV32-NEXT:    ntl.p1
1463; CHECK-RV32-NEXT:    lw a2, 0(a0)
1464; CHECK-RV32-NEXT:    ntl.p1
1465; CHECK-RV32-NEXT:    lw a1, 4(a0)
1466; CHECK-RV32-NEXT:    mv a0, a2
1467; CHECK-RV32-NEXT:    ret
1468;
1469; CHECK-RV64C-LABEL: test_nontemporal_P1_load_i64:
1470; CHECK-RV64C:       # %bb.0:
1471; CHECK-RV64C-NEXT:    c.ntl.p1
1472; CHECK-RV64C-NEXT:    ld a0, 0(a0)
1473; CHECK-RV64C-NEXT:    ret
1474;
1475; CHECK-RV32C-LABEL: test_nontemporal_P1_load_i64:
1476; CHECK-RV32C:       # %bb.0:
1477; CHECK-RV32C-NEXT:    c.ntl.p1
1478; CHECK-RV32C-NEXT:    lw a2, 0(a0)
1479; CHECK-RV32C-NEXT:    c.ntl.p1
1480; CHECK-RV32C-NEXT:    lw a1, 4(a0)
1481; CHECK-RV32C-NEXT:    mv a0, a2
1482; CHECK-RV32C-NEXT:    ret
1483;
1484; CHECK-RV64V-LABEL: test_nontemporal_P1_load_i64:
1485; CHECK-RV64V:       # %bb.0:
1486; CHECK-RV64V-NEXT:    ntl.p1
1487; CHECK-RV64V-NEXT:    ld a0, 0(a0)
1488; CHECK-RV64V-NEXT:    ret
1489;
1490; CHECK-RV32V-LABEL: test_nontemporal_P1_load_i64:
1491; CHECK-RV32V:       # %bb.0:
1492; CHECK-RV32V-NEXT:    ntl.p1
1493; CHECK-RV32V-NEXT:    lw a2, 0(a0)
1494; CHECK-RV32V-NEXT:    ntl.p1
1495; CHECK-RV32V-NEXT:    lw a1, 4(a0)
1496; CHECK-RV32V-NEXT:    mv a0, a2
1497; CHECK-RV32V-NEXT:    ret
1498  %1 = load i64, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
1499  ret i64 %1
1500}
1501
; i32 load tagged with !nontemporal plus !riscv-nontemporal-domain !1 (!1 is
; defined elsewhere in the file). All six configurations expect a single ntl.p1
; hint (c.ntl.p1 in the +c runs) immediately before one lw.
1502define i32 @test_nontemporal_P1_load_i32(ptr %p) {
1503; CHECK-RV64-LABEL: test_nontemporal_P1_load_i32:
1504; CHECK-RV64:       # %bb.0:
1505; CHECK-RV64-NEXT:    ntl.p1
1506; CHECK-RV64-NEXT:    lw a0, 0(a0)
1507; CHECK-RV64-NEXT:    ret
1508;
1509; CHECK-RV32-LABEL: test_nontemporal_P1_load_i32:
1510; CHECK-RV32:       # %bb.0:
1511; CHECK-RV32-NEXT:    ntl.p1
1512; CHECK-RV32-NEXT:    lw a0, 0(a0)
1513; CHECK-RV32-NEXT:    ret
1514;
1515; CHECK-RV64C-LABEL: test_nontemporal_P1_load_i32:
1516; CHECK-RV64C:       # %bb.0:
1517; CHECK-RV64C-NEXT:    c.ntl.p1
1518; CHECK-RV64C-NEXT:    lw a0, 0(a0)
1519; CHECK-RV64C-NEXT:    ret
1520;
1521; CHECK-RV32C-LABEL: test_nontemporal_P1_load_i32:
1522; CHECK-RV32C:       # %bb.0:
1523; CHECK-RV32C-NEXT:    c.ntl.p1
1524; CHECK-RV32C-NEXT:    lw a0, 0(a0)
1525; CHECK-RV32C-NEXT:    ret
1526;
1527; CHECK-RV64V-LABEL: test_nontemporal_P1_load_i32:
1528; CHECK-RV64V:       # %bb.0:
1529; CHECK-RV64V-NEXT:    ntl.p1
1530; CHECK-RV64V-NEXT:    lw a0, 0(a0)
1531; CHECK-RV64V-NEXT:    ret
1532;
1533; CHECK-RV32V-LABEL: test_nontemporal_P1_load_i32:
1534; CHECK-RV32V:       # %bb.0:
1535; CHECK-RV32V-NEXT:    ntl.p1
1536; CHECK-RV32V-NEXT:    lw a0, 0(a0)
1537; CHECK-RV32V-NEXT:    ret
1538  %1 = load i32, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
1539  ret i32 %1
1540}
1541
; i16 load tagged with !nontemporal plus !riscv-nontemporal-domain !1 (!1 is
; defined elsewhere in the file). All six configurations expect a single ntl.p1
; hint (c.ntl.p1 in the +c runs) immediately before one lh.
1542define i16 @test_nontemporal_P1_load_i16(ptr %p) {
1543; CHECK-RV64-LABEL: test_nontemporal_P1_load_i16:
1544; CHECK-RV64:       # %bb.0:
1545; CHECK-RV64-NEXT:    ntl.p1
1546; CHECK-RV64-NEXT:    lh a0, 0(a0)
1547; CHECK-RV64-NEXT:    ret
1548;
1549; CHECK-RV32-LABEL: test_nontemporal_P1_load_i16:
1550; CHECK-RV32:       # %bb.0:
1551; CHECK-RV32-NEXT:    ntl.p1
1552; CHECK-RV32-NEXT:    lh a0, 0(a0)
1553; CHECK-RV32-NEXT:    ret
1554;
1555; CHECK-RV64C-LABEL: test_nontemporal_P1_load_i16:
1556; CHECK-RV64C:       # %bb.0:
1557; CHECK-RV64C-NEXT:    c.ntl.p1
1558; CHECK-RV64C-NEXT:    lh a0, 0(a0)
1559; CHECK-RV64C-NEXT:    ret
1560;
1561; CHECK-RV32C-LABEL: test_nontemporal_P1_load_i16:
1562; CHECK-RV32C:       # %bb.0:
1563; CHECK-RV32C-NEXT:    c.ntl.p1
1564; CHECK-RV32C-NEXT:    lh a0, 0(a0)
1565; CHECK-RV32C-NEXT:    ret
1566;
1567; CHECK-RV64V-LABEL: test_nontemporal_P1_load_i16:
1568; CHECK-RV64V:       # %bb.0:
1569; CHECK-RV64V-NEXT:    ntl.p1
1570; CHECK-RV64V-NEXT:    lh a0, 0(a0)
1571; CHECK-RV64V-NEXT:    ret
1572;
1573; CHECK-RV32V-LABEL: test_nontemporal_P1_load_i16:
1574; CHECK-RV32V:       # %bb.0:
1575; CHECK-RV32V-NEXT:    ntl.p1
1576; CHECK-RV32V-NEXT:    lh a0, 0(a0)
1577; CHECK-RV32V-NEXT:    ret
1578  %1 = load i16, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
1579  ret i16 %1
1580}
1581
; i8 load tagged with !nontemporal plus !riscv-nontemporal-domain !1 (!1 is
; defined elsewhere in the file). All six configurations expect a single ntl.p1
; hint (c.ntl.p1 in the +c runs) immediately before one lbu.
1582define i8 @test_nontemporal_P1_load_i8(ptr %p) {
1583; CHECK-RV64-LABEL: test_nontemporal_P1_load_i8:
1584; CHECK-RV64:       # %bb.0:
1585; CHECK-RV64-NEXT:    ntl.p1
1586; CHECK-RV64-NEXT:    lbu a0, 0(a0)
1587; CHECK-RV64-NEXT:    ret
1588;
1589; CHECK-RV32-LABEL: test_nontemporal_P1_load_i8:
1590; CHECK-RV32:       # %bb.0:
1591; CHECK-RV32-NEXT:    ntl.p1
1592; CHECK-RV32-NEXT:    lbu a0, 0(a0)
1593; CHECK-RV32-NEXT:    ret
1594;
1595; CHECK-RV64C-LABEL: test_nontemporal_P1_load_i8:
1596; CHECK-RV64C:       # %bb.0:
1597; CHECK-RV64C-NEXT:    c.ntl.p1
1598; CHECK-RV64C-NEXT:    lbu a0, 0(a0)
1599; CHECK-RV64C-NEXT:    ret
1600;
1601; CHECK-RV32C-LABEL: test_nontemporal_P1_load_i8:
1602; CHECK-RV32C:       # %bb.0:
1603; CHECK-RV32C-NEXT:    c.ntl.p1
1604; CHECK-RV32C-NEXT:    lbu a0, 0(a0)
1605; CHECK-RV32C-NEXT:    ret
1606;
1607; CHECK-RV64V-LABEL: test_nontemporal_P1_load_i8:
1608; CHECK-RV64V:       # %bb.0:
1609; CHECK-RV64V-NEXT:    ntl.p1
1610; CHECK-RV64V-NEXT:    lbu a0, 0(a0)
1611; CHECK-RV64V-NEXT:    ret
1612;
1613; CHECK-RV32V-LABEL: test_nontemporal_P1_load_i8:
1614; CHECK-RV32V:       # %bb.0:
1615; CHECK-RV32V-NEXT:    ntl.p1
1616; CHECK-RV32V-NEXT:    lbu a0, 0(a0)
1617; CHECK-RV32V-NEXT:    ret
1618  %1 = load i8, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
1619  ret i8 %1
1620}
1621
; Two half loads, one at %p and one at element index 3 (byte offset 6 in the
; checks), both tagged with !nontemporal plus !riscv-nontemporal-domain !1, then
; added with fadd. The checks expect every flh to be directly preceded by its own
; ntl.p1 hint (c.ntl.p1 in the +c runs); the fadd.h itself carries no hint.
1622define half @test_nontemporal_P1_load_half(ptr %p) nounwind {
1623; CHECK-RV64-LABEL: test_nontemporal_P1_load_half:
1624; CHECK-RV64:       # %bb.0:
1625; CHECK-RV64-NEXT:    ntl.p1
1626; CHECK-RV64-NEXT:    flh fa5, 0(a0)
1627; CHECK-RV64-NEXT:    ntl.p1
1628; CHECK-RV64-NEXT:    flh fa4, 6(a0)
1629; CHECK-RV64-NEXT:    fadd.h fa0, fa5, fa4
1630; CHECK-RV64-NEXT:    ret
1631;
1632; CHECK-RV32-LABEL: test_nontemporal_P1_load_half:
1633; CHECK-RV32:       # %bb.0:
1634; CHECK-RV32-NEXT:    ntl.p1
1635; CHECK-RV32-NEXT:    flh fa5, 0(a0)
1636; CHECK-RV32-NEXT:    ntl.p1
1637; CHECK-RV32-NEXT:    flh fa4, 6(a0)
1638; CHECK-RV32-NEXT:    fadd.h fa0, fa5, fa4
1639; CHECK-RV32-NEXT:    ret
1640;
1641; CHECK-RV64C-LABEL: test_nontemporal_P1_load_half:
1642; CHECK-RV64C:       # %bb.0:
1643; CHECK-RV64C-NEXT:    c.ntl.p1
1644; CHECK-RV64C-NEXT:    flh fa5, 0(a0)
1645; CHECK-RV64C-NEXT:    c.ntl.p1
1646; CHECK-RV64C-NEXT:    flh fa4, 6(a0)
1647; CHECK-RV64C-NEXT:    fadd.h fa0, fa5, fa4
1648; CHECK-RV64C-NEXT:    ret
1649;
1650; CHECK-RV32C-LABEL: test_nontemporal_P1_load_half:
1651; CHECK-RV32C:       # %bb.0:
1652; CHECK-RV32C-NEXT:    c.ntl.p1
1653; CHECK-RV32C-NEXT:    flh fa5, 0(a0)
1654; CHECK-RV32C-NEXT:    c.ntl.p1
1655; CHECK-RV32C-NEXT:    flh fa4, 6(a0)
1656; CHECK-RV32C-NEXT:    fadd.h fa0, fa5, fa4
1657; CHECK-RV32C-NEXT:    ret
1658;
1659; CHECK-RV64V-LABEL: test_nontemporal_P1_load_half:
1660; CHECK-RV64V:       # %bb.0:
1661; CHECK-RV64V-NEXT:    ntl.p1
1662; CHECK-RV64V-NEXT:    flh fa5, 0(a0)
1663; CHECK-RV64V-NEXT:    ntl.p1
1664; CHECK-RV64V-NEXT:    flh fa4, 6(a0)
1665; CHECK-RV64V-NEXT:    fadd.h fa0, fa5, fa4
1666; CHECK-RV64V-NEXT:    ret
1667;
1668; CHECK-RV32V-LABEL: test_nontemporal_P1_load_half:
1669; CHECK-RV32V:       # %bb.0:
1670; CHECK-RV32V-NEXT:    ntl.p1
1671; CHECK-RV32V-NEXT:    flh fa5, 0(a0)
1672; CHECK-RV32V-NEXT:    ntl.p1
1673; CHECK-RV32V-NEXT:    flh fa4, 6(a0)
1674; CHECK-RV32V-NEXT:    fadd.h fa0, fa5, fa4
1675; CHECK-RV32V-NEXT:    ret
1676  %1 = load half, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
1677  %2 = getelementptr half, ptr %p, i32 3
1678  %3 = load half, ptr %2, !nontemporal !0, !riscv-nontemporal-domain !1
1679  %4 = fadd half %1, %3
1680  ret half %4
1681}
1682
; float load tagged with !nontemporal plus !riscv-nontemporal-domain !1 (!1 is
; defined elsewhere in the file). All six configurations expect a single ntl.p1
; hint (c.ntl.p1 in the +c runs) immediately before one flw into fa0.
1683define float @test_nontemporal_P1_load_float(ptr %p) {
1684; CHECK-RV64-LABEL: test_nontemporal_P1_load_float:
1685; CHECK-RV64:       # %bb.0:
1686; CHECK-RV64-NEXT:    ntl.p1
1687; CHECK-RV64-NEXT:    flw fa0, 0(a0)
1688; CHECK-RV64-NEXT:    ret
1689;
1690; CHECK-RV32-LABEL: test_nontemporal_P1_load_float:
1691; CHECK-RV32:       # %bb.0:
1692; CHECK-RV32-NEXT:    ntl.p1
1693; CHECK-RV32-NEXT:    flw fa0, 0(a0)
1694; CHECK-RV32-NEXT:    ret
1695;
1696; CHECK-RV64C-LABEL: test_nontemporal_P1_load_float:
1697; CHECK-RV64C:       # %bb.0:
1698; CHECK-RV64C-NEXT:    c.ntl.p1
1699; CHECK-RV64C-NEXT:    flw fa0, 0(a0)
1700; CHECK-RV64C-NEXT:    ret
1701;
1702; CHECK-RV32C-LABEL: test_nontemporal_P1_load_float:
1703; CHECK-RV32C:       # %bb.0:
1704; CHECK-RV32C-NEXT:    c.ntl.p1
1705; CHECK-RV32C-NEXT:    flw fa0, 0(a0)
1706; CHECK-RV32C-NEXT:    ret
1707;
1708; CHECK-RV64V-LABEL: test_nontemporal_P1_load_float:
1709; CHECK-RV64V:       # %bb.0:
1710; CHECK-RV64V-NEXT:    ntl.p1
1711; CHECK-RV64V-NEXT:    flw fa0, 0(a0)
1712; CHECK-RV64V-NEXT:    ret
1713;
1714; CHECK-RV32V-LABEL: test_nontemporal_P1_load_float:
1715; CHECK-RV32V:       # %bb.0:
1716; CHECK-RV32V-NEXT:    ntl.p1
1717; CHECK-RV32V-NEXT:    flw fa0, 0(a0)
1718; CHECK-RV32V-NEXT:    ret
1719  %1 = load float, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
1720  ret float %1
1721}
1722
; double load tagged with !nontemporal plus !riscv-nontemporal-domain !1 (!1 is
; defined elsewhere in the file). All six configurations, including the 32-bit
; ones, expect a single ntl.p1 hint (c.ntl.p1 in the +c runs) immediately before
; one fld into fa0.
1723define double @test_nontemporal_P1_load_double(ptr %p) {
1724; CHECK-RV64-LABEL: test_nontemporal_P1_load_double:
1725; CHECK-RV64:       # %bb.0:
1726; CHECK-RV64-NEXT:    ntl.p1
1727; CHECK-RV64-NEXT:    fld fa0, 0(a0)
1728; CHECK-RV64-NEXT:    ret
1729;
1730; CHECK-RV32-LABEL: test_nontemporal_P1_load_double:
1731; CHECK-RV32:       # %bb.0:
1732; CHECK-RV32-NEXT:    ntl.p1
1733; CHECK-RV32-NEXT:    fld fa0, 0(a0)
1734; CHECK-RV32-NEXT:    ret
1735;
1736; CHECK-RV64C-LABEL: test_nontemporal_P1_load_double:
1737; CHECK-RV64C:       # %bb.0:
1738; CHECK-RV64C-NEXT:    c.ntl.p1
1739; CHECK-RV64C-NEXT:    fld fa0, 0(a0)
1740; CHECK-RV64C-NEXT:    ret
1741;
1742; CHECK-RV32C-LABEL: test_nontemporal_P1_load_double:
1743; CHECK-RV32C:       # %bb.0:
1744; CHECK-RV32C-NEXT:    c.ntl.p1
1745; CHECK-RV32C-NEXT:    fld fa0, 0(a0)
1746; CHECK-RV32C-NEXT:    ret
1747;
1748; CHECK-RV64V-LABEL: test_nontemporal_P1_load_double:
1749; CHECK-RV64V:       # %bb.0:
1750; CHECK-RV64V-NEXT:    ntl.p1
1751; CHECK-RV64V-NEXT:    fld fa0, 0(a0)
1752; CHECK-RV64V-NEXT:    ret
1753;
1754; CHECK-RV32V-LABEL: test_nontemporal_P1_load_double:
1755; CHECK-RV32V:       # %bb.0:
1756; CHECK-RV32V-NEXT:    ntl.p1
1757; CHECK-RV32V-NEXT:    fld fa0, 0(a0)
1758; CHECK-RV32V-NEXT:    ret
1759  %1 = load double, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
1760  ret double %1
1761}
1762
; <16 x i8> load tagged with !nontemporal plus !riscv-nontemporal-domain !1 (!1
; is defined elsewhere in the file).
; Runs without +v: the checks expect the load split into two ld (64-bit) or four
; lw (32-bit) through a1, each with its own ntl.p1 / c.ntl.p1 hint, followed by
; plain sd/sw stores through a0 that carry no hint.
; Runs with +v: a single ntl.p1 precedes one vle8.v.
1763define <16 x i8> @test_nontemporal_P1_load_v16i8(ptr %p) {
1764; CHECK-RV64-LABEL: test_nontemporal_P1_load_v16i8:
1765; CHECK-RV64:       # %bb.0:
1766; CHECK-RV64-NEXT:    ntl.p1
1767; CHECK-RV64-NEXT:    ld a2, 0(a1)
1768; CHECK-RV64-NEXT:    ntl.p1
1769; CHECK-RV64-NEXT:    ld a1, 8(a1)
1770; CHECK-RV64-NEXT:    sd a2, 0(a0)
1771; CHECK-RV64-NEXT:    sd a1, 8(a0)
1772; CHECK-RV64-NEXT:    ret
1773;
1774; CHECK-RV32-LABEL: test_nontemporal_P1_load_v16i8:
1775; CHECK-RV32:       # %bb.0:
1776; CHECK-RV32-NEXT:    ntl.p1
1777; CHECK-RV32-NEXT:    lw a2, 0(a1)
1778; CHECK-RV32-NEXT:    ntl.p1
1779; CHECK-RV32-NEXT:    lw a3, 4(a1)
1780; CHECK-RV32-NEXT:    ntl.p1
1781; CHECK-RV32-NEXT:    lw a4, 8(a1)
1782; CHECK-RV32-NEXT:    ntl.p1
1783; CHECK-RV32-NEXT:    lw a1, 12(a1)
1784; CHECK-RV32-NEXT:    sw a2, 0(a0)
1785; CHECK-RV32-NEXT:    sw a3, 4(a0)
1786; CHECK-RV32-NEXT:    sw a4, 8(a0)
1787; CHECK-RV32-NEXT:    sw a1, 12(a0)
1788; CHECK-RV32-NEXT:    ret
1789;
1790; CHECK-RV64C-LABEL: test_nontemporal_P1_load_v16i8:
1791; CHECK-RV64C:       # %bb.0:
1792; CHECK-RV64C-NEXT:    c.ntl.p1
1793; CHECK-RV64C-NEXT:    ld a2, 0(a1)
1794; CHECK-RV64C-NEXT:    c.ntl.p1
1795; CHECK-RV64C-NEXT:    ld a1, 8(a1)
1796; CHECK-RV64C-NEXT:    sd a2, 0(a0)
1797; CHECK-RV64C-NEXT:    sd a1, 8(a0)
1798; CHECK-RV64C-NEXT:    ret
1799;
1800; CHECK-RV32C-LABEL: test_nontemporal_P1_load_v16i8:
1801; CHECK-RV32C:       # %bb.0:
1802; CHECK-RV32C-NEXT:    c.ntl.p1
1803; CHECK-RV32C-NEXT:    lw a2, 0(a1)
1804; CHECK-RV32C-NEXT:    c.ntl.p1
1805; CHECK-RV32C-NEXT:    lw a3, 4(a1)
1806; CHECK-RV32C-NEXT:    c.ntl.p1
1807; CHECK-RV32C-NEXT:    lw a4, 8(a1)
1808; CHECK-RV32C-NEXT:    c.ntl.p1
1809; CHECK-RV32C-NEXT:    lw a1, 12(a1)
1810; CHECK-RV32C-NEXT:    sw a2, 0(a0)
1811; CHECK-RV32C-NEXT:    sw a3, 4(a0)
1812; CHECK-RV32C-NEXT:    sw a4, 8(a0)
1813; CHECK-RV32C-NEXT:    sw a1, 12(a0)
1814; CHECK-RV32C-NEXT:    ret
1815;
1816; CHECK-RV64V-LABEL: test_nontemporal_P1_load_v16i8:
1817; CHECK-RV64V:       # %bb.0:
1818; CHECK-RV64V-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
1819; CHECK-RV64V-NEXT:    ntl.p1
1820; CHECK-RV64V-NEXT:    vle8.v v8, (a0)
1821; CHECK-RV64V-NEXT:    ret
1822;
1823; CHECK-RV32V-LABEL: test_nontemporal_P1_load_v16i8:
1824; CHECK-RV32V:       # %bb.0:
1825; CHECK-RV32V-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
1826; CHECK-RV32V-NEXT:    ntl.p1
1827; CHECK-RV32V-NEXT:    vle8.v v8, (a0)
1828; CHECK-RV32V-NEXT:    ret
1829  %1 = load <16 x i8>, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
1830  ret <16 x i8> %1
1831}
1832
; Nontemporal load of <8 x i16> from %p, tagged with !nontemporal !0 and
; !riscv-nontemporal-domain !1 (both metadata nodes are defined outside this
; excerpt). Expected code per RUN configuration:
;   * base RV64 / RV32: the load is split into scalar ld (x2) / lw (x4)
;     accesses off a1, each preceded by its own ntl.p1 hint; the loaded words
;     are then written through a0 with plain, unhinted sd / sw stores.
;   * +c configurations: same sequence, but the hint is the compressed c.ntl.p1.
;   * +v configurations: one vle16.v (VL=8, e16) preceded by a single ntl.p1.
1833define <8 x i16> @test_nontemporal_P1_load_v8i16(ptr %p) {
1834; CHECK-RV64-LABEL: test_nontemporal_P1_load_v8i16:
1835; CHECK-RV64:       # %bb.0:
1836; CHECK-RV64-NEXT:    ntl.p1
1837; CHECK-RV64-NEXT:    ld a2, 0(a1)
1838; CHECK-RV64-NEXT:    ntl.p1
1839; CHECK-RV64-NEXT:    ld a1, 8(a1)
1840; CHECK-RV64-NEXT:    sd a2, 0(a0)
1841; CHECK-RV64-NEXT:    sd a1, 8(a0)
1842; CHECK-RV64-NEXT:    ret
1843;
1844; CHECK-RV32-LABEL: test_nontemporal_P1_load_v8i16:
1845; CHECK-RV32:       # %bb.0:
1846; CHECK-RV32-NEXT:    ntl.p1
1847; CHECK-RV32-NEXT:    lw a2, 0(a1)
1848; CHECK-RV32-NEXT:    ntl.p1
1849; CHECK-RV32-NEXT:    lw a3, 4(a1)
1850; CHECK-RV32-NEXT:    ntl.p1
1851; CHECK-RV32-NEXT:    lw a4, 8(a1)
1852; CHECK-RV32-NEXT:    ntl.p1
1853; CHECK-RV32-NEXT:    lw a1, 12(a1)
1854; CHECK-RV32-NEXT:    sw a2, 0(a0)
1855; CHECK-RV32-NEXT:    sw a3, 4(a0)
1856; CHECK-RV32-NEXT:    sw a4, 8(a0)
1857; CHECK-RV32-NEXT:    sw a1, 12(a0)
1858; CHECK-RV32-NEXT:    ret
1859;
1860; CHECK-RV64C-LABEL: test_nontemporal_P1_load_v8i16:
1861; CHECK-RV64C:       # %bb.0:
1862; CHECK-RV64C-NEXT:    c.ntl.p1
1863; CHECK-RV64C-NEXT:    ld a2, 0(a1)
1864; CHECK-RV64C-NEXT:    c.ntl.p1
1865; CHECK-RV64C-NEXT:    ld a1, 8(a1)
1866; CHECK-RV64C-NEXT:    sd a2, 0(a0)
1867; CHECK-RV64C-NEXT:    sd a1, 8(a0)
1868; CHECK-RV64C-NEXT:    ret
1869;
1870; CHECK-RV32C-LABEL: test_nontemporal_P1_load_v8i16:
1871; CHECK-RV32C:       # %bb.0:
1872; CHECK-RV32C-NEXT:    c.ntl.p1
1873; CHECK-RV32C-NEXT:    lw a2, 0(a1)
1874; CHECK-RV32C-NEXT:    c.ntl.p1
1875; CHECK-RV32C-NEXT:    lw a3, 4(a1)
1876; CHECK-RV32C-NEXT:    c.ntl.p1
1877; CHECK-RV32C-NEXT:    lw a4, 8(a1)
1878; CHECK-RV32C-NEXT:    c.ntl.p1
1879; CHECK-RV32C-NEXT:    lw a1, 12(a1)
1880; CHECK-RV32C-NEXT:    sw a2, 0(a0)
1881; CHECK-RV32C-NEXT:    sw a3, 4(a0)
1882; CHECK-RV32C-NEXT:    sw a4, 8(a0)
1883; CHECK-RV32C-NEXT:    sw a1, 12(a0)
1884; CHECK-RV32C-NEXT:    ret
1885;
1886; CHECK-RV64V-LABEL: test_nontemporal_P1_load_v8i16:
1887; CHECK-RV64V:       # %bb.0:
1888; CHECK-RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
1889; CHECK-RV64V-NEXT:    ntl.p1
1890; CHECK-RV64V-NEXT:    vle16.v v8, (a0)
1891; CHECK-RV64V-NEXT:    ret
1892;
1893; CHECK-RV32V-LABEL: test_nontemporal_P1_load_v8i16:
1894; CHECK-RV32V:       # %bb.0:
1895; CHECK-RV32V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
1896; CHECK-RV32V-NEXT:    ntl.p1
1897; CHECK-RV32V-NEXT:    vle16.v v8, (a0)
1898; CHECK-RV32V-NEXT:    ret
1899  %1 = load <8 x i16>, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
1900  ret <8 x i16> %1
1901}
1902
; Nontemporal load of <4 x i32> from %p, tagged with !nontemporal !0 and
; !riscv-nontemporal-domain !1 (both metadata nodes are defined outside this
; excerpt). Expected code per RUN configuration:
;   * base RV64 / RV32: the load is split into scalar ld (x2) / lw (x4)
;     accesses off a1, each preceded by its own ntl.p1 hint; the loaded words
;     are then written through a0 with plain, unhinted sd / sw stores.
;   * +c configurations: same sequence, but the hint is the compressed c.ntl.p1.
;   * +v configurations: one vle32.v (VL=4, e32) preceded by a single ntl.p1.
1903define <4 x i32> @test_nontemporal_P1_load_v4i32(ptr %p) {
1904; CHECK-RV64-LABEL: test_nontemporal_P1_load_v4i32:
1905; CHECK-RV64:       # %bb.0:
1906; CHECK-RV64-NEXT:    ntl.p1
1907; CHECK-RV64-NEXT:    ld a2, 0(a1)
1908; CHECK-RV64-NEXT:    ntl.p1
1909; CHECK-RV64-NEXT:    ld a1, 8(a1)
1910; CHECK-RV64-NEXT:    sd a2, 0(a0)
1911; CHECK-RV64-NEXT:    sd a1, 8(a0)
1912; CHECK-RV64-NEXT:    ret
1913;
1914; CHECK-RV32-LABEL: test_nontemporal_P1_load_v4i32:
1915; CHECK-RV32:       # %bb.0:
1916; CHECK-RV32-NEXT:    ntl.p1
1917; CHECK-RV32-NEXT:    lw a2, 0(a1)
1918; CHECK-RV32-NEXT:    ntl.p1
1919; CHECK-RV32-NEXT:    lw a3, 4(a1)
1920; CHECK-RV32-NEXT:    ntl.p1
1921; CHECK-RV32-NEXT:    lw a4, 8(a1)
1922; CHECK-RV32-NEXT:    ntl.p1
1923; CHECK-RV32-NEXT:    lw a1, 12(a1)
1924; CHECK-RV32-NEXT:    sw a2, 0(a0)
1925; CHECK-RV32-NEXT:    sw a3, 4(a0)
1926; CHECK-RV32-NEXT:    sw a4, 8(a0)
1927; CHECK-RV32-NEXT:    sw a1, 12(a0)
1928; CHECK-RV32-NEXT:    ret
1929;
1930; CHECK-RV64C-LABEL: test_nontemporal_P1_load_v4i32:
1931; CHECK-RV64C:       # %bb.0:
1932; CHECK-RV64C-NEXT:    c.ntl.p1
1933; CHECK-RV64C-NEXT:    ld a2, 0(a1)
1934; CHECK-RV64C-NEXT:    c.ntl.p1
1935; CHECK-RV64C-NEXT:    ld a1, 8(a1)
1936; CHECK-RV64C-NEXT:    sd a2, 0(a0)
1937; CHECK-RV64C-NEXT:    sd a1, 8(a0)
1938; CHECK-RV64C-NEXT:    ret
1939;
1940; CHECK-RV32C-LABEL: test_nontemporal_P1_load_v4i32:
1941; CHECK-RV32C:       # %bb.0:
1942; CHECK-RV32C-NEXT:    c.ntl.p1
1943; CHECK-RV32C-NEXT:    lw a2, 0(a1)
1944; CHECK-RV32C-NEXT:    c.ntl.p1
1945; CHECK-RV32C-NEXT:    lw a3, 4(a1)
1946; CHECK-RV32C-NEXT:    c.ntl.p1
1947; CHECK-RV32C-NEXT:    lw a4, 8(a1)
1948; CHECK-RV32C-NEXT:    c.ntl.p1
1949; CHECK-RV32C-NEXT:    lw a1, 12(a1)
1950; CHECK-RV32C-NEXT:    sw a2, 0(a0)
1951; CHECK-RV32C-NEXT:    sw a3, 4(a0)
1952; CHECK-RV32C-NEXT:    sw a4, 8(a0)
1953; CHECK-RV32C-NEXT:    sw a1, 12(a0)
1954; CHECK-RV32C-NEXT:    ret
1955;
1956; CHECK-RV64V-LABEL: test_nontemporal_P1_load_v4i32:
1957; CHECK-RV64V:       # %bb.0:
1958; CHECK-RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
1959; CHECK-RV64V-NEXT:    ntl.p1
1960; CHECK-RV64V-NEXT:    vle32.v v8, (a0)
1961; CHECK-RV64V-NEXT:    ret
1962;
1963; CHECK-RV32V-LABEL: test_nontemporal_P1_load_v4i32:
1964; CHECK-RV32V:       # %bb.0:
1965; CHECK-RV32V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
1966; CHECK-RV32V-NEXT:    ntl.p1
1967; CHECK-RV32V-NEXT:    vle32.v v8, (a0)
1968; CHECK-RV32V-NEXT:    ret
1969  %1 = load <4 x i32>, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
1970  ret <4 x i32> %1
1971}
1972
; Nontemporal load of <2 x i64> from %p, tagged with !nontemporal !0 and
; !riscv-nontemporal-domain !1 (both metadata nodes are defined outside this
; excerpt). Expected code per RUN configuration:
;   * base RV64: two ld accesses off a0, each preceded by ntl.p1; the two
;     halves end up in a0/a1 (via "mv a0, a2") with no stores emitted.
;   * base RV32: four lw accesses off a1, each preceded by ntl.p1, followed by
;     plain, unhinted sw stores through a0.
;   * +c configurations: same sequences, but the hint is the compressed c.ntl.p1.
;   * +v configurations: one vle64.v (VL=2, e64) preceded by a single ntl.p1.
1973define <2 x i64> @test_nontemporal_P1_load_v2i64(ptr %p) {
1974; CHECK-RV64-LABEL: test_nontemporal_P1_load_v2i64:
1975; CHECK-RV64:       # %bb.0:
1976; CHECK-RV64-NEXT:    ntl.p1
1977; CHECK-RV64-NEXT:    ld a2, 0(a0)
1978; CHECK-RV64-NEXT:    ntl.p1
1979; CHECK-RV64-NEXT:    ld a1, 8(a0)
1980; CHECK-RV64-NEXT:    mv a0, a2
1981; CHECK-RV64-NEXT:    ret
1982;
1983; CHECK-RV32-LABEL: test_nontemporal_P1_load_v2i64:
1984; CHECK-RV32:       # %bb.0:
1985; CHECK-RV32-NEXT:    ntl.p1
1986; CHECK-RV32-NEXT:    lw a2, 0(a1)
1987; CHECK-RV32-NEXT:    ntl.p1
1988; CHECK-RV32-NEXT:    lw a3, 4(a1)
1989; CHECK-RV32-NEXT:    ntl.p1
1990; CHECK-RV32-NEXT:    lw a4, 8(a1)
1991; CHECK-RV32-NEXT:    ntl.p1
1992; CHECK-RV32-NEXT:    lw a1, 12(a1)
1993; CHECK-RV32-NEXT:    sw a2, 0(a0)
1994; CHECK-RV32-NEXT:    sw a3, 4(a0)
1995; CHECK-RV32-NEXT:    sw a4, 8(a0)
1996; CHECK-RV32-NEXT:    sw a1, 12(a0)
1997; CHECK-RV32-NEXT:    ret
1998;
1999; CHECK-RV64C-LABEL: test_nontemporal_P1_load_v2i64:
2000; CHECK-RV64C:       # %bb.0:
2001; CHECK-RV64C-NEXT:    c.ntl.p1
2002; CHECK-RV64C-NEXT:    ld a2, 0(a0)
2003; CHECK-RV64C-NEXT:    c.ntl.p1
2004; CHECK-RV64C-NEXT:    ld a1, 8(a0)
2005; CHECK-RV64C-NEXT:    mv a0, a2
2006; CHECK-RV64C-NEXT:    ret
2007;
2008; CHECK-RV32C-LABEL: test_nontemporal_P1_load_v2i64:
2009; CHECK-RV32C:       # %bb.0:
2010; CHECK-RV32C-NEXT:    c.ntl.p1
2011; CHECK-RV32C-NEXT:    lw a2, 0(a1)
2012; CHECK-RV32C-NEXT:    c.ntl.p1
2013; CHECK-RV32C-NEXT:    lw a3, 4(a1)
2014; CHECK-RV32C-NEXT:    c.ntl.p1
2015; CHECK-RV32C-NEXT:    lw a4, 8(a1)
2016; CHECK-RV32C-NEXT:    c.ntl.p1
2017; CHECK-RV32C-NEXT:    lw a1, 12(a1)
2018; CHECK-RV32C-NEXT:    sw a2, 0(a0)
2019; CHECK-RV32C-NEXT:    sw a3, 4(a0)
2020; CHECK-RV32C-NEXT:    sw a4, 8(a0)
2021; CHECK-RV32C-NEXT:    sw a1, 12(a0)
2022; CHECK-RV32C-NEXT:    ret
2023;
2024; CHECK-RV64V-LABEL: test_nontemporal_P1_load_v2i64:
2025; CHECK-RV64V:       # %bb.0:
2026; CHECK-RV64V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
2027; CHECK-RV64V-NEXT:    ntl.p1
2028; CHECK-RV64V-NEXT:    vle64.v v8, (a0)
2029; CHECK-RV64V-NEXT:    ret
2030;
2031; CHECK-RV32V-LABEL: test_nontemporal_P1_load_v2i64:
2032; CHECK-RV32V:       # %bb.0:
2033; CHECK-RV32V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
2034; CHECK-RV32V-NEXT:    ntl.p1
2035; CHECK-RV32V-NEXT:    vle64.v v8, (a0)
2036; CHECK-RV32V-NEXT:    ret
2037  %1 = load <2 x i64>, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
2038  ret <2 x i64> %1
2039}
2040
; Nontemporal store of an i64 to %p, tagged with !nontemporal !0 and
; !riscv-nontemporal-domain !1 (both metadata nodes are defined outside this
; excerpt). Expected code per RUN configuration:
;   * 64-bit targets: a single sd preceded by one ntl.p1 (c.ntl.p1 with +c).
;   * 32-bit targets: the value is split into two sw stores (a1 at offset 0,
;     a2 at offset 4) and each store gets its own hint.
;   * +v configurations produce the same scalar code as the base ones.
2041define void @test_nontemporal_P1_store_i64(ptr %p, i64 %v) {
2042; CHECK-RV64-LABEL: test_nontemporal_P1_store_i64:
2043; CHECK-RV64:       # %bb.0:
2044; CHECK-RV64-NEXT:    ntl.p1
2045; CHECK-RV64-NEXT:    sd a1, 0(a0)
2046; CHECK-RV64-NEXT:    ret
2047;
2048; CHECK-RV32-LABEL: test_nontemporal_P1_store_i64:
2049; CHECK-RV32:       # %bb.0:
2050; CHECK-RV32-NEXT:    ntl.p1
2051; CHECK-RV32-NEXT:    sw a1, 0(a0)
2052; CHECK-RV32-NEXT:    ntl.p1
2053; CHECK-RV32-NEXT:    sw a2, 4(a0)
2054; CHECK-RV32-NEXT:    ret
2055;
2056; CHECK-RV64C-LABEL: test_nontemporal_P1_store_i64:
2057; CHECK-RV64C:       # %bb.0:
2058; CHECK-RV64C-NEXT:    c.ntl.p1
2059; CHECK-RV64C-NEXT:    sd a1, 0(a0)
2060; CHECK-RV64C-NEXT:    ret
2061;
2062; CHECK-RV32C-LABEL: test_nontemporal_P1_store_i64:
2063; CHECK-RV32C:       # %bb.0:
2064; CHECK-RV32C-NEXT:    c.ntl.p1
2065; CHECK-RV32C-NEXT:    sw a1, 0(a0)
2066; CHECK-RV32C-NEXT:    c.ntl.p1
2067; CHECK-RV32C-NEXT:    sw a2, 4(a0)
2068; CHECK-RV32C-NEXT:    ret
2069;
2070; CHECK-RV64V-LABEL: test_nontemporal_P1_store_i64:
2071; CHECK-RV64V:       # %bb.0:
2072; CHECK-RV64V-NEXT:    ntl.p1
2073; CHECK-RV64V-NEXT:    sd a1, 0(a0)
2074; CHECK-RV64V-NEXT:    ret
2075;
2076; CHECK-RV32V-LABEL: test_nontemporal_P1_store_i64:
2077; CHECK-RV32V:       # %bb.0:
2078; CHECK-RV32V-NEXT:    ntl.p1
2079; CHECK-RV32V-NEXT:    sw a1, 0(a0)
2080; CHECK-RV32V-NEXT:    ntl.p1
2081; CHECK-RV32V-NEXT:    sw a2, 4(a0)
2082; CHECK-RV32V-NEXT:    ret
2083  store i64 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
2084  ret void
2085}
2086
; Nontemporal store of an i32 to %p, tagged with !nontemporal !0 and
; !riscv-nontemporal-domain !1 (both metadata nodes are defined outside this
; excerpt). Every RUN configuration expects a single "sw a1, 0(a0)" directly
; preceded by one hint: ntl.p1, or c.ntl.p1 on the +c configurations.
2087define void @test_nontemporal_P1_store_i32(ptr %p, i32 %v) {
2088; CHECK-RV64-LABEL: test_nontemporal_P1_store_i32:
2089; CHECK-RV64:       # %bb.0:
2090; CHECK-RV64-NEXT:    ntl.p1
2091; CHECK-RV64-NEXT:    sw a1, 0(a0)
2092; CHECK-RV64-NEXT:    ret
2093;
2094; CHECK-RV32-LABEL: test_nontemporal_P1_store_i32:
2095; CHECK-RV32:       # %bb.0:
2096; CHECK-RV32-NEXT:    ntl.p1
2097; CHECK-RV32-NEXT:    sw a1, 0(a0)
2098; CHECK-RV32-NEXT:    ret
2099;
2100; CHECK-RV64C-LABEL: test_nontemporal_P1_store_i32:
2101; CHECK-RV64C:       # %bb.0:
2102; CHECK-RV64C-NEXT:    c.ntl.p1
2103; CHECK-RV64C-NEXT:    sw a1, 0(a0)
2104; CHECK-RV64C-NEXT:    ret
2105;
2106; CHECK-RV32C-LABEL: test_nontemporal_P1_store_i32:
2107; CHECK-RV32C:       # %bb.0:
2108; CHECK-RV32C-NEXT:    c.ntl.p1
2109; CHECK-RV32C-NEXT:    sw a1, 0(a0)
2110; CHECK-RV32C-NEXT:    ret
2111;
2112; CHECK-RV64V-LABEL: test_nontemporal_P1_store_i32:
2113; CHECK-RV64V:       # %bb.0:
2114; CHECK-RV64V-NEXT:    ntl.p1
2115; CHECK-RV64V-NEXT:    sw a1, 0(a0)
2116; CHECK-RV64V-NEXT:    ret
2117;
2118; CHECK-RV32V-LABEL: test_nontemporal_P1_store_i32:
2119; CHECK-RV32V:       # %bb.0:
2120; CHECK-RV32V-NEXT:    ntl.p1
2121; CHECK-RV32V-NEXT:    sw a1, 0(a0)
2122; CHECK-RV32V-NEXT:    ret
2123  store i32 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
2124  ret void
2125}
2126
; Nontemporal store of an i16 to %p, tagged with !nontemporal !0 and
; !riscv-nontemporal-domain !1 (both metadata nodes are defined outside this
; excerpt). Every RUN configuration expects a single "sh a1, 0(a0)" directly
; preceded by one hint: ntl.p1, or c.ntl.p1 on the +c configurations.
2127define void @test_nontemporal_P1_store_i16(ptr %p, i16 %v) {
2128; CHECK-RV64-LABEL: test_nontemporal_P1_store_i16:
2129; CHECK-RV64:       # %bb.0:
2130; CHECK-RV64-NEXT:    ntl.p1
2131; CHECK-RV64-NEXT:    sh a1, 0(a0)
2132; CHECK-RV64-NEXT:    ret
2133;
2134; CHECK-RV32-LABEL: test_nontemporal_P1_store_i16:
2135; CHECK-RV32:       # %bb.0:
2136; CHECK-RV32-NEXT:    ntl.p1
2137; CHECK-RV32-NEXT:    sh a1, 0(a0)
2138; CHECK-RV32-NEXT:    ret
2139;
2140; CHECK-RV64C-LABEL: test_nontemporal_P1_store_i16:
2141; CHECK-RV64C:       # %bb.0:
2142; CHECK-RV64C-NEXT:    c.ntl.p1
2143; CHECK-RV64C-NEXT:    sh a1, 0(a0)
2144; CHECK-RV64C-NEXT:    ret
2145;
2146; CHECK-RV32C-LABEL: test_nontemporal_P1_store_i16:
2147; CHECK-RV32C:       # %bb.0:
2148; CHECK-RV32C-NEXT:    c.ntl.p1
2149; CHECK-RV32C-NEXT:    sh a1, 0(a0)
2150; CHECK-RV32C-NEXT:    ret
2151;
2152; CHECK-RV64V-LABEL: test_nontemporal_P1_store_i16:
2153; CHECK-RV64V:       # %bb.0:
2154; CHECK-RV64V-NEXT:    ntl.p1
2155; CHECK-RV64V-NEXT:    sh a1, 0(a0)
2156; CHECK-RV64V-NEXT:    ret
2157;
2158; CHECK-RV32V-LABEL: test_nontemporal_P1_store_i16:
2159; CHECK-RV32V:       # %bb.0:
2160; CHECK-RV32V-NEXT:    ntl.p1
2161; CHECK-RV32V-NEXT:    sh a1, 0(a0)
2162; CHECK-RV32V-NEXT:    ret
2163  store i16 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
2164  ret void
2165}
2166
; Nontemporal store of an i8 to %p, tagged with !nontemporal !0 and
; !riscv-nontemporal-domain !1 (both metadata nodes are defined outside this
; excerpt). Every RUN configuration expects a single "sb a1, 0(a0)" directly
; preceded by one hint: ntl.p1, or c.ntl.p1 on the +c configurations.
2167define void @test_nontemporal_P1_store_i8(ptr %p, i8 %v) {
2168; CHECK-RV64-LABEL: test_nontemporal_P1_store_i8:
2169; CHECK-RV64:       # %bb.0:
2170; CHECK-RV64-NEXT:    ntl.p1
2171; CHECK-RV64-NEXT:    sb a1, 0(a0)
2172; CHECK-RV64-NEXT:    ret
2173;
2174; CHECK-RV32-LABEL: test_nontemporal_P1_store_i8:
2175; CHECK-RV32:       # %bb.0:
2176; CHECK-RV32-NEXT:    ntl.p1
2177; CHECK-RV32-NEXT:    sb a1, 0(a0)
2178; CHECK-RV32-NEXT:    ret
2179;
2180; CHECK-RV64C-LABEL: test_nontemporal_P1_store_i8:
2181; CHECK-RV64C:       # %bb.0:
2182; CHECK-RV64C-NEXT:    c.ntl.p1
2183; CHECK-RV64C-NEXT:    sb a1, 0(a0)
2184; CHECK-RV64C-NEXT:    ret
2185;
2186; CHECK-RV32C-LABEL: test_nontemporal_P1_store_i8:
2187; CHECK-RV32C:       # %bb.0:
2188; CHECK-RV32C-NEXT:    c.ntl.p1
2189; CHECK-RV32C-NEXT:    sb a1, 0(a0)
2190; CHECK-RV32C-NEXT:    ret
2191;
2192; CHECK-RV64V-LABEL: test_nontemporal_P1_store_i8:
2193; CHECK-RV64V:       # %bb.0:
2194; CHECK-RV64V-NEXT:    ntl.p1
2195; CHECK-RV64V-NEXT:    sb a1, 0(a0)
2196; CHECK-RV64V-NEXT:    ret
2197;
2198; CHECK-RV32V-LABEL: test_nontemporal_P1_store_i8:
2199; CHECK-RV32V:       # %bb.0:
2200; CHECK-RV32V-NEXT:    ntl.p1
2201; CHECK-RV32V-NEXT:    sb a1, 0(a0)
2202; CHECK-RV32V-NEXT:    ret
2203  store i8 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
2204  ret void
2205}
2206
; Nontemporal store of a half to %p, tagged with !nontemporal !0 and
; !riscv-nontemporal-domain !1 (both metadata nodes are defined outside this
; excerpt). Every RUN configuration expects a single "fsh fa0, 0(a0)" directly
; preceded by one hint: ntl.p1, or c.ntl.p1 on the +c configurations.
2207define void @test_nontemporal_P1_store_half(ptr %p, half %v) {
2208; CHECK-RV64-LABEL: test_nontemporal_P1_store_half:
2209; CHECK-RV64:       # %bb.0:
2210; CHECK-RV64-NEXT:    ntl.p1
2211; CHECK-RV64-NEXT:    fsh fa0, 0(a0)
2212; CHECK-RV64-NEXT:    ret
2213;
2214; CHECK-RV32-LABEL: test_nontemporal_P1_store_half:
2215; CHECK-RV32:       # %bb.0:
2216; CHECK-RV32-NEXT:    ntl.p1
2217; CHECK-RV32-NEXT:    fsh fa0, 0(a0)
2218; CHECK-RV32-NEXT:    ret
2219;
2220; CHECK-RV64C-LABEL: test_nontemporal_P1_store_half:
2221; CHECK-RV64C:       # %bb.0:
2222; CHECK-RV64C-NEXT:    c.ntl.p1
2223; CHECK-RV64C-NEXT:    fsh fa0, 0(a0)
2224; CHECK-RV64C-NEXT:    ret
2225;
2226; CHECK-RV32C-LABEL: test_nontemporal_P1_store_half:
2227; CHECK-RV32C:       # %bb.0:
2228; CHECK-RV32C-NEXT:    c.ntl.p1
2229; CHECK-RV32C-NEXT:    fsh fa0, 0(a0)
2230; CHECK-RV32C-NEXT:    ret
2231;
2232; CHECK-RV64V-LABEL: test_nontemporal_P1_store_half:
2233; CHECK-RV64V:       # %bb.0:
2234; CHECK-RV64V-NEXT:    ntl.p1
2235; CHECK-RV64V-NEXT:    fsh fa0, 0(a0)
2236; CHECK-RV64V-NEXT:    ret
2237;
2238; CHECK-RV32V-LABEL: test_nontemporal_P1_store_half:
2239; CHECK-RV32V:       # %bb.0:
2240; CHECK-RV32V-NEXT:    ntl.p1
2241; CHECK-RV32V-NEXT:    fsh fa0, 0(a0)
2242; CHECK-RV32V-NEXT:    ret
2243  store half %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
2244  ret void
2245}
2246
; Nontemporal store of a float to %p, tagged with !nontemporal !0 and
; !riscv-nontemporal-domain !1 (both metadata nodes are defined outside this
; excerpt). Every RUN configuration expects a single "fsw fa0, 0(a0)" directly
; preceded by one hint: ntl.p1, or c.ntl.p1 on the +c configurations.
2247define void @test_nontemporal_P1_store_float(ptr %p, float %v) {
2248; CHECK-RV64-LABEL: test_nontemporal_P1_store_float:
2249; CHECK-RV64:       # %bb.0:
2250; CHECK-RV64-NEXT:    ntl.p1
2251; CHECK-RV64-NEXT:    fsw fa0, 0(a0)
2252; CHECK-RV64-NEXT:    ret
2253;
2254; CHECK-RV32-LABEL: test_nontemporal_P1_store_float:
2255; CHECK-RV32:       # %bb.0:
2256; CHECK-RV32-NEXT:    ntl.p1
2257; CHECK-RV32-NEXT:    fsw fa0, 0(a0)
2258; CHECK-RV32-NEXT:    ret
2259;
2260; CHECK-RV64C-LABEL: test_nontemporal_P1_store_float:
2261; CHECK-RV64C:       # %bb.0:
2262; CHECK-RV64C-NEXT:    c.ntl.p1
2263; CHECK-RV64C-NEXT:    fsw fa0, 0(a0)
2264; CHECK-RV64C-NEXT:    ret
2265;
2266; CHECK-RV32C-LABEL: test_nontemporal_P1_store_float:
2267; CHECK-RV32C:       # %bb.0:
2268; CHECK-RV32C-NEXT:    c.ntl.p1
2269; CHECK-RV32C-NEXT:    fsw fa0, 0(a0)
2270; CHECK-RV32C-NEXT:    ret
2271;
2272; CHECK-RV64V-LABEL: test_nontemporal_P1_store_float:
2273; CHECK-RV64V:       # %bb.0:
2274; CHECK-RV64V-NEXT:    ntl.p1
2275; CHECK-RV64V-NEXT:    fsw fa0, 0(a0)
2276; CHECK-RV64V-NEXT:    ret
2277;
2278; CHECK-RV32V-LABEL: test_nontemporal_P1_store_float:
2279; CHECK-RV32V:       # %bb.0:
2280; CHECK-RV32V-NEXT:    ntl.p1
2281; CHECK-RV32V-NEXT:    fsw fa0, 0(a0)
2282; CHECK-RV32V-NEXT:    ret
2283  store float %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
2284  ret void
2285}
2286
; Nontemporal store of a double to %p, tagged with !nontemporal !0 and
; !riscv-nontemporal-domain !1 (both metadata nodes are defined outside this
; excerpt). Every RUN configuration, 32-bit ones included, expects a single
; "fsd fa0, 0(a0)" directly preceded by one hint: ntl.p1, or c.ntl.p1 on the
; +c configurations.
2287define void @test_nontemporal_P1_store_double(ptr %p, double %v) {
2288; CHECK-RV64-LABEL: test_nontemporal_P1_store_double:
2289; CHECK-RV64:       # %bb.0:
2290; CHECK-RV64-NEXT:    ntl.p1
2291; CHECK-RV64-NEXT:    fsd fa0, 0(a0)
2292; CHECK-RV64-NEXT:    ret
2293;
2294; CHECK-RV32-LABEL: test_nontemporal_P1_store_double:
2295; CHECK-RV32:       # %bb.0:
2296; CHECK-RV32-NEXT:    ntl.p1
2297; CHECK-RV32-NEXT:    fsd fa0, 0(a0)
2298; CHECK-RV32-NEXT:    ret
2299;
2300; CHECK-RV64C-LABEL: test_nontemporal_P1_store_double:
2301; CHECK-RV64C:       # %bb.0:
2302; CHECK-RV64C-NEXT:    c.ntl.p1
2303; CHECK-RV64C-NEXT:    fsd fa0, 0(a0)
2304; CHECK-RV64C-NEXT:    ret
2305;
2306; CHECK-RV32C-LABEL: test_nontemporal_P1_store_double:
2307; CHECK-RV32C:       # %bb.0:
2308; CHECK-RV32C-NEXT:    c.ntl.p1
2309; CHECK-RV32C-NEXT:    fsd fa0, 0(a0)
2310; CHECK-RV32C-NEXT:    ret
2311;
2312; CHECK-RV64V-LABEL: test_nontemporal_P1_store_double:
2313; CHECK-RV64V:       # %bb.0:
2314; CHECK-RV64V-NEXT:    ntl.p1
2315; CHECK-RV64V-NEXT:    fsd fa0, 0(a0)
2316; CHECK-RV64V-NEXT:    ret
2317;
2318; CHECK-RV32V-LABEL: test_nontemporal_P1_store_double:
2319; CHECK-RV32V:       # %bb.0:
2320; CHECK-RV32V-NEXT:    ntl.p1
2321; CHECK-RV32V-NEXT:    fsd fa0, 0(a0)
2322; CHECK-RV32V-NEXT:    ret
2323  store double %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
2324  ret void
2325}
2326
; Nontemporal store of <16 x i8> to %p, tagged with !nontemporal !0 and
; !riscv-nontemporal-domain !1 (both metadata nodes are defined outside this
; excerpt). Expected code per RUN configuration:
;   * base and +c configurations: the sixteen elements are first read with
;     plain, unhinted lbu loads off a1 (8-byte stride on 64-bit targets, 4-byte
;     stride on 32-bit targets), then written with sixteen sb stores to
;     offsets 0..15 of a0, each store preceded by its own ntl.p1 / c.ntl.p1.
;     s0 and s1 are needed as extra scratch registers, so they are spilled to
;     a 16-byte stack frame and reloaded before returning.
;   * +v configurations: one vse8.v (VL=16, e8) preceded by a single ntl.p1.
2327define void @test_nontemporal_P1_store_v16i8(ptr %p, <16 x i8> %v) {
2328; CHECK-RV64-LABEL: test_nontemporal_P1_store_v16i8:
2329; CHECK-RV64:       # %bb.0:
2330; CHECK-RV64-NEXT:    addi sp, sp, -16
2331; CHECK-RV64-NEXT:    .cfi_def_cfa_offset 16
2332; CHECK-RV64-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
2333; CHECK-RV64-NEXT:    sd s1, 0(sp) # 8-byte Folded Spill
2334; CHECK-RV64-NEXT:    .cfi_offset s0, -8
2335; CHECK-RV64-NEXT:    .cfi_offset s1, -16
2336; CHECK-RV64-NEXT:    lbu a2, 0(a1)
2337; CHECK-RV64-NEXT:    lbu a3, 8(a1)
2338; CHECK-RV64-NEXT:    lbu a4, 16(a1)
2339; CHECK-RV64-NEXT:    lbu a5, 24(a1)
2340; CHECK-RV64-NEXT:    lbu a6, 32(a1)
2341; CHECK-RV64-NEXT:    lbu a7, 40(a1)
2342; CHECK-RV64-NEXT:    lbu t0, 48(a1)
2343; CHECK-RV64-NEXT:    lbu t1, 56(a1)
2344; CHECK-RV64-NEXT:    lbu t2, 64(a1)
2345; CHECK-RV64-NEXT:    lbu t3, 72(a1)
2346; CHECK-RV64-NEXT:    lbu t4, 80(a1)
2347; CHECK-RV64-NEXT:    lbu t5, 88(a1)
2348; CHECK-RV64-NEXT:    lbu t6, 96(a1)
2349; CHECK-RV64-NEXT:    lbu s0, 104(a1)
2350; CHECK-RV64-NEXT:    lbu s1, 112(a1)
2351; CHECK-RV64-NEXT:    lbu a1, 120(a1)
2352; CHECK-RV64-NEXT:    ntl.p1
2353; CHECK-RV64-NEXT:    sb t6, 12(a0)
2354; CHECK-RV64-NEXT:    ntl.p1
2355; CHECK-RV64-NEXT:    sb s0, 13(a0)
2356; CHECK-RV64-NEXT:    ntl.p1
2357; CHECK-RV64-NEXT:    sb s1, 14(a0)
2358; CHECK-RV64-NEXT:    ntl.p1
2359; CHECK-RV64-NEXT:    sb a1, 15(a0)
2360; CHECK-RV64-NEXT:    ntl.p1
2361; CHECK-RV64-NEXT:    sb t2, 8(a0)
2362; CHECK-RV64-NEXT:    ntl.p1
2363; CHECK-RV64-NEXT:    sb t3, 9(a0)
2364; CHECK-RV64-NEXT:    ntl.p1
2365; CHECK-RV64-NEXT:    sb t4, 10(a0)
2366; CHECK-RV64-NEXT:    ntl.p1
2367; CHECK-RV64-NEXT:    sb t5, 11(a0)
2368; CHECK-RV64-NEXT:    ntl.p1
2369; CHECK-RV64-NEXT:    sb a6, 4(a0)
2370; CHECK-RV64-NEXT:    ntl.p1
2371; CHECK-RV64-NEXT:    sb a7, 5(a0)
2372; CHECK-RV64-NEXT:    ntl.p1
2373; CHECK-RV64-NEXT:    sb t0, 6(a0)
2374; CHECK-RV64-NEXT:    ntl.p1
2375; CHECK-RV64-NEXT:    sb t1, 7(a0)
2376; CHECK-RV64-NEXT:    ntl.p1
2377; CHECK-RV64-NEXT:    sb a2, 0(a0)
2378; CHECK-RV64-NEXT:    ntl.p1
2379; CHECK-RV64-NEXT:    sb a3, 1(a0)
2380; CHECK-RV64-NEXT:    ntl.p1
2381; CHECK-RV64-NEXT:    sb a4, 2(a0)
2382; CHECK-RV64-NEXT:    ntl.p1
2383; CHECK-RV64-NEXT:    sb a5, 3(a0)
2384; CHECK-RV64-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
2385; CHECK-RV64-NEXT:    ld s1, 0(sp) # 8-byte Folded Reload
2386; CHECK-RV64-NEXT:    .cfi_restore s0
2387; CHECK-RV64-NEXT:    .cfi_restore s1
2388; CHECK-RV64-NEXT:    addi sp, sp, 16
2389; CHECK-RV64-NEXT:    .cfi_def_cfa_offset 0
2390; CHECK-RV64-NEXT:    ret
2391;
2392; CHECK-RV32-LABEL: test_nontemporal_P1_store_v16i8:
2393; CHECK-RV32:       # %bb.0:
2394; CHECK-RV32-NEXT:    addi sp, sp, -16
2395; CHECK-RV32-NEXT:    .cfi_def_cfa_offset 16
2396; CHECK-RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
2397; CHECK-RV32-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
2398; CHECK-RV32-NEXT:    .cfi_offset s0, -4
2399; CHECK-RV32-NEXT:    .cfi_offset s1, -8
2400; CHECK-RV32-NEXT:    lbu a2, 0(a1)
2401; CHECK-RV32-NEXT:    lbu a3, 4(a1)
2402; CHECK-RV32-NEXT:    lbu a4, 8(a1)
2403; CHECK-RV32-NEXT:    lbu a5, 12(a1)
2404; CHECK-RV32-NEXT:    lbu a6, 16(a1)
2405; CHECK-RV32-NEXT:    lbu a7, 20(a1)
2406; CHECK-RV32-NEXT:    lbu t0, 24(a1)
2407; CHECK-RV32-NEXT:    lbu t1, 28(a1)
2408; CHECK-RV32-NEXT:    lbu t2, 32(a1)
2409; CHECK-RV32-NEXT:    lbu t3, 36(a1)
2410; CHECK-RV32-NEXT:    lbu t4, 40(a1)
2411; CHECK-RV32-NEXT:    lbu t5, 44(a1)
2412; CHECK-RV32-NEXT:    lbu t6, 48(a1)
2413; CHECK-RV32-NEXT:    lbu s0, 52(a1)
2414; CHECK-RV32-NEXT:    lbu s1, 56(a1)
2415; CHECK-RV32-NEXT:    lbu a1, 60(a1)
2416; CHECK-RV32-NEXT:    ntl.p1
2417; CHECK-RV32-NEXT:    sb t6, 12(a0)
2418; CHECK-RV32-NEXT:    ntl.p1
2419; CHECK-RV32-NEXT:    sb s0, 13(a0)
2420; CHECK-RV32-NEXT:    ntl.p1
2421; CHECK-RV32-NEXT:    sb s1, 14(a0)
2422; CHECK-RV32-NEXT:    ntl.p1
2423; CHECK-RV32-NEXT:    sb a1, 15(a0)
2424; CHECK-RV32-NEXT:    ntl.p1
2425; CHECK-RV32-NEXT:    sb t2, 8(a0)
2426; CHECK-RV32-NEXT:    ntl.p1
2427; CHECK-RV32-NEXT:    sb t3, 9(a0)
2428; CHECK-RV32-NEXT:    ntl.p1
2429; CHECK-RV32-NEXT:    sb t4, 10(a0)
2430; CHECK-RV32-NEXT:    ntl.p1
2431; CHECK-RV32-NEXT:    sb t5, 11(a0)
2432; CHECK-RV32-NEXT:    ntl.p1
2433; CHECK-RV32-NEXT:    sb a6, 4(a0)
2434; CHECK-RV32-NEXT:    ntl.p1
2435; CHECK-RV32-NEXT:    sb a7, 5(a0)
2436; CHECK-RV32-NEXT:    ntl.p1
2437; CHECK-RV32-NEXT:    sb t0, 6(a0)
2438; CHECK-RV32-NEXT:    ntl.p1
2439; CHECK-RV32-NEXT:    sb t1, 7(a0)
2440; CHECK-RV32-NEXT:    ntl.p1
2441; CHECK-RV32-NEXT:    sb a2, 0(a0)
2442; CHECK-RV32-NEXT:    ntl.p1
2443; CHECK-RV32-NEXT:    sb a3, 1(a0)
2444; CHECK-RV32-NEXT:    ntl.p1
2445; CHECK-RV32-NEXT:    sb a4, 2(a0)
2446; CHECK-RV32-NEXT:    ntl.p1
2447; CHECK-RV32-NEXT:    sb a5, 3(a0)
2448; CHECK-RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
2449; CHECK-RV32-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
2450; CHECK-RV32-NEXT:    .cfi_restore s0
2451; CHECK-RV32-NEXT:    .cfi_restore s1
2452; CHECK-RV32-NEXT:    addi sp, sp, 16
2453; CHECK-RV32-NEXT:    .cfi_def_cfa_offset 0
2454; CHECK-RV32-NEXT:    ret
2455;
2456; CHECK-RV64C-LABEL: test_nontemporal_P1_store_v16i8:
2457; CHECK-RV64C:       # %bb.0:
2458; CHECK-RV64C-NEXT:    addi sp, sp, -16
2459; CHECK-RV64C-NEXT:    .cfi_def_cfa_offset 16
2460; CHECK-RV64C-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
2461; CHECK-RV64C-NEXT:    sd s1, 0(sp) # 8-byte Folded Spill
2462; CHECK-RV64C-NEXT:    .cfi_offset s0, -8
2463; CHECK-RV64C-NEXT:    .cfi_offset s1, -16
2464; CHECK-RV64C-NEXT:    lbu a6, 0(a1)
2465; CHECK-RV64C-NEXT:    lbu a7, 8(a1)
2466; CHECK-RV64C-NEXT:    lbu t0, 16(a1)
2467; CHECK-RV64C-NEXT:    lbu t1, 24(a1)
2468; CHECK-RV64C-NEXT:    lbu t2, 32(a1)
2469; CHECK-RV64C-NEXT:    lbu t3, 40(a1)
2470; CHECK-RV64C-NEXT:    lbu t4, 48(a1)
2471; CHECK-RV64C-NEXT:    lbu t5, 56(a1)
2472; CHECK-RV64C-NEXT:    lbu t6, 64(a1)
2473; CHECK-RV64C-NEXT:    lbu a3, 72(a1)
2474; CHECK-RV64C-NEXT:    lbu a4, 80(a1)
2475; CHECK-RV64C-NEXT:    lbu a5, 88(a1)
2476; CHECK-RV64C-NEXT:    lbu a2, 96(a1)
2477; CHECK-RV64C-NEXT:    lbu s0, 104(a1)
2478; CHECK-RV64C-NEXT:    lbu s1, 112(a1)
2479; CHECK-RV64C-NEXT:    lbu a1, 120(a1)
2480; CHECK-RV64C-NEXT:    c.ntl.p1
2481; CHECK-RV64C-NEXT:    sb a2, 12(a0)
2482; CHECK-RV64C-NEXT:    c.ntl.p1
2483; CHECK-RV64C-NEXT:    sb s0, 13(a0)
2484; CHECK-RV64C-NEXT:    c.ntl.p1
2485; CHECK-RV64C-NEXT:    sb s1, 14(a0)
2486; CHECK-RV64C-NEXT:    c.ntl.p1
2487; CHECK-RV64C-NEXT:    sb a1, 15(a0)
2488; CHECK-RV64C-NEXT:    c.ntl.p1
2489; CHECK-RV64C-NEXT:    sb t6, 8(a0)
2490; CHECK-RV64C-NEXT:    c.ntl.p1
2491; CHECK-RV64C-NEXT:    sb a3, 9(a0)
2492; CHECK-RV64C-NEXT:    c.ntl.p1
2493; CHECK-RV64C-NEXT:    sb a4, 10(a0)
2494; CHECK-RV64C-NEXT:    c.ntl.p1
2495; CHECK-RV64C-NEXT:    sb a5, 11(a0)
2496; CHECK-RV64C-NEXT:    c.ntl.p1
2497; CHECK-RV64C-NEXT:    sb t2, 4(a0)
2498; CHECK-RV64C-NEXT:    c.ntl.p1
2499; CHECK-RV64C-NEXT:    sb t3, 5(a0)
2500; CHECK-RV64C-NEXT:    c.ntl.p1
2501; CHECK-RV64C-NEXT:    sb t4, 6(a0)
2502; CHECK-RV64C-NEXT:    c.ntl.p1
2503; CHECK-RV64C-NEXT:    sb t5, 7(a0)
2504; CHECK-RV64C-NEXT:    c.ntl.p1
2505; CHECK-RV64C-NEXT:    sb a6, 0(a0)
2506; CHECK-RV64C-NEXT:    c.ntl.p1
2507; CHECK-RV64C-NEXT:    sb a7, 1(a0)
2508; CHECK-RV64C-NEXT:    c.ntl.p1
2509; CHECK-RV64C-NEXT:    sb t0, 2(a0)
2510; CHECK-RV64C-NEXT:    c.ntl.p1
2511; CHECK-RV64C-NEXT:    sb t1, 3(a0)
2512; CHECK-RV64C-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
2513; CHECK-RV64C-NEXT:    ld s1, 0(sp) # 8-byte Folded Reload
2514; CHECK-RV64C-NEXT:    .cfi_restore s0
2515; CHECK-RV64C-NEXT:    .cfi_restore s1
2516; CHECK-RV64C-NEXT:    addi sp, sp, 16
2517; CHECK-RV64C-NEXT:    .cfi_def_cfa_offset 0
2518; CHECK-RV64C-NEXT:    ret
2519;
2520; CHECK-RV32C-LABEL: test_nontemporal_P1_store_v16i8:
2521; CHECK-RV32C:       # %bb.0:
2522; CHECK-RV32C-NEXT:    addi sp, sp, -16
2523; CHECK-RV32C-NEXT:    .cfi_def_cfa_offset 16
2524; CHECK-RV32C-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
2525; CHECK-RV32C-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
2526; CHECK-RV32C-NEXT:    .cfi_offset s0, -4
2527; CHECK-RV32C-NEXT:    .cfi_offset s1, -8
2528; CHECK-RV32C-NEXT:    lbu a6, 0(a1)
2529; CHECK-RV32C-NEXT:    lbu a7, 4(a1)
2530; CHECK-RV32C-NEXT:    lbu t0, 8(a1)
2531; CHECK-RV32C-NEXT:    lbu t1, 12(a1)
2532; CHECK-RV32C-NEXT:    lbu t2, 16(a1)
2533; CHECK-RV32C-NEXT:    lbu t3, 20(a1)
2534; CHECK-RV32C-NEXT:    lbu t4, 24(a1)
2535; CHECK-RV32C-NEXT:    lbu t5, 28(a1)
2536; CHECK-RV32C-NEXT:    lbu t6, 32(a1)
2537; CHECK-RV32C-NEXT:    lbu a3, 36(a1)
2538; CHECK-RV32C-NEXT:    lbu a4, 40(a1)
2539; CHECK-RV32C-NEXT:    lbu a5, 44(a1)
2540; CHECK-RV32C-NEXT:    lbu a2, 48(a1)
2541; CHECK-RV32C-NEXT:    lbu s0, 52(a1)
2542; CHECK-RV32C-NEXT:    lbu s1, 56(a1)
2543; CHECK-RV32C-NEXT:    lbu a1, 60(a1)
2544; CHECK-RV32C-NEXT:    c.ntl.p1
2545; CHECK-RV32C-NEXT:    sb a2, 12(a0)
2546; CHECK-RV32C-NEXT:    c.ntl.p1
2547; CHECK-RV32C-NEXT:    sb s0, 13(a0)
2548; CHECK-RV32C-NEXT:    c.ntl.p1
2549; CHECK-RV32C-NEXT:    sb s1, 14(a0)
2550; CHECK-RV32C-NEXT:    c.ntl.p1
2551; CHECK-RV32C-NEXT:    sb a1, 15(a0)
2552; CHECK-RV32C-NEXT:    c.ntl.p1
2553; CHECK-RV32C-NEXT:    sb t6, 8(a0)
2554; CHECK-RV32C-NEXT:    c.ntl.p1
2555; CHECK-RV32C-NEXT:    sb a3, 9(a0)
2556; CHECK-RV32C-NEXT:    c.ntl.p1
2557; CHECK-RV32C-NEXT:    sb a4, 10(a0)
2558; CHECK-RV32C-NEXT:    c.ntl.p1
2559; CHECK-RV32C-NEXT:    sb a5, 11(a0)
2560; CHECK-RV32C-NEXT:    c.ntl.p1
2561; CHECK-RV32C-NEXT:    sb t2, 4(a0)
2562; CHECK-RV32C-NEXT:    c.ntl.p1
2563; CHECK-RV32C-NEXT:    sb t3, 5(a0)
2564; CHECK-RV32C-NEXT:    c.ntl.p1
2565; CHECK-RV32C-NEXT:    sb t4, 6(a0)
2566; CHECK-RV32C-NEXT:    c.ntl.p1
2567; CHECK-RV32C-NEXT:    sb t5, 7(a0)
2568; CHECK-RV32C-NEXT:    c.ntl.p1
2569; CHECK-RV32C-NEXT:    sb a6, 0(a0)
2570; CHECK-RV32C-NEXT:    c.ntl.p1
2571; CHECK-RV32C-NEXT:    sb a7, 1(a0)
2572; CHECK-RV32C-NEXT:    c.ntl.p1
2573; CHECK-RV32C-NEXT:    sb t0, 2(a0)
2574; CHECK-RV32C-NEXT:    c.ntl.p1
2575; CHECK-RV32C-NEXT:    sb t1, 3(a0)
2576; CHECK-RV32C-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
2577; CHECK-RV32C-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
2578; CHECK-RV32C-NEXT:    .cfi_restore s0
2579; CHECK-RV32C-NEXT:    .cfi_restore s1
2580; CHECK-RV32C-NEXT:    addi sp, sp, 16
2581; CHECK-RV32C-NEXT:    .cfi_def_cfa_offset 0
2582; CHECK-RV32C-NEXT:    ret
2583;
2584; CHECK-RV64V-LABEL: test_nontemporal_P1_store_v16i8:
2585; CHECK-RV64V:       # %bb.0:
2586; CHECK-RV64V-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
2587; CHECK-RV64V-NEXT:    ntl.p1
2588; CHECK-RV64V-NEXT:    vse8.v v8, (a0)
2589; CHECK-RV64V-NEXT:    ret
2590;
2591; CHECK-RV32V-LABEL: test_nontemporal_P1_store_v16i8:
2592; CHECK-RV32V:       # %bb.0:
2593; CHECK-RV32V-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
2594; CHECK-RV32V-NEXT:    ntl.p1
2595; CHECK-RV32V-NEXT:    vse8.v v8, (a0)
2596; CHECK-RV32V-NEXT:    ret
2597  store <16 x i8> %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
2598  ret void
2599}
2600
; Nontemporal store of <8 x i16> to %p, tagged with !nontemporal !0 and
; !riscv-nontemporal-domain !1 (both metadata nodes are defined outside this
; excerpt). Expected code per RUN configuration:
;   * base and +c configurations: the eight elements are first read with
;     plain, unhinted lh loads off a1 (8-byte stride on 64-bit targets, 4-byte
;     stride on 32-bit targets), then written with eight sh stores to offsets
;     0..14 of a0, each store preceded by its own ntl.p1 / c.ntl.p1. No
;     callee-saved registers are used, so there is no stack frame.
;   * +v configurations: one vse16.v (VL=8, e16) preceded by a single ntl.p1.
2601define void @test_nontemporal_P1_store_v8i16(ptr %p, <8 x i16> %v) {
2602; CHECK-RV64-LABEL: test_nontemporal_P1_store_v8i16:
2603; CHECK-RV64:       # %bb.0:
2604; CHECK-RV64-NEXT:    lh a2, 0(a1)
2605; CHECK-RV64-NEXT:    lh a3, 8(a1)
2606; CHECK-RV64-NEXT:    lh a4, 16(a1)
2607; CHECK-RV64-NEXT:    lh a5, 24(a1)
2608; CHECK-RV64-NEXT:    lh a6, 32(a1)
2609; CHECK-RV64-NEXT:    lh a7, 40(a1)
2610; CHECK-RV64-NEXT:    lh t0, 48(a1)
2611; CHECK-RV64-NEXT:    lh a1, 56(a1)
2612; CHECK-RV64-NEXT:    ntl.p1
2613; CHECK-RV64-NEXT:    sh a6, 8(a0)
2614; CHECK-RV64-NEXT:    ntl.p1
2615; CHECK-RV64-NEXT:    sh a7, 10(a0)
2616; CHECK-RV64-NEXT:    ntl.p1
2617; CHECK-RV64-NEXT:    sh t0, 12(a0)
2618; CHECK-RV64-NEXT:    ntl.p1
2619; CHECK-RV64-NEXT:    sh a1, 14(a0)
2620; CHECK-RV64-NEXT:    ntl.p1
2621; CHECK-RV64-NEXT:    sh a2, 0(a0)
2622; CHECK-RV64-NEXT:    ntl.p1
2623; CHECK-RV64-NEXT:    sh a3, 2(a0)
2624; CHECK-RV64-NEXT:    ntl.p1
2625; CHECK-RV64-NEXT:    sh a4, 4(a0)
2626; CHECK-RV64-NEXT:    ntl.p1
2627; CHECK-RV64-NEXT:    sh a5, 6(a0)
2628; CHECK-RV64-NEXT:    ret
2629;
2630; CHECK-RV32-LABEL: test_nontemporal_P1_store_v8i16:
2631; CHECK-RV32:       # %bb.0:
2632; CHECK-RV32-NEXT:    lh a2, 0(a1)
2633; CHECK-RV32-NEXT:    lh a3, 4(a1)
2634; CHECK-RV32-NEXT:    lh a4, 8(a1)
2635; CHECK-RV32-NEXT:    lh a5, 12(a1)
2636; CHECK-RV32-NEXT:    lh a6, 16(a1)
2637; CHECK-RV32-NEXT:    lh a7, 20(a1)
2638; CHECK-RV32-NEXT:    lh t0, 24(a1)
2639; CHECK-RV32-NEXT:    lh a1, 28(a1)
2640; CHECK-RV32-NEXT:    ntl.p1
2641; CHECK-RV32-NEXT:    sh a6, 8(a0)
2642; CHECK-RV32-NEXT:    ntl.p1
2643; CHECK-RV32-NEXT:    sh a7, 10(a0)
2644; CHECK-RV32-NEXT:    ntl.p1
2645; CHECK-RV32-NEXT:    sh t0, 12(a0)
2646; CHECK-RV32-NEXT:    ntl.p1
2647; CHECK-RV32-NEXT:    sh a1, 14(a0)
2648; CHECK-RV32-NEXT:    ntl.p1
2649; CHECK-RV32-NEXT:    sh a2, 0(a0)
2650; CHECK-RV32-NEXT:    ntl.p1
2651; CHECK-RV32-NEXT:    sh a3, 2(a0)
2652; CHECK-RV32-NEXT:    ntl.p1
2653; CHECK-RV32-NEXT:    sh a4, 4(a0)
2654; CHECK-RV32-NEXT:    ntl.p1
2655; CHECK-RV32-NEXT:    sh a5, 6(a0)
2656; CHECK-RV32-NEXT:    ret
2657;
2658; CHECK-RV64C-LABEL: test_nontemporal_P1_store_v8i16:
2659; CHECK-RV64C:       # %bb.0:
2660; CHECK-RV64C-NEXT:    lh a6, 0(a1)
2661; CHECK-RV64C-NEXT:    lh a7, 8(a1)
2662; CHECK-RV64C-NEXT:    lh t0, 16(a1)
2663; CHECK-RV64C-NEXT:    lh a5, 24(a1)
2664; CHECK-RV64C-NEXT:    lh a2, 32(a1)
2665; CHECK-RV64C-NEXT:    lh a3, 40(a1)
2666; CHECK-RV64C-NEXT:    lh a4, 48(a1)
2667; CHECK-RV64C-NEXT:    lh a1, 56(a1)
2668; CHECK-RV64C-NEXT:    c.ntl.p1
2669; CHECK-RV64C-NEXT:    sh a2, 8(a0)
2670; CHECK-RV64C-NEXT:    c.ntl.p1
2671; CHECK-RV64C-NEXT:    sh a3, 10(a0)
2672; CHECK-RV64C-NEXT:    c.ntl.p1
2673; CHECK-RV64C-NEXT:    sh a4, 12(a0)
2674; CHECK-RV64C-NEXT:    c.ntl.p1
2675; CHECK-RV64C-NEXT:    sh a1, 14(a0)
2676; CHECK-RV64C-NEXT:    c.ntl.p1
2677; CHECK-RV64C-NEXT:    sh a6, 0(a0)
2678; CHECK-RV64C-NEXT:    c.ntl.p1
2679; CHECK-RV64C-NEXT:    sh a7, 2(a0)
2680; CHECK-RV64C-NEXT:    c.ntl.p1
2681; CHECK-RV64C-NEXT:    sh t0, 4(a0)
2682; CHECK-RV64C-NEXT:    c.ntl.p1
2683; CHECK-RV64C-NEXT:    sh a5, 6(a0)
2684; CHECK-RV64C-NEXT:    ret
2685;
2686; CHECK-RV32C-LABEL: test_nontemporal_P1_store_v8i16:
2687; CHECK-RV32C:       # %bb.0:
2688; CHECK-RV32C-NEXT:    lh a6, 0(a1)
2689; CHECK-RV32C-NEXT:    lh a7, 4(a1)
2690; CHECK-RV32C-NEXT:    lh t0, 8(a1)
2691; CHECK-RV32C-NEXT:    lh a5, 12(a1)
2692; CHECK-RV32C-NEXT:    lh a2, 16(a1)
2693; CHECK-RV32C-NEXT:    lh a3, 20(a1)
2694; CHECK-RV32C-NEXT:    lh a4, 24(a1)
2695; CHECK-RV32C-NEXT:    lh a1, 28(a1)
2696; CHECK-RV32C-NEXT:    c.ntl.p1
2697; CHECK-RV32C-NEXT:    sh a2, 8(a0)
2698; CHECK-RV32C-NEXT:    c.ntl.p1
2699; CHECK-RV32C-NEXT:    sh a3, 10(a0)
2700; CHECK-RV32C-NEXT:    c.ntl.p1
2701; CHECK-RV32C-NEXT:    sh a4, 12(a0)
2702; CHECK-RV32C-NEXT:    c.ntl.p1
2703; CHECK-RV32C-NEXT:    sh a1, 14(a0)
2704; CHECK-RV32C-NEXT:    c.ntl.p1
2705; CHECK-RV32C-NEXT:    sh a6, 0(a0)
2706; CHECK-RV32C-NEXT:    c.ntl.p1
2707; CHECK-RV32C-NEXT:    sh a7, 2(a0)
2708; CHECK-RV32C-NEXT:    c.ntl.p1
2709; CHECK-RV32C-NEXT:    sh t0, 4(a0)
2710; CHECK-RV32C-NEXT:    c.ntl.p1
2711; CHECK-RV32C-NEXT:    sh a5, 6(a0)
2712; CHECK-RV32C-NEXT:    ret
2713;
2714; CHECK-RV64V-LABEL: test_nontemporal_P1_store_v8i16:
2715; CHECK-RV64V:       # %bb.0:
2716; CHECK-RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
2717; CHECK-RV64V-NEXT:    ntl.p1
2718; CHECK-RV64V-NEXT:    vse16.v v8, (a0)
2719; CHECK-RV64V-NEXT:    ret
2720;
2721; CHECK-RV32V-LABEL: test_nontemporal_P1_store_v8i16:
2722; CHECK-RV32V:       # %bb.0:
2723; CHECK-RV32V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
2724; CHECK-RV32V-NEXT:    ntl.p1
2725; CHECK-RV32V-NEXT:    vse16.v v8, (a0)
2726; CHECK-RV32V-NEXT:    ret
2727  store <8 x i16> %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
2728  ret void
2729}
2730
; Stores <4 x i32> %v to %p with !nontemporal !0 and
; !riscv-nontemporal-domain !1.
; Expected output: the runs without +v load four words through a1 and emit four
; sw stores, each preceded by ntl.p1 (c.ntl.p1 in the +c runs); the +v runs
; emit a single ntl.p1 before one vse32.v.
2731define void @test_nontemporal_P1_store_v4i32(ptr %p, <4 x i32> %v) {
2732; CHECK-RV64-LABEL: test_nontemporal_P1_store_v4i32:
2733; CHECK-RV64:       # %bb.0:
2734; CHECK-RV64-NEXT:    lw a2, 0(a1)
2735; CHECK-RV64-NEXT:    lw a3, 8(a1)
2736; CHECK-RV64-NEXT:    lw a4, 16(a1)
2737; CHECK-RV64-NEXT:    lw a1, 24(a1)
2738; CHECK-RV64-NEXT:    ntl.p1
2739; CHECK-RV64-NEXT:    sw a2, 0(a0)
2740; CHECK-RV64-NEXT:    ntl.p1
2741; CHECK-RV64-NEXT:    sw a3, 4(a0)
2742; CHECK-RV64-NEXT:    ntl.p1
2743; CHECK-RV64-NEXT:    sw a4, 8(a0)
2744; CHECK-RV64-NEXT:    ntl.p1
2745; CHECK-RV64-NEXT:    sw a1, 12(a0)
2746; CHECK-RV64-NEXT:    ret
2747;
2748; CHECK-RV32-LABEL: test_nontemporal_P1_store_v4i32:
2749; CHECK-RV32:       # %bb.0:
2750; CHECK-RV32-NEXT:    lw a2, 0(a1)
2751; CHECK-RV32-NEXT:    lw a3, 4(a1)
2752; CHECK-RV32-NEXT:    lw a4, 8(a1)
2753; CHECK-RV32-NEXT:    lw a1, 12(a1)
2754; CHECK-RV32-NEXT:    ntl.p1
2755; CHECK-RV32-NEXT:    sw a2, 0(a0)
2756; CHECK-RV32-NEXT:    ntl.p1
2757; CHECK-RV32-NEXT:    sw a3, 4(a0)
2758; CHECK-RV32-NEXT:    ntl.p1
2759; CHECK-RV32-NEXT:    sw a4, 8(a0)
2760; CHECK-RV32-NEXT:    ntl.p1
2761; CHECK-RV32-NEXT:    sw a1, 12(a0)
2762; CHECK-RV32-NEXT:    ret
2763;
2764; CHECK-RV64C-LABEL: test_nontemporal_P1_store_v4i32:
2765; CHECK-RV64C:       # %bb.0:
2766; CHECK-RV64C-NEXT:    lw a2, 0(a1)
2767; CHECK-RV64C-NEXT:    lw a3, 8(a1)
2768; CHECK-RV64C-NEXT:    lw a4, 16(a1)
2769; CHECK-RV64C-NEXT:    lw a1, 24(a1)
2770; CHECK-RV64C-NEXT:    c.ntl.p1
2771; CHECK-RV64C-NEXT:    sw a2, 0(a0)
2772; CHECK-RV64C-NEXT:    c.ntl.p1
2773; CHECK-RV64C-NEXT:    sw a3, 4(a0)
2774; CHECK-RV64C-NEXT:    c.ntl.p1
2775; CHECK-RV64C-NEXT:    sw a4, 8(a0)
2776; CHECK-RV64C-NEXT:    c.ntl.p1
2777; CHECK-RV64C-NEXT:    sw a1, 12(a0)
2778; CHECK-RV64C-NEXT:    ret
2779;
2780; CHECK-RV32C-LABEL: test_nontemporal_P1_store_v4i32:
2781; CHECK-RV32C:       # %bb.0:
2782; CHECK-RV32C-NEXT:    lw a2, 0(a1)
2783; CHECK-RV32C-NEXT:    lw a3, 4(a1)
2784; CHECK-RV32C-NEXT:    lw a4, 8(a1)
2785; CHECK-RV32C-NEXT:    lw a1, 12(a1)
2786; CHECK-RV32C-NEXT:    c.ntl.p1
2787; CHECK-RV32C-NEXT:    sw a2, 0(a0)
2788; CHECK-RV32C-NEXT:    c.ntl.p1
2789; CHECK-RV32C-NEXT:    sw a3, 4(a0)
2790; CHECK-RV32C-NEXT:    c.ntl.p1
2791; CHECK-RV32C-NEXT:    sw a4, 8(a0)
2792; CHECK-RV32C-NEXT:    c.ntl.p1
2793; CHECK-RV32C-NEXT:    sw a1, 12(a0)
2794; CHECK-RV32C-NEXT:    ret
2795;
2796; CHECK-RV64V-LABEL: test_nontemporal_P1_store_v4i32:
2797; CHECK-RV64V:       # %bb.0:
2798; CHECK-RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
2799; CHECK-RV64V-NEXT:    ntl.p1
2800; CHECK-RV64V-NEXT:    vse32.v v8, (a0)
2801; CHECK-RV64V-NEXT:    ret
2802;
2803; CHECK-RV32V-LABEL: test_nontemporal_P1_store_v4i32:
2804; CHECK-RV32V:       # %bb.0:
2805; CHECK-RV32V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
2806; CHECK-RV32V-NEXT:    ntl.p1
2807; CHECK-RV32V-NEXT:    vse32.v v8, (a0)
2808; CHECK-RV32V-NEXT:    ret
2809  store <4 x i32> %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
2810  ret void
2811}
2812
; Stores <2 x i64> %v to %p with !nontemporal !0 and
; !riscv-nontemporal-domain !1.
; Expected output: rv64 without +v emits two sd stores (a1, a2), each preceded
; by ntl.p1 (c.ntl.p1 with +c); rv32 without +v loads four words through a1 and
; emits four hinted sw stores; the +v runs emit one ntl.p1 before vse64.v.
2813define void @test_nontemporal_P1_store_v2i64(ptr %p, <2 x i64> %v) {
2814; CHECK-RV64-LABEL: test_nontemporal_P1_store_v2i64:
2815; CHECK-RV64:       # %bb.0:
2816; CHECK-RV64-NEXT:    ntl.p1
2817; CHECK-RV64-NEXT:    sd a1, 0(a0)
2818; CHECK-RV64-NEXT:    ntl.p1
2819; CHECK-RV64-NEXT:    sd a2, 8(a0)
2820; CHECK-RV64-NEXT:    ret
2821;
2822; CHECK-RV32-LABEL: test_nontemporal_P1_store_v2i64:
2823; CHECK-RV32:       # %bb.0:
2824; CHECK-RV32-NEXT:    lw a2, 0(a1)
2825; CHECK-RV32-NEXT:    lw a3, 4(a1)
2826; CHECK-RV32-NEXT:    lw a4, 8(a1)
2827; CHECK-RV32-NEXT:    lw a1, 12(a1)
2828; CHECK-RV32-NEXT:    ntl.p1
2829; CHECK-RV32-NEXT:    sw a2, 0(a0)
2830; CHECK-RV32-NEXT:    ntl.p1
2831; CHECK-RV32-NEXT:    sw a3, 4(a0)
2832; CHECK-RV32-NEXT:    ntl.p1
2833; CHECK-RV32-NEXT:    sw a4, 8(a0)
2834; CHECK-RV32-NEXT:    ntl.p1
2835; CHECK-RV32-NEXT:    sw a1, 12(a0)
2836; CHECK-RV32-NEXT:    ret
2837;
2838; CHECK-RV64C-LABEL: test_nontemporal_P1_store_v2i64:
2839; CHECK-RV64C:       # %bb.0:
2840; CHECK-RV64C-NEXT:    c.ntl.p1
2841; CHECK-RV64C-NEXT:    sd a1, 0(a0)
2842; CHECK-RV64C-NEXT:    c.ntl.p1
2843; CHECK-RV64C-NEXT:    sd a2, 8(a0)
2844; CHECK-RV64C-NEXT:    ret
2845;
2846; CHECK-RV32C-LABEL: test_nontemporal_P1_store_v2i64:
2847; CHECK-RV32C:       # %bb.0:
2848; CHECK-RV32C-NEXT:    lw a2, 0(a1)
2849; CHECK-RV32C-NEXT:    lw a3, 4(a1)
2850; CHECK-RV32C-NEXT:    lw a4, 8(a1)
2851; CHECK-RV32C-NEXT:    lw a1, 12(a1)
2852; CHECK-RV32C-NEXT:    c.ntl.p1
2853; CHECK-RV32C-NEXT:    sw a2, 0(a0)
2854; CHECK-RV32C-NEXT:    c.ntl.p1
2855; CHECK-RV32C-NEXT:    sw a3, 4(a0)
2856; CHECK-RV32C-NEXT:    c.ntl.p1
2857; CHECK-RV32C-NEXT:    sw a4, 8(a0)
2858; CHECK-RV32C-NEXT:    c.ntl.p1
2859; CHECK-RV32C-NEXT:    sw a1, 12(a0)
2860; CHECK-RV32C-NEXT:    ret
2861;
2862; CHECK-RV64V-LABEL: test_nontemporal_P1_store_v2i64:
2863; CHECK-RV64V:       # %bb.0:
2864; CHECK-RV64V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
2865; CHECK-RV64V-NEXT:    ntl.p1
2866; CHECK-RV64V-NEXT:    vse64.v v8, (a0)
2867; CHECK-RV64V-NEXT:    ret
2868;
2869; CHECK-RV32V-LABEL: test_nontemporal_P1_store_v2i64:
2870; CHECK-RV32V:       # %bb.0:
2871; CHECK-RV32V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
2872; CHECK-RV32V-NEXT:    ntl.p1
2873; CHECK-RV32V-NEXT:    vse64.v v8, (a0)
2874; CHECK-RV32V-NEXT:    ret
2875  store <2 x i64> %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !1
2876  ret void
2877}
2878
; Loads an i64 from %p with !nontemporal !0 and !riscv-nontemporal-domain !2.
; Expected output: the rv64 runs emit one ld preceded by ntl.pall (c.ntl.pall
; with +c); every rv32 run, including the +v one, splits the access into two lw
; loads, each with its own hint, and then moves the low word into a0.
2879define i64 @test_nontemporal_PALL_load_i64(ptr %p) {
2880; CHECK-RV64-LABEL: test_nontemporal_PALL_load_i64:
2881; CHECK-RV64:       # %bb.0:
2882; CHECK-RV64-NEXT:    ntl.pall
2883; CHECK-RV64-NEXT:    ld a0, 0(a0)
2884; CHECK-RV64-NEXT:    ret
2885;
2886; CHECK-RV32-LABEL: test_nontemporal_PALL_load_i64:
2887; CHECK-RV32:       # %bb.0:
2888; CHECK-RV32-NEXT:    ntl.pall
2889; CHECK-RV32-NEXT:    lw a2, 0(a0)
2890; CHECK-RV32-NEXT:    ntl.pall
2891; CHECK-RV32-NEXT:    lw a1, 4(a0)
2892; CHECK-RV32-NEXT:    mv a0, a2
2893; CHECK-RV32-NEXT:    ret
2894;
2895; CHECK-RV64C-LABEL: test_nontemporal_PALL_load_i64:
2896; CHECK-RV64C:       # %bb.0:
2897; CHECK-RV64C-NEXT:    c.ntl.pall
2898; CHECK-RV64C-NEXT:    ld a0, 0(a0)
2899; CHECK-RV64C-NEXT:    ret
2900;
2901; CHECK-RV32C-LABEL: test_nontemporal_PALL_load_i64:
2902; CHECK-RV32C:       # %bb.0:
2903; CHECK-RV32C-NEXT:    c.ntl.pall
2904; CHECK-RV32C-NEXT:    lw a2, 0(a0)
2905; CHECK-RV32C-NEXT:    c.ntl.pall
2906; CHECK-RV32C-NEXT:    lw a1, 4(a0)
2907; CHECK-RV32C-NEXT:    mv a0, a2
2908; CHECK-RV32C-NEXT:    ret
2909;
2910; CHECK-RV64V-LABEL: test_nontemporal_PALL_load_i64:
2911; CHECK-RV64V:       # %bb.0:
2912; CHECK-RV64V-NEXT:    ntl.pall
2913; CHECK-RV64V-NEXT:    ld a0, 0(a0)
2914; CHECK-RV64V-NEXT:    ret
2915;
2916; CHECK-RV32V-LABEL: test_nontemporal_PALL_load_i64:
2917; CHECK-RV32V:       # %bb.0:
2918; CHECK-RV32V-NEXT:    ntl.pall
2919; CHECK-RV32V-NEXT:    lw a2, 0(a0)
2920; CHECK-RV32V-NEXT:    ntl.pall
2921; CHECK-RV32V-NEXT:    lw a1, 4(a0)
2922; CHECK-RV32V-NEXT:    mv a0, a2
2923; CHECK-RV32V-NEXT:    ret
2924  %1 = load i64, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !2
2925  ret i64 %1
2926}
2927
; Loads an i32 from %p with !nontemporal !0 and !riscv-nontemporal-domain !2.
; Expected output in every run: a single lw preceded by ntl.pall (c.ntl.pall in
; the +c runs).
2928define i32 @test_nontemporal_PALL_load_i32(ptr %p) {
2929; CHECK-RV64-LABEL: test_nontemporal_PALL_load_i32:
2930; CHECK-RV64:       # %bb.0:
2931; CHECK-RV64-NEXT:    ntl.pall
2932; CHECK-RV64-NEXT:    lw a0, 0(a0)
2933; CHECK-RV64-NEXT:    ret
2934;
2935; CHECK-RV32-LABEL: test_nontemporal_PALL_load_i32:
2936; CHECK-RV32:       # %bb.0:
2937; CHECK-RV32-NEXT:    ntl.pall
2938; CHECK-RV32-NEXT:    lw a0, 0(a0)
2939; CHECK-RV32-NEXT:    ret
2940;
2941; CHECK-RV64C-LABEL: test_nontemporal_PALL_load_i32:
2942; CHECK-RV64C:       # %bb.0:
2943; CHECK-RV64C-NEXT:    c.ntl.pall
2944; CHECK-RV64C-NEXT:    lw a0, 0(a0)
2945; CHECK-RV64C-NEXT:    ret
2946;
2947; CHECK-RV32C-LABEL: test_nontemporal_PALL_load_i32:
2948; CHECK-RV32C:       # %bb.0:
2949; CHECK-RV32C-NEXT:    c.ntl.pall
2950; CHECK-RV32C-NEXT:    lw a0, 0(a0)
2951; CHECK-RV32C-NEXT:    ret
2952;
2953; CHECK-RV64V-LABEL: test_nontemporal_PALL_load_i32:
2954; CHECK-RV64V:       # %bb.0:
2955; CHECK-RV64V-NEXT:    ntl.pall
2956; CHECK-RV64V-NEXT:    lw a0, 0(a0)
2957; CHECK-RV64V-NEXT:    ret
2958;
2959; CHECK-RV32V-LABEL: test_nontemporal_PALL_load_i32:
2960; CHECK-RV32V:       # %bb.0:
2961; CHECK-RV32V-NEXT:    ntl.pall
2962; CHECK-RV32V-NEXT:    lw a0, 0(a0)
2963; CHECK-RV32V-NEXT:    ret
2964  %1 = load i32, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !2
2965  ret i32 %1
2966}
2967
; Loads an i16 from %p with !nontemporal !0 and !riscv-nontemporal-domain !2.
; Expected output in every run: a single lh preceded by ntl.pall (c.ntl.pall in
; the +c runs).
2968define i16 @test_nontemporal_PALL_load_i16(ptr %p) {
2969; CHECK-RV64-LABEL: test_nontemporal_PALL_load_i16:
2970; CHECK-RV64:       # %bb.0:
2971; CHECK-RV64-NEXT:    ntl.pall
2972; CHECK-RV64-NEXT:    lh a0, 0(a0)
2973; CHECK-RV64-NEXT:    ret
2974;
2975; CHECK-RV32-LABEL: test_nontemporal_PALL_load_i16:
2976; CHECK-RV32:       # %bb.0:
2977; CHECK-RV32-NEXT:    ntl.pall
2978; CHECK-RV32-NEXT:    lh a0, 0(a0)
2979; CHECK-RV32-NEXT:    ret
2980;
2981; CHECK-RV64C-LABEL: test_nontemporal_PALL_load_i16:
2982; CHECK-RV64C:       # %bb.0:
2983; CHECK-RV64C-NEXT:    c.ntl.pall
2984; CHECK-RV64C-NEXT:    lh a0, 0(a0)
2985; CHECK-RV64C-NEXT:    ret
2986;
2987; CHECK-RV32C-LABEL: test_nontemporal_PALL_load_i16:
2988; CHECK-RV32C:       # %bb.0:
2989; CHECK-RV32C-NEXT:    c.ntl.pall
2990; CHECK-RV32C-NEXT:    lh a0, 0(a0)
2991; CHECK-RV32C-NEXT:    ret
2992;
2993; CHECK-RV64V-LABEL: test_nontemporal_PALL_load_i16:
2994; CHECK-RV64V:       # %bb.0:
2995; CHECK-RV64V-NEXT:    ntl.pall
2996; CHECK-RV64V-NEXT:    lh a0, 0(a0)
2997; CHECK-RV64V-NEXT:    ret
2998;
2999; CHECK-RV32V-LABEL: test_nontemporal_PALL_load_i16:
3000; CHECK-RV32V:       # %bb.0:
3001; CHECK-RV32V-NEXT:    ntl.pall
3002; CHECK-RV32V-NEXT:    lh a0, 0(a0)
3003; CHECK-RV32V-NEXT:    ret
3004  %1 = load i16, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !2
3005  ret i16 %1
3006}
3007
; Loads an i8 from %p with !nontemporal !0 and !riscv-nontemporal-domain !2.
; Expected output in every run: a single lbu preceded by ntl.pall (c.ntl.pall
; in the +c runs).
3008define i8 @test_nontemporal_PALL_load_i8(ptr %p) {
3009; CHECK-RV64-LABEL: test_nontemporal_PALL_load_i8:
3010; CHECK-RV64:       # %bb.0:
3011; CHECK-RV64-NEXT:    ntl.pall
3012; CHECK-RV64-NEXT:    lbu a0, 0(a0)
3013; CHECK-RV64-NEXT:    ret
3014;
3015; CHECK-RV32-LABEL: test_nontemporal_PALL_load_i8:
3016; CHECK-RV32:       # %bb.0:
3017; CHECK-RV32-NEXT:    ntl.pall
3018; CHECK-RV32-NEXT:    lbu a0, 0(a0)
3019; CHECK-RV32-NEXT:    ret
3020;
3021; CHECK-RV64C-LABEL: test_nontemporal_PALL_load_i8:
3022; CHECK-RV64C:       # %bb.0:
3023; CHECK-RV64C-NEXT:    c.ntl.pall
3024; CHECK-RV64C-NEXT:    lbu a0, 0(a0)
3025; CHECK-RV64C-NEXT:    ret
3026;
3027; CHECK-RV32C-LABEL: test_nontemporal_PALL_load_i8:
3028; CHECK-RV32C:       # %bb.0:
3029; CHECK-RV32C-NEXT:    c.ntl.pall
3030; CHECK-RV32C-NEXT:    lbu a0, 0(a0)
3031; CHECK-RV32C-NEXT:    ret
3032;
3033; CHECK-RV64V-LABEL: test_nontemporal_PALL_load_i8:
3034; CHECK-RV64V:       # %bb.0:
3035; CHECK-RV64V-NEXT:    ntl.pall
3036; CHECK-RV64V-NEXT:    lbu a0, 0(a0)
3037; CHECK-RV64V-NEXT:    ret
3038;
3039; CHECK-RV32V-LABEL: test_nontemporal_PALL_load_i8:
3040; CHECK-RV32V:       # %bb.0:
3041; CHECK-RV32V-NEXT:    ntl.pall
3042; CHECK-RV32V-NEXT:    lbu a0, 0(a0)
3043; CHECK-RV32V-NEXT:    ret
3044  %1 = load i8, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !2
3045  ret i8 %1
3046}
3047
; Loads two half values, one at %p and one at element index 3 from %p, both
; with !nontemporal !0 and !riscv-nontemporal-domain !2, and returns their sum.
; Expected output in every run: two flh loads (offsets 0 and 6), each preceded
; by its own ntl.pall (c.ntl.pall in the +c runs), followed by fadd.h.
3048define half @test_nontemporal_PALL_load_half(ptr %p) nounwind {
3049; CHECK-RV64-LABEL: test_nontemporal_PALL_load_half:
3050; CHECK-RV64:       # %bb.0:
3051; CHECK-RV64-NEXT:    ntl.pall
3052; CHECK-RV64-NEXT:    flh fa5, 0(a0)
3053; CHECK-RV64-NEXT:    ntl.pall
3054; CHECK-RV64-NEXT:    flh fa4, 6(a0)
3055; CHECK-RV64-NEXT:    fadd.h fa0, fa5, fa4
3056; CHECK-RV64-NEXT:    ret
3057;
3058; CHECK-RV32-LABEL: test_nontemporal_PALL_load_half:
3059; CHECK-RV32:       # %bb.0:
3060; CHECK-RV32-NEXT:    ntl.pall
3061; CHECK-RV32-NEXT:    flh fa5, 0(a0)
3062; CHECK-RV32-NEXT:    ntl.pall
3063; CHECK-RV32-NEXT:    flh fa4, 6(a0)
3064; CHECK-RV32-NEXT:    fadd.h fa0, fa5, fa4
3065; CHECK-RV32-NEXT:    ret
3066;
3067; CHECK-RV64C-LABEL: test_nontemporal_PALL_load_half:
3068; CHECK-RV64C:       # %bb.0:
3069; CHECK-RV64C-NEXT:    c.ntl.pall
3070; CHECK-RV64C-NEXT:    flh fa5, 0(a0)
3071; CHECK-RV64C-NEXT:    c.ntl.pall
3072; CHECK-RV64C-NEXT:    flh fa4, 6(a0)
3073; CHECK-RV64C-NEXT:    fadd.h fa0, fa5, fa4
3074; CHECK-RV64C-NEXT:    ret
3075;
3076; CHECK-RV32C-LABEL: test_nontemporal_PALL_load_half:
3077; CHECK-RV32C:       # %bb.0:
3078; CHECK-RV32C-NEXT:    c.ntl.pall
3079; CHECK-RV32C-NEXT:    flh fa5, 0(a0)
3080; CHECK-RV32C-NEXT:    c.ntl.pall
3081; CHECK-RV32C-NEXT:    flh fa4, 6(a0)
3082; CHECK-RV32C-NEXT:    fadd.h fa0, fa5, fa4
3083; CHECK-RV32C-NEXT:    ret
3084;
3085; CHECK-RV64V-LABEL: test_nontemporal_PALL_load_half:
3086; CHECK-RV64V:       # %bb.0:
3087; CHECK-RV64V-NEXT:    ntl.pall
3088; CHECK-RV64V-NEXT:    flh fa5, 0(a0)
3089; CHECK-RV64V-NEXT:    ntl.pall
3090; CHECK-RV64V-NEXT:    flh fa4, 6(a0)
3091; CHECK-RV64V-NEXT:    fadd.h fa0, fa5, fa4
3092; CHECK-RV64V-NEXT:    ret
3093;
3094; CHECK-RV32V-LABEL: test_nontemporal_PALL_load_half:
3095; CHECK-RV32V:       # %bb.0:
3096; CHECK-RV32V-NEXT:    ntl.pall
3097; CHECK-RV32V-NEXT:    flh fa5, 0(a0)
3098; CHECK-RV32V-NEXT:    ntl.pall
3099; CHECK-RV32V-NEXT:    flh fa4, 6(a0)
3100; CHECK-RV32V-NEXT:    fadd.h fa0, fa5, fa4
3101; CHECK-RV32V-NEXT:    ret
3102  %1 = load half, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !2
3103  %2 = getelementptr half, ptr %p, i32 3
3104  %3 = load half, ptr %2, !nontemporal !0, !riscv-nontemporal-domain !2
3105  %4 = fadd half %1, %3
3106  ret half %4
3107}
3108
; Loads a float from %p with !nontemporal !0 and !riscv-nontemporal-domain !2.
; Expected output in every run: a single flw preceded by ntl.pall (c.ntl.pall
; in the +c runs).
3109define float @test_nontemporal_PALL_load_float(ptr %p) {
3110; CHECK-RV64-LABEL: test_nontemporal_PALL_load_float:
3111; CHECK-RV64:       # %bb.0:
3112; CHECK-RV64-NEXT:    ntl.pall
3113; CHECK-RV64-NEXT:    flw fa0, 0(a0)
3114; CHECK-RV64-NEXT:    ret
3115;
3116; CHECK-RV32-LABEL: test_nontemporal_PALL_load_float:
3117; CHECK-RV32:       # %bb.0:
3118; CHECK-RV32-NEXT:    ntl.pall
3119; CHECK-RV32-NEXT:    flw fa0, 0(a0)
3120; CHECK-RV32-NEXT:    ret
3121;
3122; CHECK-RV64C-LABEL: test_nontemporal_PALL_load_float:
3123; CHECK-RV64C:       # %bb.0:
3124; CHECK-RV64C-NEXT:    c.ntl.pall
3125; CHECK-RV64C-NEXT:    flw fa0, 0(a0)
3126; CHECK-RV64C-NEXT:    ret
3127;
3128; CHECK-RV32C-LABEL: test_nontemporal_PALL_load_float:
3129; CHECK-RV32C:       # %bb.0:
3130; CHECK-RV32C-NEXT:    c.ntl.pall
3131; CHECK-RV32C-NEXT:    flw fa0, 0(a0)
3132; CHECK-RV32C-NEXT:    ret
3133;
3134; CHECK-RV64V-LABEL: test_nontemporal_PALL_load_float:
3135; CHECK-RV64V:       # %bb.0:
3136; CHECK-RV64V-NEXT:    ntl.pall
3137; CHECK-RV64V-NEXT:    flw fa0, 0(a0)
3138; CHECK-RV64V-NEXT:    ret
3139;
3140; CHECK-RV32V-LABEL: test_nontemporal_PALL_load_float:
3141; CHECK-RV32V:       # %bb.0:
3142; CHECK-RV32V-NEXT:    ntl.pall
3143; CHECK-RV32V-NEXT:    flw fa0, 0(a0)
3144; CHECK-RV32V-NEXT:    ret
3145  %1 = load float, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !2
3146  ret float %1
3147}
3148
; Loads a double from %p with !nontemporal !0 and !riscv-nontemporal-domain !2.
; Expected output in every run, rv32 included: a single fld preceded by
; ntl.pall (c.ntl.pall in the +c runs).
3149define double @test_nontemporal_PALL_load_double(ptr %p) {
3150; CHECK-RV64-LABEL: test_nontemporal_PALL_load_double:
3151; CHECK-RV64:       # %bb.0:
3152; CHECK-RV64-NEXT:    ntl.pall
3153; CHECK-RV64-NEXT:    fld fa0, 0(a0)
3154; CHECK-RV64-NEXT:    ret
3155;
3156; CHECK-RV32-LABEL: test_nontemporal_PALL_load_double:
3157; CHECK-RV32:       # %bb.0:
3158; CHECK-RV32-NEXT:    ntl.pall
3159; CHECK-RV32-NEXT:    fld fa0, 0(a0)
3160; CHECK-RV32-NEXT:    ret
3161;
3162; CHECK-RV64C-LABEL: test_nontemporal_PALL_load_double:
3163; CHECK-RV64C:       # %bb.0:
3164; CHECK-RV64C-NEXT:    c.ntl.pall
3165; CHECK-RV64C-NEXT:    fld fa0, 0(a0)
3166; CHECK-RV64C-NEXT:    ret
3167;
3168; CHECK-RV32C-LABEL: test_nontemporal_PALL_load_double:
3169; CHECK-RV32C:       # %bb.0:
3170; CHECK-RV32C-NEXT:    c.ntl.pall
3171; CHECK-RV32C-NEXT:    fld fa0, 0(a0)
3172; CHECK-RV32C-NEXT:    ret
3173;
3174; CHECK-RV64V-LABEL: test_nontemporal_PALL_load_double:
3175; CHECK-RV64V:       # %bb.0:
3176; CHECK-RV64V-NEXT:    ntl.pall
3177; CHECK-RV64V-NEXT:    fld fa0, 0(a0)
3178; CHECK-RV64V-NEXT:    ret
3179;
3180; CHECK-RV32V-LABEL: test_nontemporal_PALL_load_double:
3181; CHECK-RV32V:       # %bb.0:
3182; CHECK-RV32V-NEXT:    ntl.pall
3183; CHECK-RV32V-NEXT:    fld fa0, 0(a0)
3184; CHECK-RV32V-NEXT:    ret
3185  %1 = load double, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !2
3186  ret double %1
3187}
3188
; Loads <16 x i8> from %p with !nontemporal !0 and
; !riscv-nontemporal-domain !2.
; Expected output: without +v the source is read through a1 with hinted loads
; (two ld on rv64, four lw on rv32; ntl.pall, or c.ntl.pall with +c) and the
; result is written through a0 with stores that carry no hint; the +v runs emit
; one ntl.pall before vle8.v.
3189define <16 x i8> @test_nontemporal_PALL_load_v16i8(ptr %p) {
3190; CHECK-RV64-LABEL: test_nontemporal_PALL_load_v16i8:
3191; CHECK-RV64:       # %bb.0:
3192; CHECK-RV64-NEXT:    ntl.pall
3193; CHECK-RV64-NEXT:    ld a2, 0(a1)
3194; CHECK-RV64-NEXT:    ntl.pall
3195; CHECK-RV64-NEXT:    ld a1, 8(a1)
3196; CHECK-RV64-NEXT:    sd a2, 0(a0)
3197; CHECK-RV64-NEXT:    sd a1, 8(a0)
3198; CHECK-RV64-NEXT:    ret
3199;
3200; CHECK-RV32-LABEL: test_nontemporal_PALL_load_v16i8:
3201; CHECK-RV32:       # %bb.0:
3202; CHECK-RV32-NEXT:    ntl.pall
3203; CHECK-RV32-NEXT:    lw a2, 0(a1)
3204; CHECK-RV32-NEXT:    ntl.pall
3205; CHECK-RV32-NEXT:    lw a3, 4(a1)
3206; CHECK-RV32-NEXT:    ntl.pall
3207; CHECK-RV32-NEXT:    lw a4, 8(a1)
3208; CHECK-RV32-NEXT:    ntl.pall
3209; CHECK-RV32-NEXT:    lw a1, 12(a1)
3210; CHECK-RV32-NEXT:    sw a2, 0(a0)
3211; CHECK-RV32-NEXT:    sw a3, 4(a0)
3212; CHECK-RV32-NEXT:    sw a4, 8(a0)
3213; CHECK-RV32-NEXT:    sw a1, 12(a0)
3214; CHECK-RV32-NEXT:    ret
3215;
3216; CHECK-RV64C-LABEL: test_nontemporal_PALL_load_v16i8:
3217; CHECK-RV64C:       # %bb.0:
3218; CHECK-RV64C-NEXT:    c.ntl.pall
3219; CHECK-RV64C-NEXT:    ld a2, 0(a1)
3220; CHECK-RV64C-NEXT:    c.ntl.pall
3221; CHECK-RV64C-NEXT:    ld a1, 8(a1)
3222; CHECK-RV64C-NEXT:    sd a2, 0(a0)
3223; CHECK-RV64C-NEXT:    sd a1, 8(a0)
3224; CHECK-RV64C-NEXT:    ret
3225;
3226; CHECK-RV32C-LABEL: test_nontemporal_PALL_load_v16i8:
3227; CHECK-RV32C:       # %bb.0:
3228; CHECK-RV32C-NEXT:    c.ntl.pall
3229; CHECK-RV32C-NEXT:    lw a2, 0(a1)
3230; CHECK-RV32C-NEXT:    c.ntl.pall
3231; CHECK-RV32C-NEXT:    lw a3, 4(a1)
3232; CHECK-RV32C-NEXT:    c.ntl.pall
3233; CHECK-RV32C-NEXT:    lw a4, 8(a1)
3234; CHECK-RV32C-NEXT:    c.ntl.pall
3235; CHECK-RV32C-NEXT:    lw a1, 12(a1)
3236; CHECK-RV32C-NEXT:    sw a2, 0(a0)
3237; CHECK-RV32C-NEXT:    sw a3, 4(a0)
3238; CHECK-RV32C-NEXT:    sw a4, 8(a0)
3239; CHECK-RV32C-NEXT:    sw a1, 12(a0)
3240; CHECK-RV32C-NEXT:    ret
3241;
3242; CHECK-RV64V-LABEL: test_nontemporal_PALL_load_v16i8:
3243; CHECK-RV64V:       # %bb.0:
3244; CHECK-RV64V-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
3245; CHECK-RV64V-NEXT:    ntl.pall
3246; CHECK-RV64V-NEXT:    vle8.v v8, (a0)
3247; CHECK-RV64V-NEXT:    ret
3248;
3249; CHECK-RV32V-LABEL: test_nontemporal_PALL_load_v16i8:
3250; CHECK-RV32V:       # %bb.0:
3251; CHECK-RV32V-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
3252; CHECK-RV32V-NEXT:    ntl.pall
3253; CHECK-RV32V-NEXT:    vle8.v v8, (a0)
3254; CHECK-RV32V-NEXT:    ret
3255  %1 = load <16 x i8>, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !2
3256  ret <16 x i8> %1
3257}
3258
; Loads <8 x i16> from %p with !nontemporal !0 and
; !riscv-nontemporal-domain !2.
; Expected output: without +v the source is read through a1 with hinted loads
; (two ld on rv64, four lw on rv32; ntl.pall, or c.ntl.pall with +c) and the
; result is written through a0 with stores that carry no hint; the +v runs emit
; one ntl.pall before vle16.v.
3259define <8 x i16> @test_nontemporal_PALL_load_v8i16(ptr %p) {
3260; CHECK-RV64-LABEL: test_nontemporal_PALL_load_v8i16:
3261; CHECK-RV64:       # %bb.0:
3262; CHECK-RV64-NEXT:    ntl.pall
3263; CHECK-RV64-NEXT:    ld a2, 0(a1)
3264; CHECK-RV64-NEXT:    ntl.pall
3265; CHECK-RV64-NEXT:    ld a1, 8(a1)
3266; CHECK-RV64-NEXT:    sd a2, 0(a0)
3267; CHECK-RV64-NEXT:    sd a1, 8(a0)
3268; CHECK-RV64-NEXT:    ret
3269;
3270; CHECK-RV32-LABEL: test_nontemporal_PALL_load_v8i16:
3271; CHECK-RV32:       # %bb.0:
3272; CHECK-RV32-NEXT:    ntl.pall
3273; CHECK-RV32-NEXT:    lw a2, 0(a1)
3274; CHECK-RV32-NEXT:    ntl.pall
3275; CHECK-RV32-NEXT:    lw a3, 4(a1)
3276; CHECK-RV32-NEXT:    ntl.pall
3277; CHECK-RV32-NEXT:    lw a4, 8(a1)
3278; CHECK-RV32-NEXT:    ntl.pall
3279; CHECK-RV32-NEXT:    lw a1, 12(a1)
3280; CHECK-RV32-NEXT:    sw a2, 0(a0)
3281; CHECK-RV32-NEXT:    sw a3, 4(a0)
3282; CHECK-RV32-NEXT:    sw a4, 8(a0)
3283; CHECK-RV32-NEXT:    sw a1, 12(a0)
3284; CHECK-RV32-NEXT:    ret
3285;
3286; CHECK-RV64C-LABEL: test_nontemporal_PALL_load_v8i16:
3287; CHECK-RV64C:       # %bb.0:
3288; CHECK-RV64C-NEXT:    c.ntl.pall
3289; CHECK-RV64C-NEXT:    ld a2, 0(a1)
3290; CHECK-RV64C-NEXT:    c.ntl.pall
3291; CHECK-RV64C-NEXT:    ld a1, 8(a1)
3292; CHECK-RV64C-NEXT:    sd a2, 0(a0)
3293; CHECK-RV64C-NEXT:    sd a1, 8(a0)
3294; CHECK-RV64C-NEXT:    ret
3295;
3296; CHECK-RV32C-LABEL: test_nontemporal_PALL_load_v8i16:
3297; CHECK-RV32C:       # %bb.0:
3298; CHECK-RV32C-NEXT:    c.ntl.pall
3299; CHECK-RV32C-NEXT:    lw a2, 0(a1)
3300; CHECK-RV32C-NEXT:    c.ntl.pall
3301; CHECK-RV32C-NEXT:    lw a3, 4(a1)
3302; CHECK-RV32C-NEXT:    c.ntl.pall
3303; CHECK-RV32C-NEXT:    lw a4, 8(a1)
3304; CHECK-RV32C-NEXT:    c.ntl.pall
3305; CHECK-RV32C-NEXT:    lw a1, 12(a1)
3306; CHECK-RV32C-NEXT:    sw a2, 0(a0)
3307; CHECK-RV32C-NEXT:    sw a3, 4(a0)
3308; CHECK-RV32C-NEXT:    sw a4, 8(a0)
3309; CHECK-RV32C-NEXT:    sw a1, 12(a0)
3310; CHECK-RV32C-NEXT:    ret
3311;
3312; CHECK-RV64V-LABEL: test_nontemporal_PALL_load_v8i16:
3313; CHECK-RV64V:       # %bb.0:
3314; CHECK-RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
3315; CHECK-RV64V-NEXT:    ntl.pall
3316; CHECK-RV64V-NEXT:    vle16.v v8, (a0)
3317; CHECK-RV64V-NEXT:    ret
3318;
3319; CHECK-RV32V-LABEL: test_nontemporal_PALL_load_v8i16:
3320; CHECK-RV32V:       # %bb.0:
3321; CHECK-RV32V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
3322; CHECK-RV32V-NEXT:    ntl.pall
3323; CHECK-RV32V-NEXT:    vle16.v v8, (a0)
3324; CHECK-RV32V-NEXT:    ret
3325  %1 = load <8 x i16>, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !2
3326  ret <8 x i16> %1
3327}
3328
; Loads <4 x i32> from %p with !nontemporal !0 and
; !riscv-nontemporal-domain !2.
; Expected output: without +v the source is read through a1 with hinted loads
; (two ld on rv64, four lw on rv32; ntl.pall, or c.ntl.pall with +c) and the
; result is written through a0 with stores that carry no hint; the +v runs emit
; one ntl.pall before vle32.v.
3329define <4 x i32> @test_nontemporal_PALL_load_v4i32(ptr %p) {
3330; CHECK-RV64-LABEL: test_nontemporal_PALL_load_v4i32:
3331; CHECK-RV64:       # %bb.0:
3332; CHECK-RV64-NEXT:    ntl.pall
3333; CHECK-RV64-NEXT:    ld a2, 0(a1)
3334; CHECK-RV64-NEXT:    ntl.pall
3335; CHECK-RV64-NEXT:    ld a1, 8(a1)
3336; CHECK-RV64-NEXT:    sd a2, 0(a0)
3337; CHECK-RV64-NEXT:    sd a1, 8(a0)
3338; CHECK-RV64-NEXT:    ret
3339;
3340; CHECK-RV32-LABEL: test_nontemporal_PALL_load_v4i32:
3341; CHECK-RV32:       # %bb.0:
3342; CHECK-RV32-NEXT:    ntl.pall
3343; CHECK-RV32-NEXT:    lw a2, 0(a1)
3344; CHECK-RV32-NEXT:    ntl.pall
3345; CHECK-RV32-NEXT:    lw a3, 4(a1)
3346; CHECK-RV32-NEXT:    ntl.pall
3347; CHECK-RV32-NEXT:    lw a4, 8(a1)
3348; CHECK-RV32-NEXT:    ntl.pall
3349; CHECK-RV32-NEXT:    lw a1, 12(a1)
3350; CHECK-RV32-NEXT:    sw a2, 0(a0)
3351; CHECK-RV32-NEXT:    sw a3, 4(a0)
3352; CHECK-RV32-NEXT:    sw a4, 8(a0)
3353; CHECK-RV32-NEXT:    sw a1, 12(a0)
3354; CHECK-RV32-NEXT:    ret
3355;
3356; CHECK-RV64C-LABEL: test_nontemporal_PALL_load_v4i32:
3357; CHECK-RV64C:       # %bb.0:
3358; CHECK-RV64C-NEXT:    c.ntl.pall
3359; CHECK-RV64C-NEXT:    ld a2, 0(a1)
3360; CHECK-RV64C-NEXT:    c.ntl.pall
3361; CHECK-RV64C-NEXT:    ld a1, 8(a1)
3362; CHECK-RV64C-NEXT:    sd a2, 0(a0)
3363; CHECK-RV64C-NEXT:    sd a1, 8(a0)
3364; CHECK-RV64C-NEXT:    ret
3365;
3366; CHECK-RV32C-LABEL: test_nontemporal_PALL_load_v4i32:
3367; CHECK-RV32C:       # %bb.0:
3368; CHECK-RV32C-NEXT:    c.ntl.pall
3369; CHECK-RV32C-NEXT:    lw a2, 0(a1)
3370; CHECK-RV32C-NEXT:    c.ntl.pall
3371; CHECK-RV32C-NEXT:    lw a3, 4(a1)
3372; CHECK-RV32C-NEXT:    c.ntl.pall
3373; CHECK-RV32C-NEXT:    lw a4, 8(a1)
3374; CHECK-RV32C-NEXT:    c.ntl.pall
3375; CHECK-RV32C-NEXT:    lw a1, 12(a1)
3376; CHECK-RV32C-NEXT:    sw a2, 0(a0)
3377; CHECK-RV32C-NEXT:    sw a3, 4(a0)
3378; CHECK-RV32C-NEXT:    sw a4, 8(a0)
3379; CHECK-RV32C-NEXT:    sw a1, 12(a0)
3380; CHECK-RV32C-NEXT:    ret
3381;
3382; CHECK-RV64V-LABEL: test_nontemporal_PALL_load_v4i32:
3383; CHECK-RV64V:       # %bb.0:
3384; CHECK-RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3385; CHECK-RV64V-NEXT:    ntl.pall
3386; CHECK-RV64V-NEXT:    vle32.v v8, (a0)
3387; CHECK-RV64V-NEXT:    ret
3388;
3389; CHECK-RV32V-LABEL: test_nontemporal_PALL_load_v4i32:
3390; CHECK-RV32V:       # %bb.0:
3391; CHECK-RV32V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
3392; CHECK-RV32V-NEXT:    ntl.pall
3393; CHECK-RV32V-NEXT:    vle32.v v8, (a0)
3394; CHECK-RV32V-NEXT:    ret
3395  %1 = load <4 x i32>, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !2
3396  ret <4 x i32> %1
3397}
3398
; Loads <2 x i64> from %p with !nontemporal !0 and
; !riscv-nontemporal-domain !2.
; Expected output: rv64 without +v emits two hinted ld loads from a0 and leaves
; the result in a0/a1; rv32 without +v emits four hinted lw loads from a1 and
; then four sw stores through a0 that carry no hint; the +v runs emit one
; ntl.pall before vle64.v. The +c runs use c.ntl.pall as the hint.
3399define <2 x i64> @test_nontemporal_PALL_load_v2i64(ptr %p) {
3400; CHECK-RV64-LABEL: test_nontemporal_PALL_load_v2i64:
3401; CHECK-RV64:       # %bb.0:
3402; CHECK-RV64-NEXT:    ntl.pall
3403; CHECK-RV64-NEXT:    ld a2, 0(a0)
3404; CHECK-RV64-NEXT:    ntl.pall
3405; CHECK-RV64-NEXT:    ld a1, 8(a0)
3406; CHECK-RV64-NEXT:    mv a0, a2
3407; CHECK-RV64-NEXT:    ret
3408;
3409; CHECK-RV32-LABEL: test_nontemporal_PALL_load_v2i64:
3410; CHECK-RV32:       # %bb.0:
3411; CHECK-RV32-NEXT:    ntl.pall
3412; CHECK-RV32-NEXT:    lw a2, 0(a1)
3413; CHECK-RV32-NEXT:    ntl.pall
3414; CHECK-RV32-NEXT:    lw a3, 4(a1)
3415; CHECK-RV32-NEXT:    ntl.pall
3416; CHECK-RV32-NEXT:    lw a4, 8(a1)
3417; CHECK-RV32-NEXT:    ntl.pall
3418; CHECK-RV32-NEXT:    lw a1, 12(a1)
3419; CHECK-RV32-NEXT:    sw a2, 0(a0)
3420; CHECK-RV32-NEXT:    sw a3, 4(a0)
3421; CHECK-RV32-NEXT:    sw a4, 8(a0)
3422; CHECK-RV32-NEXT:    sw a1, 12(a0)
3423; CHECK-RV32-NEXT:    ret
3424;
3425; CHECK-RV64C-LABEL: test_nontemporal_PALL_load_v2i64:
3426; CHECK-RV64C:       # %bb.0:
3427; CHECK-RV64C-NEXT:    c.ntl.pall
3428; CHECK-RV64C-NEXT:    ld a2, 0(a0)
3429; CHECK-RV64C-NEXT:    c.ntl.pall
3430; CHECK-RV64C-NEXT:    ld a1, 8(a0)
3431; CHECK-RV64C-NEXT:    mv a0, a2
3432; CHECK-RV64C-NEXT:    ret
3433;
3434; CHECK-RV32C-LABEL: test_nontemporal_PALL_load_v2i64:
3435; CHECK-RV32C:       # %bb.0:
3436; CHECK-RV32C-NEXT:    c.ntl.pall
3437; CHECK-RV32C-NEXT:    lw a2, 0(a1)
3438; CHECK-RV32C-NEXT:    c.ntl.pall
3439; CHECK-RV32C-NEXT:    lw a3, 4(a1)
3440; CHECK-RV32C-NEXT:    c.ntl.pall
3441; CHECK-RV32C-NEXT:    lw a4, 8(a1)
3442; CHECK-RV32C-NEXT:    c.ntl.pall
3443; CHECK-RV32C-NEXT:    lw a1, 12(a1)
3444; CHECK-RV32C-NEXT:    sw a2, 0(a0)
3445; CHECK-RV32C-NEXT:    sw a3, 4(a0)
3446; CHECK-RV32C-NEXT:    sw a4, 8(a0)
3447; CHECK-RV32C-NEXT:    sw a1, 12(a0)
3448; CHECK-RV32C-NEXT:    ret
3449;
3450; CHECK-RV64V-LABEL: test_nontemporal_PALL_load_v2i64:
3451; CHECK-RV64V:       # %bb.0:
3452; CHECK-RV64V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
3453; CHECK-RV64V-NEXT:    ntl.pall
3454; CHECK-RV64V-NEXT:    vle64.v v8, (a0)
3455; CHECK-RV64V-NEXT:    ret
3456;
3457; CHECK-RV32V-LABEL: test_nontemporal_PALL_load_v2i64:
3458; CHECK-RV32V:       # %bb.0:
3459; CHECK-RV32V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
3460; CHECK-RV32V-NEXT:    ntl.pall
3461; CHECK-RV32V-NEXT:    vle64.v v8, (a0)
3462; CHECK-RV32V-NEXT:    ret
3463  %1 = load <2 x i64>, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !2
3464  ret <2 x i64> %1
3465}
3466
; Stores i64 %v to %p with !nontemporal !0 and !riscv-nontemporal-domain !2.
; Expected output: the rv64 runs emit one sd preceded by ntl.pall (c.ntl.pall
; with +c); every rv32 run, including the +v one, splits the access into two sw
; stores (a1, a2), each with its own hint.
3467define void @test_nontemporal_PALL_store_i64(ptr %p, i64 %v) {
3468; CHECK-RV64-LABEL: test_nontemporal_PALL_store_i64:
3469; CHECK-RV64:       # %bb.0:
3470; CHECK-RV64-NEXT:    ntl.pall
3471; CHECK-RV64-NEXT:    sd a1, 0(a0)
3472; CHECK-RV64-NEXT:    ret
3473;
3474; CHECK-RV32-LABEL: test_nontemporal_PALL_store_i64:
3475; CHECK-RV32:       # %bb.0:
3476; CHECK-RV32-NEXT:    ntl.pall
3477; CHECK-RV32-NEXT:    sw a1, 0(a0)
3478; CHECK-RV32-NEXT:    ntl.pall
3479; CHECK-RV32-NEXT:    sw a2, 4(a0)
3480; CHECK-RV32-NEXT:    ret
3481;
3482; CHECK-RV64C-LABEL: test_nontemporal_PALL_store_i64:
3483; CHECK-RV64C:       # %bb.0:
3484; CHECK-RV64C-NEXT:    c.ntl.pall
3485; CHECK-RV64C-NEXT:    sd a1, 0(a0)
3486; CHECK-RV64C-NEXT:    ret
3487;
3488; CHECK-RV32C-LABEL: test_nontemporal_PALL_store_i64:
3489; CHECK-RV32C:       # %bb.0:
3490; CHECK-RV32C-NEXT:    c.ntl.pall
3491; CHECK-RV32C-NEXT:    sw a1, 0(a0)
3492; CHECK-RV32C-NEXT:    c.ntl.pall
3493; CHECK-RV32C-NEXT:    sw a2, 4(a0)
3494; CHECK-RV32C-NEXT:    ret
3495;
3496; CHECK-RV64V-LABEL: test_nontemporal_PALL_store_i64:
3497; CHECK-RV64V:       # %bb.0:
3498; CHECK-RV64V-NEXT:    ntl.pall
3499; CHECK-RV64V-NEXT:    sd a1, 0(a0)
3500; CHECK-RV64V-NEXT:    ret
3501;
3502; CHECK-RV32V-LABEL: test_nontemporal_PALL_store_i64:
3503; CHECK-RV32V:       # %bb.0:
3504; CHECK-RV32V-NEXT:    ntl.pall
3505; CHECK-RV32V-NEXT:    sw a1, 0(a0)
3506; CHECK-RV32V-NEXT:    ntl.pall
3507; CHECK-RV32V-NEXT:    sw a2, 4(a0)
3508; CHECK-RV32V-NEXT:    ret
3509  store i64 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !2
3510  ret void
3511}
3512
; Stores i32 %v to %p with !nontemporal !0 and !riscv-nontemporal-domain !2.
; Expected output in every run: a single sw preceded by ntl.pall (c.ntl.pall in
; the +c runs).
3513define void @test_nontemporal_PALL_store_i32(ptr %p, i32 %v) {
3514; CHECK-RV64-LABEL: test_nontemporal_PALL_store_i32:
3515; CHECK-RV64:       # %bb.0:
3516; CHECK-RV64-NEXT:    ntl.pall
3517; CHECK-RV64-NEXT:    sw a1, 0(a0)
3518; CHECK-RV64-NEXT:    ret
3519;
3520; CHECK-RV32-LABEL: test_nontemporal_PALL_store_i32:
3521; CHECK-RV32:       # %bb.0:
3522; CHECK-RV32-NEXT:    ntl.pall
3523; CHECK-RV32-NEXT:    sw a1, 0(a0)
3524; CHECK-RV32-NEXT:    ret
3525;
3526; CHECK-RV64C-LABEL: test_nontemporal_PALL_store_i32:
3527; CHECK-RV64C:       # %bb.0:
3528; CHECK-RV64C-NEXT:    c.ntl.pall
3529; CHECK-RV64C-NEXT:    sw a1, 0(a0)
3530; CHECK-RV64C-NEXT:    ret
3531;
3532; CHECK-RV32C-LABEL: test_nontemporal_PALL_store_i32:
3533; CHECK-RV32C:       # %bb.0:
3534; CHECK-RV32C-NEXT:    c.ntl.pall
3535; CHECK-RV32C-NEXT:    sw a1, 0(a0)
3536; CHECK-RV32C-NEXT:    ret
3537;
3538; CHECK-RV64V-LABEL: test_nontemporal_PALL_store_i32:
3539; CHECK-RV64V:       # %bb.0:
3540; CHECK-RV64V-NEXT:    ntl.pall
3541; CHECK-RV64V-NEXT:    sw a1, 0(a0)
3542; CHECK-RV64V-NEXT:    ret
3543;
3544; CHECK-RV32V-LABEL: test_nontemporal_PALL_store_i32:
3545; CHECK-RV32V:       # %bb.0:
3546; CHECK-RV32V-NEXT:    ntl.pall
3547; CHECK-RV32V-NEXT:    sw a1, 0(a0)
3548; CHECK-RV32V-NEXT:    ret
3549  store i32 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !2
3550  ret void
3551}
3552
; Stores i16 %v to %p with !nontemporal !0 and !riscv-nontemporal-domain !2.
; Expected output in every run: a single sh preceded by ntl.pall (c.ntl.pall in
; the +c runs).
3553define void @test_nontemporal_PALL_store_i16(ptr %p, i16 %v) {
3554; CHECK-RV64-LABEL: test_nontemporal_PALL_store_i16:
3555; CHECK-RV64:       # %bb.0:
3556; CHECK-RV64-NEXT:    ntl.pall
3557; CHECK-RV64-NEXT:    sh a1, 0(a0)
3558; CHECK-RV64-NEXT:    ret
3559;
3560; CHECK-RV32-LABEL: test_nontemporal_PALL_store_i16:
3561; CHECK-RV32:       # %bb.0:
3562; CHECK-RV32-NEXT:    ntl.pall
3563; CHECK-RV32-NEXT:    sh a1, 0(a0)
3564; CHECK-RV32-NEXT:    ret
3565;
3566; CHECK-RV64C-LABEL: test_nontemporal_PALL_store_i16:
3567; CHECK-RV64C:       # %bb.0:
3568; CHECK-RV64C-NEXT:    c.ntl.pall
3569; CHECK-RV64C-NEXT:    sh a1, 0(a0)
3570; CHECK-RV64C-NEXT:    ret
3571;
3572; CHECK-RV32C-LABEL: test_nontemporal_PALL_store_i16:
3573; CHECK-RV32C:       # %bb.0:
3574; CHECK-RV32C-NEXT:    c.ntl.pall
3575; CHECK-RV32C-NEXT:    sh a1, 0(a0)
3576; CHECK-RV32C-NEXT:    ret
3577;
3578; CHECK-RV64V-LABEL: test_nontemporal_PALL_store_i16:
3579; CHECK-RV64V:       # %bb.0:
3580; CHECK-RV64V-NEXT:    ntl.pall
3581; CHECK-RV64V-NEXT:    sh a1, 0(a0)
3582; CHECK-RV64V-NEXT:    ret
3583;
3584; CHECK-RV32V-LABEL: test_nontemporal_PALL_store_i16:
3585; CHECK-RV32V:       # %bb.0:
3586; CHECK-RV32V-NEXT:    ntl.pall
3587; CHECK-RV32V-NEXT:    sh a1, 0(a0)
3588; CHECK-RV32V-NEXT:    ret
3589  store i16 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !2
3590  ret void
3591}
3592
; Scalar i8 nontemporal store tagged with !riscv-nontemporal-domain !2.
; All six configurations expect a single PALL hint (`ntl.pall`, or
; `c.ntl.pall` when +c is enabled) immediately followed by the `sb`.
3593define void @test_nontemporal_PALL_store_i8(ptr %p, i8 %v) {
3594; CHECK-RV64-LABEL: test_nontemporal_PALL_store_i8:
3595; CHECK-RV64:       # %bb.0:
3596; CHECK-RV64-NEXT:    ntl.pall
3597; CHECK-RV64-NEXT:    sb a1, 0(a0)
3598; CHECK-RV64-NEXT:    ret
3599;
3600; CHECK-RV32-LABEL: test_nontemporal_PALL_store_i8:
3601; CHECK-RV32:       # %bb.0:
3602; CHECK-RV32-NEXT:    ntl.pall
3603; CHECK-RV32-NEXT:    sb a1, 0(a0)
3604; CHECK-RV32-NEXT:    ret
3605;
3606; CHECK-RV64C-LABEL: test_nontemporal_PALL_store_i8:
3607; CHECK-RV64C:       # %bb.0:
3608; CHECK-RV64C-NEXT:    c.ntl.pall
3609; CHECK-RV64C-NEXT:    sb a1, 0(a0)
3610; CHECK-RV64C-NEXT:    ret
3611;
3612; CHECK-RV32C-LABEL: test_nontemporal_PALL_store_i8:
3613; CHECK-RV32C:       # %bb.0:
3614; CHECK-RV32C-NEXT:    c.ntl.pall
3615; CHECK-RV32C-NEXT:    sb a1, 0(a0)
3616; CHECK-RV32C-NEXT:    ret
3617;
3618; CHECK-RV64V-LABEL: test_nontemporal_PALL_store_i8:
3619; CHECK-RV64V:       # %bb.0:
3620; CHECK-RV64V-NEXT:    ntl.pall
3621; CHECK-RV64V-NEXT:    sb a1, 0(a0)
3622; CHECK-RV64V-NEXT:    ret
3623;
3624; CHECK-RV32V-LABEL: test_nontemporal_PALL_store_i8:
3625; CHECK-RV32V:       # %bb.0:
3626; CHECK-RV32V-NEXT:    ntl.pall
3627; CHECK-RV32V-NEXT:    sb a1, 0(a0)
3628; CHECK-RV32V-NEXT:    ret
3629  store i8 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !2
3630  ret void
3631}
3632
; Scalar half nontemporal store tagged with !riscv-nontemporal-domain !2.
; The value arrives in fa0 and every configuration expects one PALL hint
; (`ntl.pall`, or `c.ntl.pall` when +c is enabled) directly before the `fsh`.
3633define void @test_nontemporal_PALL_store_half(ptr %p, half %v) {
3634; CHECK-RV64-LABEL: test_nontemporal_PALL_store_half:
3635; CHECK-RV64:       # %bb.0:
3636; CHECK-RV64-NEXT:    ntl.pall
3637; CHECK-RV64-NEXT:    fsh fa0, 0(a0)
3638; CHECK-RV64-NEXT:    ret
3639;
3640; CHECK-RV32-LABEL: test_nontemporal_PALL_store_half:
3641; CHECK-RV32:       # %bb.0:
3642; CHECK-RV32-NEXT:    ntl.pall
3643; CHECK-RV32-NEXT:    fsh fa0, 0(a0)
3644; CHECK-RV32-NEXT:    ret
3645;
3646; CHECK-RV64C-LABEL: test_nontemporal_PALL_store_half:
3647; CHECK-RV64C:       # %bb.0:
3648; CHECK-RV64C-NEXT:    c.ntl.pall
3649; CHECK-RV64C-NEXT:    fsh fa0, 0(a0)
3650; CHECK-RV64C-NEXT:    ret
3651;
3652; CHECK-RV32C-LABEL: test_nontemporal_PALL_store_half:
3653; CHECK-RV32C:       # %bb.0:
3654; CHECK-RV32C-NEXT:    c.ntl.pall
3655; CHECK-RV32C-NEXT:    fsh fa0, 0(a0)
3656; CHECK-RV32C-NEXT:    ret
3657;
3658; CHECK-RV64V-LABEL: test_nontemporal_PALL_store_half:
3659; CHECK-RV64V:       # %bb.0:
3660; CHECK-RV64V-NEXT:    ntl.pall
3661; CHECK-RV64V-NEXT:    fsh fa0, 0(a0)
3662; CHECK-RV64V-NEXT:    ret
3663;
3664; CHECK-RV32V-LABEL: test_nontemporal_PALL_store_half:
3665; CHECK-RV32V:       # %bb.0:
3666; CHECK-RV32V-NEXT:    ntl.pall
3667; CHECK-RV32V-NEXT:    fsh fa0, 0(a0)
3668; CHECK-RV32V-NEXT:    ret
3669  store half %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !2
3670  ret void
3671}
3672
; Scalar float nontemporal store tagged with !riscv-nontemporal-domain !2.
; The value arrives in fa0 and every configuration expects one PALL hint
; (`ntl.pall`, or `c.ntl.pall` when +c is enabled) directly before the `fsw`.
3673define void @test_nontemporal_PALL_store_float(ptr %p, float %v) {
3674; CHECK-RV64-LABEL: test_nontemporal_PALL_store_float:
3675; CHECK-RV64:       # %bb.0:
3676; CHECK-RV64-NEXT:    ntl.pall
3677; CHECK-RV64-NEXT:    fsw fa0, 0(a0)
3678; CHECK-RV64-NEXT:    ret
3679;
3680; CHECK-RV32-LABEL: test_nontemporal_PALL_store_float:
3681; CHECK-RV32:       # %bb.0:
3682; CHECK-RV32-NEXT:    ntl.pall
3683; CHECK-RV32-NEXT:    fsw fa0, 0(a0)
3684; CHECK-RV32-NEXT:    ret
3685;
3686; CHECK-RV64C-LABEL: test_nontemporal_PALL_store_float:
3687; CHECK-RV64C:       # %bb.0:
3688; CHECK-RV64C-NEXT:    c.ntl.pall
3689; CHECK-RV64C-NEXT:    fsw fa0, 0(a0)
3690; CHECK-RV64C-NEXT:    ret
3691;
3692; CHECK-RV32C-LABEL: test_nontemporal_PALL_store_float:
3693; CHECK-RV32C:       # %bb.0:
3694; CHECK-RV32C-NEXT:    c.ntl.pall
3695; CHECK-RV32C-NEXT:    fsw fa0, 0(a0)
3696; CHECK-RV32C-NEXT:    ret
3697;
3698; CHECK-RV64V-LABEL: test_nontemporal_PALL_store_float:
3699; CHECK-RV64V:       # %bb.0:
3700; CHECK-RV64V-NEXT:    ntl.pall
3701; CHECK-RV64V-NEXT:    fsw fa0, 0(a0)
3702; CHECK-RV64V-NEXT:    ret
3703;
3704; CHECK-RV32V-LABEL: test_nontemporal_PALL_store_float:
3705; CHECK-RV32V:       # %bb.0:
3706; CHECK-RV32V-NEXT:    ntl.pall
3707; CHECK-RV32V-NEXT:    fsw fa0, 0(a0)
3708; CHECK-RV32V-NEXT:    ret
3709  store float %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !2
3710  ret void
3711}
3712
; Scalar double nontemporal store tagged with !riscv-nontemporal-domain !2.
; Both RV32 and RV64 expect a single `fsd` from fa0 (all runs enable +d),
; preceded by one PALL hint (`ntl.pall`, or `c.ntl.pall` when +c is enabled).
3713define void @test_nontemporal_PALL_store_double(ptr %p, double %v) {
3714; CHECK-RV64-LABEL: test_nontemporal_PALL_store_double:
3715; CHECK-RV64:       # %bb.0:
3716; CHECK-RV64-NEXT:    ntl.pall
3717; CHECK-RV64-NEXT:    fsd fa0, 0(a0)
3718; CHECK-RV64-NEXT:    ret
3719;
3720; CHECK-RV32-LABEL: test_nontemporal_PALL_store_double:
3721; CHECK-RV32:       # %bb.0:
3722; CHECK-RV32-NEXT:    ntl.pall
3723; CHECK-RV32-NEXT:    fsd fa0, 0(a0)
3724; CHECK-RV32-NEXT:    ret
3725;
3726; CHECK-RV64C-LABEL: test_nontemporal_PALL_store_double:
3727; CHECK-RV64C:       # %bb.0:
3728; CHECK-RV64C-NEXT:    c.ntl.pall
3729; CHECK-RV64C-NEXT:    fsd fa0, 0(a0)
3730; CHECK-RV64C-NEXT:    ret
3731;
3732; CHECK-RV32C-LABEL: test_nontemporal_PALL_store_double:
3733; CHECK-RV32C:       # %bb.0:
3734; CHECK-RV32C-NEXT:    c.ntl.pall
3735; CHECK-RV32C-NEXT:    fsd fa0, 0(a0)
3736; CHECK-RV32C-NEXT:    ret
3737;
3738; CHECK-RV64V-LABEL: test_nontemporal_PALL_store_double:
3739; CHECK-RV64V:       # %bb.0:
3740; CHECK-RV64V-NEXT:    ntl.pall
3741; CHECK-RV64V-NEXT:    fsd fa0, 0(a0)
3742; CHECK-RV64V-NEXT:    ret
3743;
3744; CHECK-RV32V-LABEL: test_nontemporal_PALL_store_double:
3745; CHECK-RV32V:       # %bb.0:
3746; CHECK-RV32V-NEXT:    ntl.pall
3747; CHECK-RV32V-NEXT:    fsd fa0, 0(a0)
3748; CHECK-RV32V-NEXT:    ret
3749  store double %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !2
3750  ret void
3751}
3752
; <16 x i8> nontemporal store tagged with !riscv-nontemporal-domain !2.
; Without +v the checks show the 16 elements being loaded one at a time
; through a1 (8-byte stride on RV64, 4-byte on RV32) and written back with 16
; separate `sb` instructions, each preceded by its own PALL hint; s0/s1 are
; spilled and reloaded around that sequence. With +v a single `ntl.pall`
; sits between the `vsetivli` and one `vse8.v`.
3753define void @test_nontemporal_PALL_store_v16i8(ptr %p, <16 x i8> %v) {
3754; CHECK-RV64-LABEL: test_nontemporal_PALL_store_v16i8:
3755; CHECK-RV64:       # %bb.0:
3756; CHECK-RV64-NEXT:    addi sp, sp, -16
3757; CHECK-RV64-NEXT:    .cfi_def_cfa_offset 16
3758; CHECK-RV64-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
3759; CHECK-RV64-NEXT:    sd s1, 0(sp) # 8-byte Folded Spill
3760; CHECK-RV64-NEXT:    .cfi_offset s0, -8
3761; CHECK-RV64-NEXT:    .cfi_offset s1, -16
3762; CHECK-RV64-NEXT:    lbu a2, 0(a1)
3763; CHECK-RV64-NEXT:    lbu a3, 8(a1)
3764; CHECK-RV64-NEXT:    lbu a4, 16(a1)
3765; CHECK-RV64-NEXT:    lbu a5, 24(a1)
3766; CHECK-RV64-NEXT:    lbu a6, 32(a1)
3767; CHECK-RV64-NEXT:    lbu a7, 40(a1)
3768; CHECK-RV64-NEXT:    lbu t0, 48(a1)
3769; CHECK-RV64-NEXT:    lbu t1, 56(a1)
3770; CHECK-RV64-NEXT:    lbu t2, 64(a1)
3771; CHECK-RV64-NEXT:    lbu t3, 72(a1)
3772; CHECK-RV64-NEXT:    lbu t4, 80(a1)
3773; CHECK-RV64-NEXT:    lbu t5, 88(a1)
3774; CHECK-RV64-NEXT:    lbu t6, 96(a1)
3775; CHECK-RV64-NEXT:    lbu s0, 104(a1)
3776; CHECK-RV64-NEXT:    lbu s1, 112(a1)
3777; CHECK-RV64-NEXT:    lbu a1, 120(a1)
3778; CHECK-RV64-NEXT:    ntl.pall
3779; CHECK-RV64-NEXT:    sb t6, 12(a0)
3780; CHECK-RV64-NEXT:    ntl.pall
3781; CHECK-RV64-NEXT:    sb s0, 13(a0)
3782; CHECK-RV64-NEXT:    ntl.pall
3783; CHECK-RV64-NEXT:    sb s1, 14(a0)
3784; CHECK-RV64-NEXT:    ntl.pall
3785; CHECK-RV64-NEXT:    sb a1, 15(a0)
3786; CHECK-RV64-NEXT:    ntl.pall
3787; CHECK-RV64-NEXT:    sb t2, 8(a0)
3788; CHECK-RV64-NEXT:    ntl.pall
3789; CHECK-RV64-NEXT:    sb t3, 9(a0)
3790; CHECK-RV64-NEXT:    ntl.pall
3791; CHECK-RV64-NEXT:    sb t4, 10(a0)
3792; CHECK-RV64-NEXT:    ntl.pall
3793; CHECK-RV64-NEXT:    sb t5, 11(a0)
3794; CHECK-RV64-NEXT:    ntl.pall
3795; CHECK-RV64-NEXT:    sb a6, 4(a0)
3796; CHECK-RV64-NEXT:    ntl.pall
3797; CHECK-RV64-NEXT:    sb a7, 5(a0)
3798; CHECK-RV64-NEXT:    ntl.pall
3799; CHECK-RV64-NEXT:    sb t0, 6(a0)
3800; CHECK-RV64-NEXT:    ntl.pall
3801; CHECK-RV64-NEXT:    sb t1, 7(a0)
3802; CHECK-RV64-NEXT:    ntl.pall
3803; CHECK-RV64-NEXT:    sb a2, 0(a0)
3804; CHECK-RV64-NEXT:    ntl.pall
3805; CHECK-RV64-NEXT:    sb a3, 1(a0)
3806; CHECK-RV64-NEXT:    ntl.pall
3807; CHECK-RV64-NEXT:    sb a4, 2(a0)
3808; CHECK-RV64-NEXT:    ntl.pall
3809; CHECK-RV64-NEXT:    sb a5, 3(a0)
3810; CHECK-RV64-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
3811; CHECK-RV64-NEXT:    ld s1, 0(sp) # 8-byte Folded Reload
3812; CHECK-RV64-NEXT:    .cfi_restore s0
3813; CHECK-RV64-NEXT:    .cfi_restore s1
3814; CHECK-RV64-NEXT:    addi sp, sp, 16
3815; CHECK-RV64-NEXT:    .cfi_def_cfa_offset 0
3816; CHECK-RV64-NEXT:    ret
3817;
3818; CHECK-RV32-LABEL: test_nontemporal_PALL_store_v16i8:
3819; CHECK-RV32:       # %bb.0:
3820; CHECK-RV32-NEXT:    addi sp, sp, -16
3821; CHECK-RV32-NEXT:    .cfi_def_cfa_offset 16
3822; CHECK-RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
3823; CHECK-RV32-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
3824; CHECK-RV32-NEXT:    .cfi_offset s0, -4
3825; CHECK-RV32-NEXT:    .cfi_offset s1, -8
3826; CHECK-RV32-NEXT:    lbu a2, 0(a1)
3827; CHECK-RV32-NEXT:    lbu a3, 4(a1)
3828; CHECK-RV32-NEXT:    lbu a4, 8(a1)
3829; CHECK-RV32-NEXT:    lbu a5, 12(a1)
3830; CHECK-RV32-NEXT:    lbu a6, 16(a1)
3831; CHECK-RV32-NEXT:    lbu a7, 20(a1)
3832; CHECK-RV32-NEXT:    lbu t0, 24(a1)
3833; CHECK-RV32-NEXT:    lbu t1, 28(a1)
3834; CHECK-RV32-NEXT:    lbu t2, 32(a1)
3835; CHECK-RV32-NEXT:    lbu t3, 36(a1)
3836; CHECK-RV32-NEXT:    lbu t4, 40(a1)
3837; CHECK-RV32-NEXT:    lbu t5, 44(a1)
3838; CHECK-RV32-NEXT:    lbu t6, 48(a1)
3839; CHECK-RV32-NEXT:    lbu s0, 52(a1)
3840; CHECK-RV32-NEXT:    lbu s1, 56(a1)
3841; CHECK-RV32-NEXT:    lbu a1, 60(a1)
3842; CHECK-RV32-NEXT:    ntl.pall
3843; CHECK-RV32-NEXT:    sb t6, 12(a0)
3844; CHECK-RV32-NEXT:    ntl.pall
3845; CHECK-RV32-NEXT:    sb s0, 13(a0)
3846; CHECK-RV32-NEXT:    ntl.pall
3847; CHECK-RV32-NEXT:    sb s1, 14(a0)
3848; CHECK-RV32-NEXT:    ntl.pall
3849; CHECK-RV32-NEXT:    sb a1, 15(a0)
3850; CHECK-RV32-NEXT:    ntl.pall
3851; CHECK-RV32-NEXT:    sb t2, 8(a0)
3852; CHECK-RV32-NEXT:    ntl.pall
3853; CHECK-RV32-NEXT:    sb t3, 9(a0)
3854; CHECK-RV32-NEXT:    ntl.pall
3855; CHECK-RV32-NEXT:    sb t4, 10(a0)
3856; CHECK-RV32-NEXT:    ntl.pall
3857; CHECK-RV32-NEXT:    sb t5, 11(a0)
3858; CHECK-RV32-NEXT:    ntl.pall
3859; CHECK-RV32-NEXT:    sb a6, 4(a0)
3860; CHECK-RV32-NEXT:    ntl.pall
3861; CHECK-RV32-NEXT:    sb a7, 5(a0)
3862; CHECK-RV32-NEXT:    ntl.pall
3863; CHECK-RV32-NEXT:    sb t0, 6(a0)
3864; CHECK-RV32-NEXT:    ntl.pall
3865; CHECK-RV32-NEXT:    sb t1, 7(a0)
3866; CHECK-RV32-NEXT:    ntl.pall
3867; CHECK-RV32-NEXT:    sb a2, 0(a0)
3868; CHECK-RV32-NEXT:    ntl.pall
3869; CHECK-RV32-NEXT:    sb a3, 1(a0)
3870; CHECK-RV32-NEXT:    ntl.pall
3871; CHECK-RV32-NEXT:    sb a4, 2(a0)
3872; CHECK-RV32-NEXT:    ntl.pall
3873; CHECK-RV32-NEXT:    sb a5, 3(a0)
3874; CHECK-RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
3875; CHECK-RV32-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
3876; CHECK-RV32-NEXT:    .cfi_restore s0
3877; CHECK-RV32-NEXT:    .cfi_restore s1
3878; CHECK-RV32-NEXT:    addi sp, sp, 16
3879; CHECK-RV32-NEXT:    .cfi_def_cfa_offset 0
3880; CHECK-RV32-NEXT:    ret
3881;
3882; CHECK-RV64C-LABEL: test_nontemporal_PALL_store_v16i8:
3883; CHECK-RV64C:       # %bb.0:
3884; CHECK-RV64C-NEXT:    addi sp, sp, -16
3885; CHECK-RV64C-NEXT:    .cfi_def_cfa_offset 16
3886; CHECK-RV64C-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
3887; CHECK-RV64C-NEXT:    sd s1, 0(sp) # 8-byte Folded Spill
3888; CHECK-RV64C-NEXT:    .cfi_offset s0, -8
3889; CHECK-RV64C-NEXT:    .cfi_offset s1, -16
3890; CHECK-RV64C-NEXT:    lbu a6, 0(a1)
3891; CHECK-RV64C-NEXT:    lbu a7, 8(a1)
3892; CHECK-RV64C-NEXT:    lbu t0, 16(a1)
3893; CHECK-RV64C-NEXT:    lbu t1, 24(a1)
3894; CHECK-RV64C-NEXT:    lbu t2, 32(a1)
3895; CHECK-RV64C-NEXT:    lbu t3, 40(a1)
3896; CHECK-RV64C-NEXT:    lbu t4, 48(a1)
3897; CHECK-RV64C-NEXT:    lbu t5, 56(a1)
3898; CHECK-RV64C-NEXT:    lbu t6, 64(a1)
3899; CHECK-RV64C-NEXT:    lbu a3, 72(a1)
3900; CHECK-RV64C-NEXT:    lbu a4, 80(a1)
3901; CHECK-RV64C-NEXT:    lbu a5, 88(a1)
3902; CHECK-RV64C-NEXT:    lbu a2, 96(a1)
3903; CHECK-RV64C-NEXT:    lbu s0, 104(a1)
3904; CHECK-RV64C-NEXT:    lbu s1, 112(a1)
3905; CHECK-RV64C-NEXT:    lbu a1, 120(a1)
3906; CHECK-RV64C-NEXT:    c.ntl.pall
3907; CHECK-RV64C-NEXT:    sb a2, 12(a0)
3908; CHECK-RV64C-NEXT:    c.ntl.pall
3909; CHECK-RV64C-NEXT:    sb s0, 13(a0)
3910; CHECK-RV64C-NEXT:    c.ntl.pall
3911; CHECK-RV64C-NEXT:    sb s1, 14(a0)
3912; CHECK-RV64C-NEXT:    c.ntl.pall
3913; CHECK-RV64C-NEXT:    sb a1, 15(a0)
3914; CHECK-RV64C-NEXT:    c.ntl.pall
3915; CHECK-RV64C-NEXT:    sb t6, 8(a0)
3916; CHECK-RV64C-NEXT:    c.ntl.pall
3917; CHECK-RV64C-NEXT:    sb a3, 9(a0)
3918; CHECK-RV64C-NEXT:    c.ntl.pall
3919; CHECK-RV64C-NEXT:    sb a4, 10(a0)
3920; CHECK-RV64C-NEXT:    c.ntl.pall
3921; CHECK-RV64C-NEXT:    sb a5, 11(a0)
3922; CHECK-RV64C-NEXT:    c.ntl.pall
3923; CHECK-RV64C-NEXT:    sb t2, 4(a0)
3924; CHECK-RV64C-NEXT:    c.ntl.pall
3925; CHECK-RV64C-NEXT:    sb t3, 5(a0)
3926; CHECK-RV64C-NEXT:    c.ntl.pall
3927; CHECK-RV64C-NEXT:    sb t4, 6(a0)
3928; CHECK-RV64C-NEXT:    c.ntl.pall
3929; CHECK-RV64C-NEXT:    sb t5, 7(a0)
3930; CHECK-RV64C-NEXT:    c.ntl.pall
3931; CHECK-RV64C-NEXT:    sb a6, 0(a0)
3932; CHECK-RV64C-NEXT:    c.ntl.pall
3933; CHECK-RV64C-NEXT:    sb a7, 1(a0)
3934; CHECK-RV64C-NEXT:    c.ntl.pall
3935; CHECK-RV64C-NEXT:    sb t0, 2(a0)
3936; CHECK-RV64C-NEXT:    c.ntl.pall
3937; CHECK-RV64C-NEXT:    sb t1, 3(a0)
3938; CHECK-RV64C-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
3939; CHECK-RV64C-NEXT:    ld s1, 0(sp) # 8-byte Folded Reload
3940; CHECK-RV64C-NEXT:    .cfi_restore s0
3941; CHECK-RV64C-NEXT:    .cfi_restore s1
3942; CHECK-RV64C-NEXT:    addi sp, sp, 16
3943; CHECK-RV64C-NEXT:    .cfi_def_cfa_offset 0
3944; CHECK-RV64C-NEXT:    ret
3945;
3946; CHECK-RV32C-LABEL: test_nontemporal_PALL_store_v16i8:
3947; CHECK-RV32C:       # %bb.0:
3948; CHECK-RV32C-NEXT:    addi sp, sp, -16
3949; CHECK-RV32C-NEXT:    .cfi_def_cfa_offset 16
3950; CHECK-RV32C-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
3951; CHECK-RV32C-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
3952; CHECK-RV32C-NEXT:    .cfi_offset s0, -4
3953; CHECK-RV32C-NEXT:    .cfi_offset s1, -8
3954; CHECK-RV32C-NEXT:    lbu a6, 0(a1)
3955; CHECK-RV32C-NEXT:    lbu a7, 4(a1)
3956; CHECK-RV32C-NEXT:    lbu t0, 8(a1)
3957; CHECK-RV32C-NEXT:    lbu t1, 12(a1)
3958; CHECK-RV32C-NEXT:    lbu t2, 16(a1)
3959; CHECK-RV32C-NEXT:    lbu t3, 20(a1)
3960; CHECK-RV32C-NEXT:    lbu t4, 24(a1)
3961; CHECK-RV32C-NEXT:    lbu t5, 28(a1)
3962; CHECK-RV32C-NEXT:    lbu t6, 32(a1)
3963; CHECK-RV32C-NEXT:    lbu a3, 36(a1)
3964; CHECK-RV32C-NEXT:    lbu a4, 40(a1)
3965; CHECK-RV32C-NEXT:    lbu a5, 44(a1)
3966; CHECK-RV32C-NEXT:    lbu a2, 48(a1)
3967; CHECK-RV32C-NEXT:    lbu s0, 52(a1)
3968; CHECK-RV32C-NEXT:    lbu s1, 56(a1)
3969; CHECK-RV32C-NEXT:    lbu a1, 60(a1)
3970; CHECK-RV32C-NEXT:    c.ntl.pall
3971; CHECK-RV32C-NEXT:    sb a2, 12(a0)
3972; CHECK-RV32C-NEXT:    c.ntl.pall
3973; CHECK-RV32C-NEXT:    sb s0, 13(a0)
3974; CHECK-RV32C-NEXT:    c.ntl.pall
3975; CHECK-RV32C-NEXT:    sb s1, 14(a0)
3976; CHECK-RV32C-NEXT:    c.ntl.pall
3977; CHECK-RV32C-NEXT:    sb a1, 15(a0)
3978; CHECK-RV32C-NEXT:    c.ntl.pall
3979; CHECK-RV32C-NEXT:    sb t6, 8(a0)
3980; CHECK-RV32C-NEXT:    c.ntl.pall
3981; CHECK-RV32C-NEXT:    sb a3, 9(a0)
3982; CHECK-RV32C-NEXT:    c.ntl.pall
3983; CHECK-RV32C-NEXT:    sb a4, 10(a0)
3984; CHECK-RV32C-NEXT:    c.ntl.pall
3985; CHECK-RV32C-NEXT:    sb a5, 11(a0)
3986; CHECK-RV32C-NEXT:    c.ntl.pall
3987; CHECK-RV32C-NEXT:    sb t2, 4(a0)
3988; CHECK-RV32C-NEXT:    c.ntl.pall
3989; CHECK-RV32C-NEXT:    sb t3, 5(a0)
3990; CHECK-RV32C-NEXT:    c.ntl.pall
3991; CHECK-RV32C-NEXT:    sb t4, 6(a0)
3992; CHECK-RV32C-NEXT:    c.ntl.pall
3993; CHECK-RV32C-NEXT:    sb t5, 7(a0)
3994; CHECK-RV32C-NEXT:    c.ntl.pall
3995; CHECK-RV32C-NEXT:    sb a6, 0(a0)
3996; CHECK-RV32C-NEXT:    c.ntl.pall
3997; CHECK-RV32C-NEXT:    sb a7, 1(a0)
3998; CHECK-RV32C-NEXT:    c.ntl.pall
3999; CHECK-RV32C-NEXT:    sb t0, 2(a0)
4000; CHECK-RV32C-NEXT:    c.ntl.pall
4001; CHECK-RV32C-NEXT:    sb t1, 3(a0)
4002; CHECK-RV32C-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
4003; CHECK-RV32C-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
4004; CHECK-RV32C-NEXT:    .cfi_restore s0
4005; CHECK-RV32C-NEXT:    .cfi_restore s1
4006; CHECK-RV32C-NEXT:    addi sp, sp, 16
4007; CHECK-RV32C-NEXT:    .cfi_def_cfa_offset 0
4008; CHECK-RV32C-NEXT:    ret
4009;
4010; CHECK-RV64V-LABEL: test_nontemporal_PALL_store_v16i8:
4011; CHECK-RV64V:       # %bb.0:
4012; CHECK-RV64V-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
4013; CHECK-RV64V-NEXT:    ntl.pall
4014; CHECK-RV64V-NEXT:    vse8.v v8, (a0)
4015; CHECK-RV64V-NEXT:    ret
4016;
4017; CHECK-RV32V-LABEL: test_nontemporal_PALL_store_v16i8:
4018; CHECK-RV32V:       # %bb.0:
4019; CHECK-RV32V-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
4020; CHECK-RV32V-NEXT:    ntl.pall
4021; CHECK-RV32V-NEXT:    vse8.v v8, (a0)
4022; CHECK-RV32V-NEXT:    ret
4023  store <16 x i8> %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !2
4024  ret void
4025}
4026
; <8 x i16> nontemporal store tagged with !riscv-nontemporal-domain !2.
; Without +v the checks show the eight elements being loaded individually
; through a1 (8-byte stride on RV64, 4-byte on RV32) and stored with eight
; `sh` instructions, each carrying its own PALL hint. With +v one `ntl.pall`
; precedes a single `vse16.v`.
4027define void @test_nontemporal_PALL_store_v8i16(ptr %p, <8 x i16> %v) {
4028; CHECK-RV64-LABEL: test_nontemporal_PALL_store_v8i16:
4029; CHECK-RV64:       # %bb.0:
4030; CHECK-RV64-NEXT:    lh a2, 0(a1)
4031; CHECK-RV64-NEXT:    lh a3, 8(a1)
4032; CHECK-RV64-NEXT:    lh a4, 16(a1)
4033; CHECK-RV64-NEXT:    lh a5, 24(a1)
4034; CHECK-RV64-NEXT:    lh a6, 32(a1)
4035; CHECK-RV64-NEXT:    lh a7, 40(a1)
4036; CHECK-RV64-NEXT:    lh t0, 48(a1)
4037; CHECK-RV64-NEXT:    lh a1, 56(a1)
4038; CHECK-RV64-NEXT:    ntl.pall
4039; CHECK-RV64-NEXT:    sh a6, 8(a0)
4040; CHECK-RV64-NEXT:    ntl.pall
4041; CHECK-RV64-NEXT:    sh a7, 10(a0)
4042; CHECK-RV64-NEXT:    ntl.pall
4043; CHECK-RV64-NEXT:    sh t0, 12(a0)
4044; CHECK-RV64-NEXT:    ntl.pall
4045; CHECK-RV64-NEXT:    sh a1, 14(a0)
4046; CHECK-RV64-NEXT:    ntl.pall
4047; CHECK-RV64-NEXT:    sh a2, 0(a0)
4048; CHECK-RV64-NEXT:    ntl.pall
4049; CHECK-RV64-NEXT:    sh a3, 2(a0)
4050; CHECK-RV64-NEXT:    ntl.pall
4051; CHECK-RV64-NEXT:    sh a4, 4(a0)
4052; CHECK-RV64-NEXT:    ntl.pall
4053; CHECK-RV64-NEXT:    sh a5, 6(a0)
4054; CHECK-RV64-NEXT:    ret
4055;
4056; CHECK-RV32-LABEL: test_nontemporal_PALL_store_v8i16:
4057; CHECK-RV32:       # %bb.0:
4058; CHECK-RV32-NEXT:    lh a2, 0(a1)
4059; CHECK-RV32-NEXT:    lh a3, 4(a1)
4060; CHECK-RV32-NEXT:    lh a4, 8(a1)
4061; CHECK-RV32-NEXT:    lh a5, 12(a1)
4062; CHECK-RV32-NEXT:    lh a6, 16(a1)
4063; CHECK-RV32-NEXT:    lh a7, 20(a1)
4064; CHECK-RV32-NEXT:    lh t0, 24(a1)
4065; CHECK-RV32-NEXT:    lh a1, 28(a1)
4066; CHECK-RV32-NEXT:    ntl.pall
4067; CHECK-RV32-NEXT:    sh a6, 8(a0)
4068; CHECK-RV32-NEXT:    ntl.pall
4069; CHECK-RV32-NEXT:    sh a7, 10(a0)
4070; CHECK-RV32-NEXT:    ntl.pall
4071; CHECK-RV32-NEXT:    sh t0, 12(a0)
4072; CHECK-RV32-NEXT:    ntl.pall
4073; CHECK-RV32-NEXT:    sh a1, 14(a0)
4074; CHECK-RV32-NEXT:    ntl.pall
4075; CHECK-RV32-NEXT:    sh a2, 0(a0)
4076; CHECK-RV32-NEXT:    ntl.pall
4077; CHECK-RV32-NEXT:    sh a3, 2(a0)
4078; CHECK-RV32-NEXT:    ntl.pall
4079; CHECK-RV32-NEXT:    sh a4, 4(a0)
4080; CHECK-RV32-NEXT:    ntl.pall
4081; CHECK-RV32-NEXT:    sh a5, 6(a0)
4082; CHECK-RV32-NEXT:    ret
4083;
4084; CHECK-RV64C-LABEL: test_nontemporal_PALL_store_v8i16:
4085; CHECK-RV64C:       # %bb.0:
4086; CHECK-RV64C-NEXT:    lh a6, 0(a1)
4087; CHECK-RV64C-NEXT:    lh a7, 8(a1)
4088; CHECK-RV64C-NEXT:    lh t0, 16(a1)
4089; CHECK-RV64C-NEXT:    lh a5, 24(a1)
4090; CHECK-RV64C-NEXT:    lh a2, 32(a1)
4091; CHECK-RV64C-NEXT:    lh a3, 40(a1)
4092; CHECK-RV64C-NEXT:    lh a4, 48(a1)
4093; CHECK-RV64C-NEXT:    lh a1, 56(a1)
4094; CHECK-RV64C-NEXT:    c.ntl.pall
4095; CHECK-RV64C-NEXT:    sh a2, 8(a0)
4096; CHECK-RV64C-NEXT:    c.ntl.pall
4097; CHECK-RV64C-NEXT:    sh a3, 10(a0)
4098; CHECK-RV64C-NEXT:    c.ntl.pall
4099; CHECK-RV64C-NEXT:    sh a4, 12(a0)
4100; CHECK-RV64C-NEXT:    c.ntl.pall
4101; CHECK-RV64C-NEXT:    sh a1, 14(a0)
4102; CHECK-RV64C-NEXT:    c.ntl.pall
4103; CHECK-RV64C-NEXT:    sh a6, 0(a0)
4104; CHECK-RV64C-NEXT:    c.ntl.pall
4105; CHECK-RV64C-NEXT:    sh a7, 2(a0)
4106; CHECK-RV64C-NEXT:    c.ntl.pall
4107; CHECK-RV64C-NEXT:    sh t0, 4(a0)
4108; CHECK-RV64C-NEXT:    c.ntl.pall
4109; CHECK-RV64C-NEXT:    sh a5, 6(a0)
4110; CHECK-RV64C-NEXT:    ret
4111;
4112; CHECK-RV32C-LABEL: test_nontemporal_PALL_store_v8i16:
4113; CHECK-RV32C:       # %bb.0:
4114; CHECK-RV32C-NEXT:    lh a6, 0(a1)
4115; CHECK-RV32C-NEXT:    lh a7, 4(a1)
4116; CHECK-RV32C-NEXT:    lh t0, 8(a1)
4117; CHECK-RV32C-NEXT:    lh a5, 12(a1)
4118; CHECK-RV32C-NEXT:    lh a2, 16(a1)
4119; CHECK-RV32C-NEXT:    lh a3, 20(a1)
4120; CHECK-RV32C-NEXT:    lh a4, 24(a1)
4121; CHECK-RV32C-NEXT:    lh a1, 28(a1)
4122; CHECK-RV32C-NEXT:    c.ntl.pall
4123; CHECK-RV32C-NEXT:    sh a2, 8(a0)
4124; CHECK-RV32C-NEXT:    c.ntl.pall
4125; CHECK-RV32C-NEXT:    sh a3, 10(a0)
4126; CHECK-RV32C-NEXT:    c.ntl.pall
4127; CHECK-RV32C-NEXT:    sh a4, 12(a0)
4128; CHECK-RV32C-NEXT:    c.ntl.pall
4129; CHECK-RV32C-NEXT:    sh a1, 14(a0)
4130; CHECK-RV32C-NEXT:    c.ntl.pall
4131; CHECK-RV32C-NEXT:    sh a6, 0(a0)
4132; CHECK-RV32C-NEXT:    c.ntl.pall
4133; CHECK-RV32C-NEXT:    sh a7, 2(a0)
4134; CHECK-RV32C-NEXT:    c.ntl.pall
4135; CHECK-RV32C-NEXT:    sh t0, 4(a0)
4136; CHECK-RV32C-NEXT:    c.ntl.pall
4137; CHECK-RV32C-NEXT:    sh a5, 6(a0)
4138; CHECK-RV32C-NEXT:    ret
4139;
4140; CHECK-RV64V-LABEL: test_nontemporal_PALL_store_v8i16:
4141; CHECK-RV64V:       # %bb.0:
4142; CHECK-RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4143; CHECK-RV64V-NEXT:    ntl.pall
4144; CHECK-RV64V-NEXT:    vse16.v v8, (a0)
4145; CHECK-RV64V-NEXT:    ret
4146;
4147; CHECK-RV32V-LABEL: test_nontemporal_PALL_store_v8i16:
4148; CHECK-RV32V:       # %bb.0:
4149; CHECK-RV32V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4150; CHECK-RV32V-NEXT:    ntl.pall
4151; CHECK-RV32V-NEXT:    vse16.v v8, (a0)
4152; CHECK-RV32V-NEXT:    ret
4153  store <8 x i16> %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !2
4154  ret void
4155}
4156
; <4 x i32> nontemporal store tagged with !riscv-nontemporal-domain !2.
; Without +v the checks show four `lw` loads through a1 (8-byte stride on
; RV64, 4-byte on RV32) followed by four `sw` stores, each with its own PALL
; hint. With +v one `ntl.pall` precedes a single `vse32.v`.
4157define void @test_nontemporal_PALL_store_v4i32(ptr %p, <4 x i32> %v) {
4158; CHECK-RV64-LABEL: test_nontemporal_PALL_store_v4i32:
4159; CHECK-RV64:       # %bb.0:
4160; CHECK-RV64-NEXT:    lw a2, 0(a1)
4161; CHECK-RV64-NEXT:    lw a3, 8(a1)
4162; CHECK-RV64-NEXT:    lw a4, 16(a1)
4163; CHECK-RV64-NEXT:    lw a1, 24(a1)
4164; CHECK-RV64-NEXT:    ntl.pall
4165; CHECK-RV64-NEXT:    sw a2, 0(a0)
4166; CHECK-RV64-NEXT:    ntl.pall
4167; CHECK-RV64-NEXT:    sw a3, 4(a0)
4168; CHECK-RV64-NEXT:    ntl.pall
4169; CHECK-RV64-NEXT:    sw a4, 8(a0)
4170; CHECK-RV64-NEXT:    ntl.pall
4171; CHECK-RV64-NEXT:    sw a1, 12(a0)
4172; CHECK-RV64-NEXT:    ret
4173;
4174; CHECK-RV32-LABEL: test_nontemporal_PALL_store_v4i32:
4175; CHECK-RV32:       # %bb.0:
4176; CHECK-RV32-NEXT:    lw a2, 0(a1)
4177; CHECK-RV32-NEXT:    lw a3, 4(a1)
4178; CHECK-RV32-NEXT:    lw a4, 8(a1)
4179; CHECK-RV32-NEXT:    lw a1, 12(a1)
4180; CHECK-RV32-NEXT:    ntl.pall
4181; CHECK-RV32-NEXT:    sw a2, 0(a0)
4182; CHECK-RV32-NEXT:    ntl.pall
4183; CHECK-RV32-NEXT:    sw a3, 4(a0)
4184; CHECK-RV32-NEXT:    ntl.pall
4185; CHECK-RV32-NEXT:    sw a4, 8(a0)
4186; CHECK-RV32-NEXT:    ntl.pall
4187; CHECK-RV32-NEXT:    sw a1, 12(a0)
4188; CHECK-RV32-NEXT:    ret
4189;
4190; CHECK-RV64C-LABEL: test_nontemporal_PALL_store_v4i32:
4191; CHECK-RV64C:       # %bb.0:
4192; CHECK-RV64C-NEXT:    lw a2, 0(a1)
4193; CHECK-RV64C-NEXT:    lw a3, 8(a1)
4194; CHECK-RV64C-NEXT:    lw a4, 16(a1)
4195; CHECK-RV64C-NEXT:    lw a1, 24(a1)
4196; CHECK-RV64C-NEXT:    c.ntl.pall
4197; CHECK-RV64C-NEXT:    sw a2, 0(a0)
4198; CHECK-RV64C-NEXT:    c.ntl.pall
4199; CHECK-RV64C-NEXT:    sw a3, 4(a0)
4200; CHECK-RV64C-NEXT:    c.ntl.pall
4201; CHECK-RV64C-NEXT:    sw a4, 8(a0)
4202; CHECK-RV64C-NEXT:    c.ntl.pall
4203; CHECK-RV64C-NEXT:    sw a1, 12(a0)
4204; CHECK-RV64C-NEXT:    ret
4205;
4206; CHECK-RV32C-LABEL: test_nontemporal_PALL_store_v4i32:
4207; CHECK-RV32C:       # %bb.0:
4208; CHECK-RV32C-NEXT:    lw a2, 0(a1)
4209; CHECK-RV32C-NEXT:    lw a3, 4(a1)
4210; CHECK-RV32C-NEXT:    lw a4, 8(a1)
4211; CHECK-RV32C-NEXT:    lw a1, 12(a1)
4212; CHECK-RV32C-NEXT:    c.ntl.pall
4213; CHECK-RV32C-NEXT:    sw a2, 0(a0)
4214; CHECK-RV32C-NEXT:    c.ntl.pall
4215; CHECK-RV32C-NEXT:    sw a3, 4(a0)
4216; CHECK-RV32C-NEXT:    c.ntl.pall
4217; CHECK-RV32C-NEXT:    sw a4, 8(a0)
4218; CHECK-RV32C-NEXT:    c.ntl.pall
4219; CHECK-RV32C-NEXT:    sw a1, 12(a0)
4220; CHECK-RV32C-NEXT:    ret
4221;
4222; CHECK-RV64V-LABEL: test_nontemporal_PALL_store_v4i32:
4223; CHECK-RV64V:       # %bb.0:
4224; CHECK-RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
4225; CHECK-RV64V-NEXT:    ntl.pall
4226; CHECK-RV64V-NEXT:    vse32.v v8, (a0)
4227; CHECK-RV64V-NEXT:    ret
4228;
4229; CHECK-RV32V-LABEL: test_nontemporal_PALL_store_v4i32:
4230; CHECK-RV32V:       # %bb.0:
4231; CHECK-RV32V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
4232; CHECK-RV32V-NEXT:    ntl.pall
4233; CHECK-RV32V-NEXT:    vse32.v v8, (a0)
4234; CHECK-RV32V-NEXT:    ret
4235  store <4 x i32> %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !2
4236  ret void
4237}
4238
; <2 x i64> nontemporal store tagged with !riscv-nontemporal-domain !2.
; On RV64 without +v the two elements are stored straight from a1/a2 with
; two hinted `sd` instructions. On RV32 without +v the checks show four `lw`
; loads through a1 followed by four hinted `sw` stores. With +v one
; `ntl.pall` precedes a single `vse64.v`.
4239define void @test_nontemporal_PALL_store_v2i64(ptr %p, <2 x i64> %v) {
4240; CHECK-RV64-LABEL: test_nontemporal_PALL_store_v2i64:
4241; CHECK-RV64:       # %bb.0:
4242; CHECK-RV64-NEXT:    ntl.pall
4243; CHECK-RV64-NEXT:    sd a1, 0(a0)
4244; CHECK-RV64-NEXT:    ntl.pall
4245; CHECK-RV64-NEXT:    sd a2, 8(a0)
4246; CHECK-RV64-NEXT:    ret
4247;
4248; CHECK-RV32-LABEL: test_nontemporal_PALL_store_v2i64:
4249; CHECK-RV32:       # %bb.0:
4250; CHECK-RV32-NEXT:    lw a2, 0(a1)
4251; CHECK-RV32-NEXT:    lw a3, 4(a1)
4252; CHECK-RV32-NEXT:    lw a4, 8(a1)
4253; CHECK-RV32-NEXT:    lw a1, 12(a1)
4254; CHECK-RV32-NEXT:    ntl.pall
4255; CHECK-RV32-NEXT:    sw a2, 0(a0)
4256; CHECK-RV32-NEXT:    ntl.pall
4257; CHECK-RV32-NEXT:    sw a3, 4(a0)
4258; CHECK-RV32-NEXT:    ntl.pall
4259; CHECK-RV32-NEXT:    sw a4, 8(a0)
4260; CHECK-RV32-NEXT:    ntl.pall
4261; CHECK-RV32-NEXT:    sw a1, 12(a0)
4262; CHECK-RV32-NEXT:    ret
4263;
4264; CHECK-RV64C-LABEL: test_nontemporal_PALL_store_v2i64:
4265; CHECK-RV64C:       # %bb.0:
4266; CHECK-RV64C-NEXT:    c.ntl.pall
4267; CHECK-RV64C-NEXT:    sd a1, 0(a0)
4268; CHECK-RV64C-NEXT:    c.ntl.pall
4269; CHECK-RV64C-NEXT:    sd a2, 8(a0)
4270; CHECK-RV64C-NEXT:    ret
4271;
4272; CHECK-RV32C-LABEL: test_nontemporal_PALL_store_v2i64:
4273; CHECK-RV32C:       # %bb.0:
4274; CHECK-RV32C-NEXT:    lw a2, 0(a1)
4275; CHECK-RV32C-NEXT:    lw a3, 4(a1)
4276; CHECK-RV32C-NEXT:    lw a4, 8(a1)
4277; CHECK-RV32C-NEXT:    lw a1, 12(a1)
4278; CHECK-RV32C-NEXT:    c.ntl.pall
4279; CHECK-RV32C-NEXT:    sw a2, 0(a0)
4280; CHECK-RV32C-NEXT:    c.ntl.pall
4281; CHECK-RV32C-NEXT:    sw a3, 4(a0)
4282; CHECK-RV32C-NEXT:    c.ntl.pall
4283; CHECK-RV32C-NEXT:    sw a4, 8(a0)
4284; CHECK-RV32C-NEXT:    c.ntl.pall
4285; CHECK-RV32C-NEXT:    sw a1, 12(a0)
4286; CHECK-RV32C-NEXT:    ret
4287;
4288; CHECK-RV64V-LABEL: test_nontemporal_PALL_store_v2i64:
4289; CHECK-RV64V:       # %bb.0:
4290; CHECK-RV64V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
4291; CHECK-RV64V-NEXT:    ntl.pall
4292; CHECK-RV64V-NEXT:    vse64.v v8, (a0)
4293; CHECK-RV64V-NEXT:    ret
4294;
4295; CHECK-RV32V-LABEL: test_nontemporal_PALL_store_v2i64:
4296; CHECK-RV32V:       # %bb.0:
4297; CHECK-RV32V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
4298; CHECK-RV32V-NEXT:    ntl.pall
4299; CHECK-RV32V-NEXT:    vse64.v v8, (a0)
4300; CHECK-RV32V-NEXT:    ret
4301  store <2 x i64> %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !2
4302  ret void
4303}
4304
; Scalar i64 nontemporal load tagged with !riscv-nontemporal-domain !3.
; RV64 configurations expect one S1 hint (`ntl.s1`, or `c.ntl.s1` when +c is
; enabled) directly before a single `ld`. RV32 configurations split the
; access into two `lw` loads, each preceded by its own hint, and then move
; the low word from a2 into a0.
4305define i64 @test_nontemporal_S1_load_i64(ptr %p) {
4306; CHECK-RV64-LABEL: test_nontemporal_S1_load_i64:
4307; CHECK-RV64:       # %bb.0:
4308; CHECK-RV64-NEXT:    ntl.s1
4309; CHECK-RV64-NEXT:    ld a0, 0(a0)
4310; CHECK-RV64-NEXT:    ret
4311;
4312; CHECK-RV32-LABEL: test_nontemporal_S1_load_i64:
4313; CHECK-RV32:       # %bb.0:
4314; CHECK-RV32-NEXT:    ntl.s1
4315; CHECK-RV32-NEXT:    lw a2, 0(a0)
4316; CHECK-RV32-NEXT:    ntl.s1
4317; CHECK-RV32-NEXT:    lw a1, 4(a0)
4318; CHECK-RV32-NEXT:    mv a0, a2
4319; CHECK-RV32-NEXT:    ret
4320;
4321; CHECK-RV64C-LABEL: test_nontemporal_S1_load_i64:
4322; CHECK-RV64C:       # %bb.0:
4323; CHECK-RV64C-NEXT:    c.ntl.s1
4324; CHECK-RV64C-NEXT:    ld a0, 0(a0)
4325; CHECK-RV64C-NEXT:    ret
4326;
4327; CHECK-RV32C-LABEL: test_nontemporal_S1_load_i64:
4328; CHECK-RV32C:       # %bb.0:
4329; CHECK-RV32C-NEXT:    c.ntl.s1
4330; CHECK-RV32C-NEXT:    lw a2, 0(a0)
4331; CHECK-RV32C-NEXT:    c.ntl.s1
4332; CHECK-RV32C-NEXT:    lw a1, 4(a0)
4333; CHECK-RV32C-NEXT:    mv a0, a2
4334; CHECK-RV32C-NEXT:    ret
4335;
4336; CHECK-RV64V-LABEL: test_nontemporal_S1_load_i64:
4337; CHECK-RV64V:       # %bb.0:
4338; CHECK-RV64V-NEXT:    ntl.s1
4339; CHECK-RV64V-NEXT:    ld a0, 0(a0)
4340; CHECK-RV64V-NEXT:    ret
4341;
4342; CHECK-RV32V-LABEL: test_nontemporal_S1_load_i64:
4343; CHECK-RV32V:       # %bb.0:
4344; CHECK-RV32V-NEXT:    ntl.s1
4345; CHECK-RV32V-NEXT:    lw a2, 0(a0)
4346; CHECK-RV32V-NEXT:    ntl.s1
4347; CHECK-RV32V-NEXT:    lw a1, 4(a0)
4348; CHECK-RV32V-NEXT:    mv a0, a2
4349; CHECK-RV32V-NEXT:    ret
4350  %1 = load i64, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !3
4351  ret i64 %1
4352}
4353
; Scalar i32 nontemporal load tagged with !riscv-nontemporal-domain !3.
; Every configuration expects one S1 hint (`ntl.s1`, or `c.ntl.s1` when +c
; is enabled) immediately followed by the `lw`.
4354define i32 @test_nontemporal_S1_load_i32(ptr %p) {
4355; CHECK-RV64-LABEL: test_nontemporal_S1_load_i32:
4356; CHECK-RV64:       # %bb.0:
4357; CHECK-RV64-NEXT:    ntl.s1
4358; CHECK-RV64-NEXT:    lw a0, 0(a0)
4359; CHECK-RV64-NEXT:    ret
4360;
4361; CHECK-RV32-LABEL: test_nontemporal_S1_load_i32:
4362; CHECK-RV32:       # %bb.0:
4363; CHECK-RV32-NEXT:    ntl.s1
4364; CHECK-RV32-NEXT:    lw a0, 0(a0)
4365; CHECK-RV32-NEXT:    ret
4366;
4367; CHECK-RV64C-LABEL: test_nontemporal_S1_load_i32:
4368; CHECK-RV64C:       # %bb.0:
4369; CHECK-RV64C-NEXT:    c.ntl.s1
4370; CHECK-RV64C-NEXT:    lw a0, 0(a0)
4371; CHECK-RV64C-NEXT:    ret
4372;
4373; CHECK-RV32C-LABEL: test_nontemporal_S1_load_i32:
4374; CHECK-RV32C:       # %bb.0:
4375; CHECK-RV32C-NEXT:    c.ntl.s1
4376; CHECK-RV32C-NEXT:    lw a0, 0(a0)
4377; CHECK-RV32C-NEXT:    ret
4378;
4379; CHECK-RV64V-LABEL: test_nontemporal_S1_load_i32:
4380; CHECK-RV64V:       # %bb.0:
4381; CHECK-RV64V-NEXT:    ntl.s1
4382; CHECK-RV64V-NEXT:    lw a0, 0(a0)
4383; CHECK-RV64V-NEXT:    ret
4384;
4385; CHECK-RV32V-LABEL: test_nontemporal_S1_load_i32:
4386; CHECK-RV32V:       # %bb.0:
4387; CHECK-RV32V-NEXT:    ntl.s1
4388; CHECK-RV32V-NEXT:    lw a0, 0(a0)
4389; CHECK-RV32V-NEXT:    ret
4390  %1 = load i32, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !3
4391  ret i32 %1
4392}
4393
; Scalar i16 nontemporal load tagged with !riscv-nontemporal-domain !3.
; Every configuration expects one S1 hint (`ntl.s1`, or `c.ntl.s1` when +c
; is enabled) immediately followed by the `lh`.
4394define i16 @test_nontemporal_S1_load_i16(ptr %p) {
4395; CHECK-RV64-LABEL: test_nontemporal_S1_load_i16:
4396; CHECK-RV64:       # %bb.0:
4397; CHECK-RV64-NEXT:    ntl.s1
4398; CHECK-RV64-NEXT:    lh a0, 0(a0)
4399; CHECK-RV64-NEXT:    ret
4400;
4401; CHECK-RV32-LABEL: test_nontemporal_S1_load_i16:
4402; CHECK-RV32:       # %bb.0:
4403; CHECK-RV32-NEXT:    ntl.s1
4404; CHECK-RV32-NEXT:    lh a0, 0(a0)
4405; CHECK-RV32-NEXT:    ret
4406;
4407; CHECK-RV64C-LABEL: test_nontemporal_S1_load_i16:
4408; CHECK-RV64C:       # %bb.0:
4409; CHECK-RV64C-NEXT:    c.ntl.s1
4410; CHECK-RV64C-NEXT:    lh a0, 0(a0)
4411; CHECK-RV64C-NEXT:    ret
4412;
4413; CHECK-RV32C-LABEL: test_nontemporal_S1_load_i16:
4414; CHECK-RV32C:       # %bb.0:
4415; CHECK-RV32C-NEXT:    c.ntl.s1
4416; CHECK-RV32C-NEXT:    lh a0, 0(a0)
4417; CHECK-RV32C-NEXT:    ret
4418;
4419; CHECK-RV64V-LABEL: test_nontemporal_S1_load_i16:
4420; CHECK-RV64V:       # %bb.0:
4421; CHECK-RV64V-NEXT:    ntl.s1
4422; CHECK-RV64V-NEXT:    lh a0, 0(a0)
4423; CHECK-RV64V-NEXT:    ret
4424;
4425; CHECK-RV32V-LABEL: test_nontemporal_S1_load_i16:
4426; CHECK-RV32V:       # %bb.0:
4427; CHECK-RV32V-NEXT:    ntl.s1
4428; CHECK-RV32V-NEXT:    lh a0, 0(a0)
4429; CHECK-RV32V-NEXT:    ret
4430  %1 = load i16, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !3
4431  ret i16 %1
4432}
4433
; Loads one i8 from %p and returns it. The load is tagged with
; !nontemporal !0 and !riscv-nontemporal-domain !3 (metadata nodes defined
; outside this chunk; presumably !3 selects the S1 domain -- TODO confirm).
; Expected in all six configurations: an ntl.s1 hint directly before the lbu,
; spelled c.ntl.s1 in the two +c configurations.
4434define i8 @test_nontemporal_S1_load_i8(ptr %p) {
4435; CHECK-RV64-LABEL: test_nontemporal_S1_load_i8:
4436; CHECK-RV64:       # %bb.0:
4437; CHECK-RV64-NEXT:    ntl.s1
4438; CHECK-RV64-NEXT:    lbu a0, 0(a0)
4439; CHECK-RV64-NEXT:    ret
4440;
4441; CHECK-RV32-LABEL: test_nontemporal_S1_load_i8:
4442; CHECK-RV32:       # %bb.0:
4443; CHECK-RV32-NEXT:    ntl.s1
4444; CHECK-RV32-NEXT:    lbu a0, 0(a0)
4445; CHECK-RV32-NEXT:    ret
4446;
4447; CHECK-RV64C-LABEL: test_nontemporal_S1_load_i8:
4448; CHECK-RV64C:       # %bb.0:
4449; CHECK-RV64C-NEXT:    c.ntl.s1
4450; CHECK-RV64C-NEXT:    lbu a0, 0(a0)
4451; CHECK-RV64C-NEXT:    ret
4452;
4453; CHECK-RV32C-LABEL: test_nontemporal_S1_load_i8:
4454; CHECK-RV32C:       # %bb.0:
4455; CHECK-RV32C-NEXT:    c.ntl.s1
4456; CHECK-RV32C-NEXT:    lbu a0, 0(a0)
4457; CHECK-RV32C-NEXT:    ret
4458;
4459; CHECK-RV64V-LABEL: test_nontemporal_S1_load_i8:
4460; CHECK-RV64V:       # %bb.0:
4461; CHECK-RV64V-NEXT:    ntl.s1
4462; CHECK-RV64V-NEXT:    lbu a0, 0(a0)
4463; CHECK-RV64V-NEXT:    ret
4464;
4465; CHECK-RV32V-LABEL: test_nontemporal_S1_load_i8:
4466; CHECK-RV32V:       # %bb.0:
4467; CHECK-RV32V-NEXT:    ntl.s1
4468; CHECK-RV32V-NEXT:    lbu a0, 0(a0)
4469; CHECK-RV32V-NEXT:    ret
4470  %1 = load i8, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !3
4471  ret i8 %1
4472}
4473
; Loads two half values -- element 0 of %p and element 3 (via the
; getelementptr with index 3) -- and returns their fadd. Both loads are tagged
; with !nontemporal !0 and !riscv-nontemporal-domain !3 (metadata nodes
; defined outside this chunk; presumably !3 selects the S1 domain -- TODO
; confirm). Expected in all six configurations: each flh (offsets 0 and 6)
; gets its own ntl.s1 hint (c.ntl.s1 in the two +c configurations), followed
; by one fadd.h. Unlike the neighbouring tests, this function is nounwind.
4474define half @test_nontemporal_S1_load_half(ptr %p) nounwind {
4475; CHECK-RV64-LABEL: test_nontemporal_S1_load_half:
4476; CHECK-RV64:       # %bb.0:
4477; CHECK-RV64-NEXT:    ntl.s1
4478; CHECK-RV64-NEXT:    flh fa5, 0(a0)
4479; CHECK-RV64-NEXT:    ntl.s1
4480; CHECK-RV64-NEXT:    flh fa4, 6(a0)
4481; CHECK-RV64-NEXT:    fadd.h fa0, fa5, fa4
4482; CHECK-RV64-NEXT:    ret
4483;
4484; CHECK-RV32-LABEL: test_nontemporal_S1_load_half:
4485; CHECK-RV32:       # %bb.0:
4486; CHECK-RV32-NEXT:    ntl.s1
4487; CHECK-RV32-NEXT:    flh fa5, 0(a0)
4488; CHECK-RV32-NEXT:    ntl.s1
4489; CHECK-RV32-NEXT:    flh fa4, 6(a0)
4490; CHECK-RV32-NEXT:    fadd.h fa0, fa5, fa4
4491; CHECK-RV32-NEXT:    ret
4492;
4493; CHECK-RV64C-LABEL: test_nontemporal_S1_load_half:
4494; CHECK-RV64C:       # %bb.0:
4495; CHECK-RV64C-NEXT:    c.ntl.s1
4496; CHECK-RV64C-NEXT:    flh fa5, 0(a0)
4497; CHECK-RV64C-NEXT:    c.ntl.s1
4498; CHECK-RV64C-NEXT:    flh fa4, 6(a0)
4499; CHECK-RV64C-NEXT:    fadd.h fa0, fa5, fa4
4500; CHECK-RV64C-NEXT:    ret
4501;
4502; CHECK-RV32C-LABEL: test_nontemporal_S1_load_half:
4503; CHECK-RV32C:       # %bb.0:
4504; CHECK-RV32C-NEXT:    c.ntl.s1
4505; CHECK-RV32C-NEXT:    flh fa5, 0(a0)
4506; CHECK-RV32C-NEXT:    c.ntl.s1
4507; CHECK-RV32C-NEXT:    flh fa4, 6(a0)
4508; CHECK-RV32C-NEXT:    fadd.h fa0, fa5, fa4
4509; CHECK-RV32C-NEXT:    ret
4510;
4511; CHECK-RV64V-LABEL: test_nontemporal_S1_load_half:
4512; CHECK-RV64V:       # %bb.0:
4513; CHECK-RV64V-NEXT:    ntl.s1
4514; CHECK-RV64V-NEXT:    flh fa5, 0(a0)
4515; CHECK-RV64V-NEXT:    ntl.s1
4516; CHECK-RV64V-NEXT:    flh fa4, 6(a0)
4517; CHECK-RV64V-NEXT:    fadd.h fa0, fa5, fa4
4518; CHECK-RV64V-NEXT:    ret
4519;
4520; CHECK-RV32V-LABEL: test_nontemporal_S1_load_half:
4521; CHECK-RV32V:       # %bb.0:
4522; CHECK-RV32V-NEXT:    ntl.s1
4523; CHECK-RV32V-NEXT:    flh fa5, 0(a0)
4524; CHECK-RV32V-NEXT:    ntl.s1
4525; CHECK-RV32V-NEXT:    flh fa4, 6(a0)
4526; CHECK-RV32V-NEXT:    fadd.h fa0, fa5, fa4
4527; CHECK-RV32V-NEXT:    ret
4528  %1 = load half, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !3
4529  %2 = getelementptr half, ptr %p, i32 3
4530  %3 = load half, ptr %2, !nontemporal !0, !riscv-nontemporal-domain !3
4531  %4 = fadd half %1, %3
4532  ret half %4
4533}
4534
; Loads one float from %p and returns it. The load is tagged with
; !nontemporal !0 and !riscv-nontemporal-domain !3 (metadata nodes defined
; outside this chunk; presumably !3 selects the S1 domain -- TODO confirm).
; Expected in all six configurations: an ntl.s1 hint directly before the flw,
; spelled c.ntl.s1 in the two +c configurations.
4535define float @test_nontemporal_S1_load_float(ptr %p) {
4536; CHECK-RV64-LABEL: test_nontemporal_S1_load_float:
4537; CHECK-RV64:       # %bb.0:
4538; CHECK-RV64-NEXT:    ntl.s1
4539; CHECK-RV64-NEXT:    flw fa0, 0(a0)
4540; CHECK-RV64-NEXT:    ret
4541;
4542; CHECK-RV32-LABEL: test_nontemporal_S1_load_float:
4543; CHECK-RV32:       # %bb.0:
4544; CHECK-RV32-NEXT:    ntl.s1
4545; CHECK-RV32-NEXT:    flw fa0, 0(a0)
4546; CHECK-RV32-NEXT:    ret
4547;
4548; CHECK-RV64C-LABEL: test_nontemporal_S1_load_float:
4549; CHECK-RV64C:       # %bb.0:
4550; CHECK-RV64C-NEXT:    c.ntl.s1
4551; CHECK-RV64C-NEXT:    flw fa0, 0(a0)
4552; CHECK-RV64C-NEXT:    ret
4553;
4554; CHECK-RV32C-LABEL: test_nontemporal_S1_load_float:
4555; CHECK-RV32C:       # %bb.0:
4556; CHECK-RV32C-NEXT:    c.ntl.s1
4557; CHECK-RV32C-NEXT:    flw fa0, 0(a0)
4558; CHECK-RV32C-NEXT:    ret
4559;
4560; CHECK-RV64V-LABEL: test_nontemporal_S1_load_float:
4561; CHECK-RV64V:       # %bb.0:
4562; CHECK-RV64V-NEXT:    ntl.s1
4563; CHECK-RV64V-NEXT:    flw fa0, 0(a0)
4564; CHECK-RV64V-NEXT:    ret
4565;
4566; CHECK-RV32V-LABEL: test_nontemporal_S1_load_float:
4567; CHECK-RV32V:       # %bb.0:
4568; CHECK-RV32V-NEXT:    ntl.s1
4569; CHECK-RV32V-NEXT:    flw fa0, 0(a0)
4570; CHECK-RV32V-NEXT:    ret
4571  %1 = load float, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !3
4572  ret float %1
4573}
4574
; Loads one double from %p and returns it. The load is tagged with
; !nontemporal !0 and !riscv-nontemporal-domain !3 (metadata nodes defined
; outside this chunk; presumably !3 selects the S1 domain -- TODO confirm).
; Expected in all six configurations, including the 32-bit ones: a single fld
; with an ntl.s1 hint directly before it (c.ntl.s1 in the two +c
; configurations).
4575define double @test_nontemporal_S1_load_double(ptr %p) {
4576; CHECK-RV64-LABEL: test_nontemporal_S1_load_double:
4577; CHECK-RV64:       # %bb.0:
4578; CHECK-RV64-NEXT:    ntl.s1
4579; CHECK-RV64-NEXT:    fld fa0, 0(a0)
4580; CHECK-RV64-NEXT:    ret
4581;
4582; CHECK-RV32-LABEL: test_nontemporal_S1_load_double:
4583; CHECK-RV32:       # %bb.0:
4584; CHECK-RV32-NEXT:    ntl.s1
4585; CHECK-RV32-NEXT:    fld fa0, 0(a0)
4586; CHECK-RV32-NEXT:    ret
4587;
4588; CHECK-RV64C-LABEL: test_nontemporal_S1_load_double:
4589; CHECK-RV64C:       # %bb.0:
4590; CHECK-RV64C-NEXT:    c.ntl.s1
4591; CHECK-RV64C-NEXT:    fld fa0, 0(a0)
4592; CHECK-RV64C-NEXT:    ret
4593;
4594; CHECK-RV32C-LABEL: test_nontemporal_S1_load_double:
4595; CHECK-RV32C:       # %bb.0:
4596; CHECK-RV32C-NEXT:    c.ntl.s1
4597; CHECK-RV32C-NEXT:    fld fa0, 0(a0)
4598; CHECK-RV32C-NEXT:    ret
4599;
4600; CHECK-RV64V-LABEL: test_nontemporal_S1_load_double:
4601; CHECK-RV64V:       # %bb.0:
4602; CHECK-RV64V-NEXT:    ntl.s1
4603; CHECK-RV64V-NEXT:    fld fa0, 0(a0)
4604; CHECK-RV64V-NEXT:    ret
4605;
4606; CHECK-RV32V-LABEL: test_nontemporal_S1_load_double:
4607; CHECK-RV32V:       # %bb.0:
4608; CHECK-RV32V-NEXT:    ntl.s1
4609; CHECK-RV32V-NEXT:    fld fa0, 0(a0)
4610; CHECK-RV32V-NEXT:    ret
4611  %1 = load double, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !3
4612  ret double %1
4613}
4614
; Loads a <16 x i8> vector from %p and returns it. The load is tagged with
; !nontemporal !0 and !riscv-nontemporal-domain !3 (metadata nodes defined
; outside this chunk; presumably !3 selects the S1 domain -- TODO confirm).
; Expected output without +v: the load is split into scalar pieces read from
; a1 (two ld on RV64, four lw on RV32), each with its own ntl.s1/c.ntl.s1
; hint; the pieces are then written through a0 with plain sd/sw that carry no
; hint. Expected output with +v: vsetivli, then one ntl.s1, then one vle8.v.
4615define <16 x i8> @test_nontemporal_S1_load_v16i8(ptr %p) {
4616; CHECK-RV64-LABEL: test_nontemporal_S1_load_v16i8:
4617; CHECK-RV64:       # %bb.0:
4618; CHECK-RV64-NEXT:    ntl.s1
4619; CHECK-RV64-NEXT:    ld a2, 0(a1)
4620; CHECK-RV64-NEXT:    ntl.s1
4621; CHECK-RV64-NEXT:    ld a1, 8(a1)
4622; CHECK-RV64-NEXT:    sd a2, 0(a0)
4623; CHECK-RV64-NEXT:    sd a1, 8(a0)
4624; CHECK-RV64-NEXT:    ret
4625;
4626; CHECK-RV32-LABEL: test_nontemporal_S1_load_v16i8:
4627; CHECK-RV32:       # %bb.0:
4628; CHECK-RV32-NEXT:    ntl.s1
4629; CHECK-RV32-NEXT:    lw a2, 0(a1)
4630; CHECK-RV32-NEXT:    ntl.s1
4631; CHECK-RV32-NEXT:    lw a3, 4(a1)
4632; CHECK-RV32-NEXT:    ntl.s1
4633; CHECK-RV32-NEXT:    lw a4, 8(a1)
4634; CHECK-RV32-NEXT:    ntl.s1
4635; CHECK-RV32-NEXT:    lw a1, 12(a1)
4636; CHECK-RV32-NEXT:    sw a2, 0(a0)
4637; CHECK-RV32-NEXT:    sw a3, 4(a0)
4638; CHECK-RV32-NEXT:    sw a4, 8(a0)
4639; CHECK-RV32-NEXT:    sw a1, 12(a0)
4640; CHECK-RV32-NEXT:    ret
4641;
4642; CHECK-RV64C-LABEL: test_nontemporal_S1_load_v16i8:
4643; CHECK-RV64C:       # %bb.0:
4644; CHECK-RV64C-NEXT:    c.ntl.s1
4645; CHECK-RV64C-NEXT:    ld a2, 0(a1)
4646; CHECK-RV64C-NEXT:    c.ntl.s1
4647; CHECK-RV64C-NEXT:    ld a1, 8(a1)
4648; CHECK-RV64C-NEXT:    sd a2, 0(a0)
4649; CHECK-RV64C-NEXT:    sd a1, 8(a0)
4650; CHECK-RV64C-NEXT:    ret
4651;
4652; CHECK-RV32C-LABEL: test_nontemporal_S1_load_v16i8:
4653; CHECK-RV32C:       # %bb.0:
4654; CHECK-RV32C-NEXT:    c.ntl.s1
4655; CHECK-RV32C-NEXT:    lw a2, 0(a1)
4656; CHECK-RV32C-NEXT:    c.ntl.s1
4657; CHECK-RV32C-NEXT:    lw a3, 4(a1)
4658; CHECK-RV32C-NEXT:    c.ntl.s1
4659; CHECK-RV32C-NEXT:    lw a4, 8(a1)
4660; CHECK-RV32C-NEXT:    c.ntl.s1
4661; CHECK-RV32C-NEXT:    lw a1, 12(a1)
4662; CHECK-RV32C-NEXT:    sw a2, 0(a0)
4663; CHECK-RV32C-NEXT:    sw a3, 4(a0)
4664; CHECK-RV32C-NEXT:    sw a4, 8(a0)
4665; CHECK-RV32C-NEXT:    sw a1, 12(a0)
4666; CHECK-RV32C-NEXT:    ret
4667;
4668; CHECK-RV64V-LABEL: test_nontemporal_S1_load_v16i8:
4669; CHECK-RV64V:       # %bb.0:
4670; CHECK-RV64V-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
4671; CHECK-RV64V-NEXT:    ntl.s1
4672; CHECK-RV64V-NEXT:    vle8.v v8, (a0)
4673; CHECK-RV64V-NEXT:    ret
4674;
4675; CHECK-RV32V-LABEL: test_nontemporal_S1_load_v16i8:
4676; CHECK-RV32V:       # %bb.0:
4677; CHECK-RV32V-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
4678; CHECK-RV32V-NEXT:    ntl.s1
4679; CHECK-RV32V-NEXT:    vle8.v v8, (a0)
4680; CHECK-RV32V-NEXT:    ret
4681  %1 = load <16 x i8>, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !3
4682  ret <16 x i8> %1
4683}
4684
; Loads an <8 x i16> vector from %p and returns it. The load is tagged with
; !nontemporal !0 and !riscv-nontemporal-domain !3 (metadata nodes defined
; outside this chunk; presumably !3 selects the S1 domain -- TODO confirm).
; Expected output without +v: the load is split into scalar pieces read from
; a1 (two ld on RV64, four lw on RV32), each with its own ntl.s1/c.ntl.s1
; hint; the pieces are then written through a0 with plain sd/sw that carry no
; hint. Expected output with +v: vsetivli, then one ntl.s1, then one vle16.v.
4685define <8 x i16> @test_nontemporal_S1_load_v8i16(ptr %p) {
4686; CHECK-RV64-LABEL: test_nontemporal_S1_load_v8i16:
4687; CHECK-RV64:       # %bb.0:
4688; CHECK-RV64-NEXT:    ntl.s1
4689; CHECK-RV64-NEXT:    ld a2, 0(a1)
4690; CHECK-RV64-NEXT:    ntl.s1
4691; CHECK-RV64-NEXT:    ld a1, 8(a1)
4692; CHECK-RV64-NEXT:    sd a2, 0(a0)
4693; CHECK-RV64-NEXT:    sd a1, 8(a0)
4694; CHECK-RV64-NEXT:    ret
4695;
4696; CHECK-RV32-LABEL: test_nontemporal_S1_load_v8i16:
4697; CHECK-RV32:       # %bb.0:
4698; CHECK-RV32-NEXT:    ntl.s1
4699; CHECK-RV32-NEXT:    lw a2, 0(a1)
4700; CHECK-RV32-NEXT:    ntl.s1
4701; CHECK-RV32-NEXT:    lw a3, 4(a1)
4702; CHECK-RV32-NEXT:    ntl.s1
4703; CHECK-RV32-NEXT:    lw a4, 8(a1)
4704; CHECK-RV32-NEXT:    ntl.s1
4705; CHECK-RV32-NEXT:    lw a1, 12(a1)
4706; CHECK-RV32-NEXT:    sw a2, 0(a0)
4707; CHECK-RV32-NEXT:    sw a3, 4(a0)
4708; CHECK-RV32-NEXT:    sw a4, 8(a0)
4709; CHECK-RV32-NEXT:    sw a1, 12(a0)
4710; CHECK-RV32-NEXT:    ret
4711;
4712; CHECK-RV64C-LABEL: test_nontemporal_S1_load_v8i16:
4713; CHECK-RV64C:       # %bb.0:
4714; CHECK-RV64C-NEXT:    c.ntl.s1
4715; CHECK-RV64C-NEXT:    ld a2, 0(a1)
4716; CHECK-RV64C-NEXT:    c.ntl.s1
4717; CHECK-RV64C-NEXT:    ld a1, 8(a1)
4718; CHECK-RV64C-NEXT:    sd a2, 0(a0)
4719; CHECK-RV64C-NEXT:    sd a1, 8(a0)
4720; CHECK-RV64C-NEXT:    ret
4721;
4722; CHECK-RV32C-LABEL: test_nontemporal_S1_load_v8i16:
4723; CHECK-RV32C:       # %bb.0:
4724; CHECK-RV32C-NEXT:    c.ntl.s1
4725; CHECK-RV32C-NEXT:    lw a2, 0(a1)
4726; CHECK-RV32C-NEXT:    c.ntl.s1
4727; CHECK-RV32C-NEXT:    lw a3, 4(a1)
4728; CHECK-RV32C-NEXT:    c.ntl.s1
4729; CHECK-RV32C-NEXT:    lw a4, 8(a1)
4730; CHECK-RV32C-NEXT:    c.ntl.s1
4731; CHECK-RV32C-NEXT:    lw a1, 12(a1)
4732; CHECK-RV32C-NEXT:    sw a2, 0(a0)
4733; CHECK-RV32C-NEXT:    sw a3, 4(a0)
4734; CHECK-RV32C-NEXT:    sw a4, 8(a0)
4735; CHECK-RV32C-NEXT:    sw a1, 12(a0)
4736; CHECK-RV32C-NEXT:    ret
4737;
4738; CHECK-RV64V-LABEL: test_nontemporal_S1_load_v8i16:
4739; CHECK-RV64V:       # %bb.0:
4740; CHECK-RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4741; CHECK-RV64V-NEXT:    ntl.s1
4742; CHECK-RV64V-NEXT:    vle16.v v8, (a0)
4743; CHECK-RV64V-NEXT:    ret
4744;
4745; CHECK-RV32V-LABEL: test_nontemporal_S1_load_v8i16:
4746; CHECK-RV32V:       # %bb.0:
4747; CHECK-RV32V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
4748; CHECK-RV32V-NEXT:    ntl.s1
4749; CHECK-RV32V-NEXT:    vle16.v v8, (a0)
4750; CHECK-RV32V-NEXT:    ret
4751  %1 = load <8 x i16>, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !3
4752  ret <8 x i16> %1
4753}
4754
; Loads a <4 x i32> vector from %p and returns it. The load is tagged with
; !nontemporal !0 and !riscv-nontemporal-domain !3 (metadata nodes defined
; outside this chunk; presumably !3 selects the S1 domain -- TODO confirm).
; Expected output without +v: the load is split into scalar pieces read from
; a1 (two ld on RV64, four lw on RV32), each with its own ntl.s1/c.ntl.s1
; hint; the pieces are then written through a0 with plain sd/sw that carry no
; hint. Expected output with +v: vsetivli, then one ntl.s1, then one vle32.v.
4755define <4 x i32> @test_nontemporal_S1_load_v4i32(ptr %p) {
4756; CHECK-RV64-LABEL: test_nontemporal_S1_load_v4i32:
4757; CHECK-RV64:       # %bb.0:
4758; CHECK-RV64-NEXT:    ntl.s1
4759; CHECK-RV64-NEXT:    ld a2, 0(a1)
4760; CHECK-RV64-NEXT:    ntl.s1
4761; CHECK-RV64-NEXT:    ld a1, 8(a1)
4762; CHECK-RV64-NEXT:    sd a2, 0(a0)
4763; CHECK-RV64-NEXT:    sd a1, 8(a0)
4764; CHECK-RV64-NEXT:    ret
4765;
4766; CHECK-RV32-LABEL: test_nontemporal_S1_load_v4i32:
4767; CHECK-RV32:       # %bb.0:
4768; CHECK-RV32-NEXT:    ntl.s1
4769; CHECK-RV32-NEXT:    lw a2, 0(a1)
4770; CHECK-RV32-NEXT:    ntl.s1
4771; CHECK-RV32-NEXT:    lw a3, 4(a1)
4772; CHECK-RV32-NEXT:    ntl.s1
4773; CHECK-RV32-NEXT:    lw a4, 8(a1)
4774; CHECK-RV32-NEXT:    ntl.s1
4775; CHECK-RV32-NEXT:    lw a1, 12(a1)
4776; CHECK-RV32-NEXT:    sw a2, 0(a0)
4777; CHECK-RV32-NEXT:    sw a3, 4(a0)
4778; CHECK-RV32-NEXT:    sw a4, 8(a0)
4779; CHECK-RV32-NEXT:    sw a1, 12(a0)
4780; CHECK-RV32-NEXT:    ret
4781;
4782; CHECK-RV64C-LABEL: test_nontemporal_S1_load_v4i32:
4783; CHECK-RV64C:       # %bb.0:
4784; CHECK-RV64C-NEXT:    c.ntl.s1
4785; CHECK-RV64C-NEXT:    ld a2, 0(a1)
4786; CHECK-RV64C-NEXT:    c.ntl.s1
4787; CHECK-RV64C-NEXT:    ld a1, 8(a1)
4788; CHECK-RV64C-NEXT:    sd a2, 0(a0)
4789; CHECK-RV64C-NEXT:    sd a1, 8(a0)
4790; CHECK-RV64C-NEXT:    ret
4791;
4792; CHECK-RV32C-LABEL: test_nontemporal_S1_load_v4i32:
4793; CHECK-RV32C:       # %bb.0:
4794; CHECK-RV32C-NEXT:    c.ntl.s1
4795; CHECK-RV32C-NEXT:    lw a2, 0(a1)
4796; CHECK-RV32C-NEXT:    c.ntl.s1
4797; CHECK-RV32C-NEXT:    lw a3, 4(a1)
4798; CHECK-RV32C-NEXT:    c.ntl.s1
4799; CHECK-RV32C-NEXT:    lw a4, 8(a1)
4800; CHECK-RV32C-NEXT:    c.ntl.s1
4801; CHECK-RV32C-NEXT:    lw a1, 12(a1)
4802; CHECK-RV32C-NEXT:    sw a2, 0(a0)
4803; CHECK-RV32C-NEXT:    sw a3, 4(a0)
4804; CHECK-RV32C-NEXT:    sw a4, 8(a0)
4805; CHECK-RV32C-NEXT:    sw a1, 12(a0)
4806; CHECK-RV32C-NEXT:    ret
4807;
4808; CHECK-RV64V-LABEL: test_nontemporal_S1_load_v4i32:
4809; CHECK-RV64V:       # %bb.0:
4810; CHECK-RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
4811; CHECK-RV64V-NEXT:    ntl.s1
4812; CHECK-RV64V-NEXT:    vle32.v v8, (a0)
4813; CHECK-RV64V-NEXT:    ret
4814;
4815; CHECK-RV32V-LABEL: test_nontemporal_S1_load_v4i32:
4816; CHECK-RV32V:       # %bb.0:
4817; CHECK-RV32V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
4818; CHECK-RV32V-NEXT:    ntl.s1
4819; CHECK-RV32V-NEXT:    vle32.v v8, (a0)
4820; CHECK-RV32V-NEXT:    ret
4821  %1 = load <4 x i32>, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !3
4822  ret <4 x i32> %1
4823}
4824
; Loads a <2 x i64> vector from %p and returns it. The load is tagged with
; !nontemporal !0 and !riscv-nontemporal-domain !3 (metadata nodes defined
; outside this chunk; presumably !3 selects the S1 domain -- TODO confirm).
; Expected output differs from the other vector-load tests on RV64 without
; +v: two hinted ld read from a0 into a2/a1, then mv a0, a2, with no stores.
; RV32 without +v: four hinted lw from a1, then four plain sw through a0.
; With +v: vsetivli, then one ntl.s1, then one vle64.v.
4825define <2 x i64> @test_nontemporal_S1_load_v2i64(ptr %p) {
4826; CHECK-RV64-LABEL: test_nontemporal_S1_load_v2i64:
4827; CHECK-RV64:       # %bb.0:
4828; CHECK-RV64-NEXT:    ntl.s1
4829; CHECK-RV64-NEXT:    ld a2, 0(a0)
4830; CHECK-RV64-NEXT:    ntl.s1
4831; CHECK-RV64-NEXT:    ld a1, 8(a0)
4832; CHECK-RV64-NEXT:    mv a0, a2
4833; CHECK-RV64-NEXT:    ret
4834;
4835; CHECK-RV32-LABEL: test_nontemporal_S1_load_v2i64:
4836; CHECK-RV32:       # %bb.0:
4837; CHECK-RV32-NEXT:    ntl.s1
4838; CHECK-RV32-NEXT:    lw a2, 0(a1)
4839; CHECK-RV32-NEXT:    ntl.s1
4840; CHECK-RV32-NEXT:    lw a3, 4(a1)
4841; CHECK-RV32-NEXT:    ntl.s1
4842; CHECK-RV32-NEXT:    lw a4, 8(a1)
4843; CHECK-RV32-NEXT:    ntl.s1
4844; CHECK-RV32-NEXT:    lw a1, 12(a1)
4845; CHECK-RV32-NEXT:    sw a2, 0(a0)
4846; CHECK-RV32-NEXT:    sw a3, 4(a0)
4847; CHECK-RV32-NEXT:    sw a4, 8(a0)
4848; CHECK-RV32-NEXT:    sw a1, 12(a0)
4849; CHECK-RV32-NEXT:    ret
4850;
4851; CHECK-RV64C-LABEL: test_nontemporal_S1_load_v2i64:
4852; CHECK-RV64C:       # %bb.0:
4853; CHECK-RV64C-NEXT:    c.ntl.s1
4854; CHECK-RV64C-NEXT:    ld a2, 0(a0)
4855; CHECK-RV64C-NEXT:    c.ntl.s1
4856; CHECK-RV64C-NEXT:    ld a1, 8(a0)
4857; CHECK-RV64C-NEXT:    mv a0, a2
4858; CHECK-RV64C-NEXT:    ret
4859;
4860; CHECK-RV32C-LABEL: test_nontemporal_S1_load_v2i64:
4861; CHECK-RV32C:       # %bb.0:
4862; CHECK-RV32C-NEXT:    c.ntl.s1
4863; CHECK-RV32C-NEXT:    lw a2, 0(a1)
4864; CHECK-RV32C-NEXT:    c.ntl.s1
4865; CHECK-RV32C-NEXT:    lw a3, 4(a1)
4866; CHECK-RV32C-NEXT:    c.ntl.s1
4867; CHECK-RV32C-NEXT:    lw a4, 8(a1)
4868; CHECK-RV32C-NEXT:    c.ntl.s1
4869; CHECK-RV32C-NEXT:    lw a1, 12(a1)
4870; CHECK-RV32C-NEXT:    sw a2, 0(a0)
4871; CHECK-RV32C-NEXT:    sw a3, 4(a0)
4872; CHECK-RV32C-NEXT:    sw a4, 8(a0)
4873; CHECK-RV32C-NEXT:    sw a1, 12(a0)
4874; CHECK-RV32C-NEXT:    ret
4875;
4876; CHECK-RV64V-LABEL: test_nontemporal_S1_load_v2i64:
4877; CHECK-RV64V:       # %bb.0:
4878; CHECK-RV64V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
4879; CHECK-RV64V-NEXT:    ntl.s1
4880; CHECK-RV64V-NEXT:    vle64.v v8, (a0)
4881; CHECK-RV64V-NEXT:    ret
4882;
4883; CHECK-RV32V-LABEL: test_nontemporal_S1_load_v2i64:
4884; CHECK-RV32V:       # %bb.0:
4885; CHECK-RV32V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
4886; CHECK-RV32V-NEXT:    ntl.s1
4887; CHECK-RV32V-NEXT:    vle64.v v8, (a0)
4888; CHECK-RV32V-NEXT:    ret
4889  %1 = load <2 x i64>, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !3
4890  ret <2 x i64> %1
4891}
4892
; Stores the i64 %v to %p. The store is tagged with !nontemporal !0 and
; !riscv-nontemporal-domain !3 (metadata nodes defined outside this chunk;
; presumably !3 selects the S1 domain -- TODO confirm).
; Expected on the RV64 configurations: one sd with an ntl.s1 hint directly
; before it. Expected on the RV32 configurations: two sw (a1 at offset 0, a2
; at offset 4), each with its own hint. The +c configurations spell the hint
; c.ntl.s1.
4893define void @test_nontemporal_S1_store_i64(ptr %p, i64 %v) {
4894; CHECK-RV64-LABEL: test_nontemporal_S1_store_i64:
4895; CHECK-RV64:       # %bb.0:
4896; CHECK-RV64-NEXT:    ntl.s1
4897; CHECK-RV64-NEXT:    sd a1, 0(a0)
4898; CHECK-RV64-NEXT:    ret
4899;
4900; CHECK-RV32-LABEL: test_nontemporal_S1_store_i64:
4901; CHECK-RV32:       # %bb.0:
4902; CHECK-RV32-NEXT:    ntl.s1
4903; CHECK-RV32-NEXT:    sw a1, 0(a0)
4904; CHECK-RV32-NEXT:    ntl.s1
4905; CHECK-RV32-NEXT:    sw a2, 4(a0)
4906; CHECK-RV32-NEXT:    ret
4907;
4908; CHECK-RV64C-LABEL: test_nontemporal_S1_store_i64:
4909; CHECK-RV64C:       # %bb.0:
4910; CHECK-RV64C-NEXT:    c.ntl.s1
4911; CHECK-RV64C-NEXT:    sd a1, 0(a0)
4912; CHECK-RV64C-NEXT:    ret
4913;
4914; CHECK-RV32C-LABEL: test_nontemporal_S1_store_i64:
4915; CHECK-RV32C:       # %bb.0:
4916; CHECK-RV32C-NEXT:    c.ntl.s1
4917; CHECK-RV32C-NEXT:    sw a1, 0(a0)
4918; CHECK-RV32C-NEXT:    c.ntl.s1
4919; CHECK-RV32C-NEXT:    sw a2, 4(a0)
4920; CHECK-RV32C-NEXT:    ret
4921;
4922; CHECK-RV64V-LABEL: test_nontemporal_S1_store_i64:
4923; CHECK-RV64V:       # %bb.0:
4924; CHECK-RV64V-NEXT:    ntl.s1
4925; CHECK-RV64V-NEXT:    sd a1, 0(a0)
4926; CHECK-RV64V-NEXT:    ret
4927;
4928; CHECK-RV32V-LABEL: test_nontemporal_S1_store_i64:
4929; CHECK-RV32V:       # %bb.0:
4930; CHECK-RV32V-NEXT:    ntl.s1
4931; CHECK-RV32V-NEXT:    sw a1, 0(a0)
4932; CHECK-RV32V-NEXT:    ntl.s1
4933; CHECK-RV32V-NEXT:    sw a2, 4(a0)
4934; CHECK-RV32V-NEXT:    ret
4935  store i64 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !3
4936  ret void
4937}
4938
; Stores the i32 %v to %p. The store is tagged with !nontemporal !0 and
; !riscv-nontemporal-domain !3 (metadata nodes defined outside this chunk;
; presumably !3 selects the S1 domain -- TODO confirm).
; Expected in all six configurations: an ntl.s1 hint directly before the sw,
; spelled c.ntl.s1 in the two +c configurations.
4939define void @test_nontemporal_S1_store_i32(ptr %p, i32 %v) {
4940; CHECK-RV64-LABEL: test_nontemporal_S1_store_i32:
4941; CHECK-RV64:       # %bb.0:
4942; CHECK-RV64-NEXT:    ntl.s1
4943; CHECK-RV64-NEXT:    sw a1, 0(a0)
4944; CHECK-RV64-NEXT:    ret
4945;
4946; CHECK-RV32-LABEL: test_nontemporal_S1_store_i32:
4947; CHECK-RV32:       # %bb.0:
4948; CHECK-RV32-NEXT:    ntl.s1
4949; CHECK-RV32-NEXT:    sw a1, 0(a0)
4950; CHECK-RV32-NEXT:    ret
4951;
4952; CHECK-RV64C-LABEL: test_nontemporal_S1_store_i32:
4953; CHECK-RV64C:       # %bb.0:
4954; CHECK-RV64C-NEXT:    c.ntl.s1
4955; CHECK-RV64C-NEXT:    sw a1, 0(a0)
4956; CHECK-RV64C-NEXT:    ret
4957;
4958; CHECK-RV32C-LABEL: test_nontemporal_S1_store_i32:
4959; CHECK-RV32C:       # %bb.0:
4960; CHECK-RV32C-NEXT:    c.ntl.s1
4961; CHECK-RV32C-NEXT:    sw a1, 0(a0)
4962; CHECK-RV32C-NEXT:    ret
4963;
4964; CHECK-RV64V-LABEL: test_nontemporal_S1_store_i32:
4965; CHECK-RV64V:       # %bb.0:
4966; CHECK-RV64V-NEXT:    ntl.s1
4967; CHECK-RV64V-NEXT:    sw a1, 0(a0)
4968; CHECK-RV64V-NEXT:    ret
4969;
4970; CHECK-RV32V-LABEL: test_nontemporal_S1_store_i32:
4971; CHECK-RV32V:       # %bb.0:
4972; CHECK-RV32V-NEXT:    ntl.s1
4973; CHECK-RV32V-NEXT:    sw a1, 0(a0)
4974; CHECK-RV32V-NEXT:    ret
4975  store i32 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !3
4976  ret void
4977}
4978
; Stores the i16 %v to %p. The store is tagged with !nontemporal !0 and
; !riscv-nontemporal-domain !3 (metadata nodes defined outside this chunk;
; presumably !3 selects the S1 domain -- TODO confirm).
; Expected in all six configurations: an ntl.s1 hint directly before the sh,
; spelled c.ntl.s1 in the two +c configurations.
4979define void @test_nontemporal_S1_store_i16(ptr %p, i16 %v) {
4980; CHECK-RV64-LABEL: test_nontemporal_S1_store_i16:
4981; CHECK-RV64:       # %bb.0:
4982; CHECK-RV64-NEXT:    ntl.s1
4983; CHECK-RV64-NEXT:    sh a1, 0(a0)
4984; CHECK-RV64-NEXT:    ret
4985;
4986; CHECK-RV32-LABEL: test_nontemporal_S1_store_i16:
4987; CHECK-RV32:       # %bb.0:
4988; CHECK-RV32-NEXT:    ntl.s1
4989; CHECK-RV32-NEXT:    sh a1, 0(a0)
4990; CHECK-RV32-NEXT:    ret
4991;
4992; CHECK-RV64C-LABEL: test_nontemporal_S1_store_i16:
4993; CHECK-RV64C:       # %bb.0:
4994; CHECK-RV64C-NEXT:    c.ntl.s1
4995; CHECK-RV64C-NEXT:    sh a1, 0(a0)
4996; CHECK-RV64C-NEXT:    ret
4997;
4998; CHECK-RV32C-LABEL: test_nontemporal_S1_store_i16:
4999; CHECK-RV32C:       # %bb.0:
5000; CHECK-RV32C-NEXT:    c.ntl.s1
5001; CHECK-RV32C-NEXT:    sh a1, 0(a0)
5002; CHECK-RV32C-NEXT:    ret
5003;
5004; CHECK-RV64V-LABEL: test_nontemporal_S1_store_i16:
5005; CHECK-RV64V:       # %bb.0:
5006; CHECK-RV64V-NEXT:    ntl.s1
5007; CHECK-RV64V-NEXT:    sh a1, 0(a0)
5008; CHECK-RV64V-NEXT:    ret
5009;
5010; CHECK-RV32V-LABEL: test_nontemporal_S1_store_i16:
5011; CHECK-RV32V:       # %bb.0:
5012; CHECK-RV32V-NEXT:    ntl.s1
5013; CHECK-RV32V-NEXT:    sh a1, 0(a0)
5014; CHECK-RV32V-NEXT:    ret
5015  store i16 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !3
5016  ret void
5017}
5018
; Stores the i8 %v to %p. The store is tagged with !nontemporal !0 and
; !riscv-nontemporal-domain !3 (metadata nodes defined outside this chunk;
; presumably !3 selects the S1 domain -- TODO confirm).
; Expected in all six configurations: an ntl.s1 hint directly before the sb,
; spelled c.ntl.s1 in the two +c configurations.
5019define void @test_nontemporal_S1_store_i8(ptr %p, i8 %v) {
5020; CHECK-RV64-LABEL: test_nontemporal_S1_store_i8:
5021; CHECK-RV64:       # %bb.0:
5022; CHECK-RV64-NEXT:    ntl.s1
5023; CHECK-RV64-NEXT:    sb a1, 0(a0)
5024; CHECK-RV64-NEXT:    ret
5025;
5026; CHECK-RV32-LABEL: test_nontemporal_S1_store_i8:
5027; CHECK-RV32:       # %bb.0:
5028; CHECK-RV32-NEXT:    ntl.s1
5029; CHECK-RV32-NEXT:    sb a1, 0(a0)
5030; CHECK-RV32-NEXT:    ret
5031;
5032; CHECK-RV64C-LABEL: test_nontemporal_S1_store_i8:
5033; CHECK-RV64C:       # %bb.0:
5034; CHECK-RV64C-NEXT:    c.ntl.s1
5035; CHECK-RV64C-NEXT:    sb a1, 0(a0)
5036; CHECK-RV64C-NEXT:    ret
5037;
5038; CHECK-RV32C-LABEL: test_nontemporal_S1_store_i8:
5039; CHECK-RV32C:       # %bb.0:
5040; CHECK-RV32C-NEXT:    c.ntl.s1
5041; CHECK-RV32C-NEXT:    sb a1, 0(a0)
5042; CHECK-RV32C-NEXT:    ret
5043;
5044; CHECK-RV64V-LABEL: test_nontemporal_S1_store_i8:
5045; CHECK-RV64V:       # %bb.0:
5046; CHECK-RV64V-NEXT:    ntl.s1
5047; CHECK-RV64V-NEXT:    sb a1, 0(a0)
5048; CHECK-RV64V-NEXT:    ret
5049;
5050; CHECK-RV32V-LABEL: test_nontemporal_S1_store_i8:
5051; CHECK-RV32V:       # %bb.0:
5052; CHECK-RV32V-NEXT:    ntl.s1
5053; CHECK-RV32V-NEXT:    sb a1, 0(a0)
5054; CHECK-RV32V-NEXT:    ret
5055  store i8 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !3
5056  ret void
5057}
5058
; Stores the half %v to %p. The store is tagged with !nontemporal !0 and
; !riscv-nontemporal-domain !3 (metadata nodes defined outside this chunk;
; presumably !3 selects the S1 domain -- TODO confirm).
; Expected in all six configurations: an ntl.s1 hint directly before the fsh
; of fa0, spelled c.ntl.s1 in the two +c configurations.
5059define void @test_nontemporal_S1_store_half(ptr %p, half %v) {
5060; CHECK-RV64-LABEL: test_nontemporal_S1_store_half:
5061; CHECK-RV64:       # %bb.0:
5062; CHECK-RV64-NEXT:    ntl.s1
5063; CHECK-RV64-NEXT:    fsh fa0, 0(a0)
5064; CHECK-RV64-NEXT:    ret
5065;
5066; CHECK-RV32-LABEL: test_nontemporal_S1_store_half:
5067; CHECK-RV32:       # %bb.0:
5068; CHECK-RV32-NEXT:    ntl.s1
5069; CHECK-RV32-NEXT:    fsh fa0, 0(a0)
5070; CHECK-RV32-NEXT:    ret
5071;
5072; CHECK-RV64C-LABEL: test_nontemporal_S1_store_half:
5073; CHECK-RV64C:       # %bb.0:
5074; CHECK-RV64C-NEXT:    c.ntl.s1
5075; CHECK-RV64C-NEXT:    fsh fa0, 0(a0)
5076; CHECK-RV64C-NEXT:    ret
5077;
5078; CHECK-RV32C-LABEL: test_nontemporal_S1_store_half:
5079; CHECK-RV32C:       # %bb.0:
5080; CHECK-RV32C-NEXT:    c.ntl.s1
5081; CHECK-RV32C-NEXT:    fsh fa0, 0(a0)
5082; CHECK-RV32C-NEXT:    ret
5083;
5084; CHECK-RV64V-LABEL: test_nontemporal_S1_store_half:
5085; CHECK-RV64V:       # %bb.0:
5086; CHECK-RV64V-NEXT:    ntl.s1
5087; CHECK-RV64V-NEXT:    fsh fa0, 0(a0)
5088; CHECK-RV64V-NEXT:    ret
5089;
5090; CHECK-RV32V-LABEL: test_nontemporal_S1_store_half:
5091; CHECK-RV32V:       # %bb.0:
5092; CHECK-RV32V-NEXT:    ntl.s1
5093; CHECK-RV32V-NEXT:    fsh fa0, 0(a0)
5094; CHECK-RV32V-NEXT:    ret
5095  store half %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !3
5096  ret void
5097}
5098
; Stores the float %v to %p. The store is tagged with !nontemporal !0 and
; !riscv-nontemporal-domain !3 (metadata nodes defined outside this chunk;
; presumably !3 selects the S1 domain -- TODO confirm).
; Expected in all six configurations: an ntl.s1 hint directly before the fsw
; of fa0, spelled c.ntl.s1 in the two +c configurations.
5099define void @test_nontemporal_S1_store_float(ptr %p, float %v) {
5100; CHECK-RV64-LABEL: test_nontemporal_S1_store_float:
5101; CHECK-RV64:       # %bb.0:
5102; CHECK-RV64-NEXT:    ntl.s1
5103; CHECK-RV64-NEXT:    fsw fa0, 0(a0)
5104; CHECK-RV64-NEXT:    ret
5105;
5106; CHECK-RV32-LABEL: test_nontemporal_S1_store_float:
5107; CHECK-RV32:       # %bb.0:
5108; CHECK-RV32-NEXT:    ntl.s1
5109; CHECK-RV32-NEXT:    fsw fa0, 0(a0)
5110; CHECK-RV32-NEXT:    ret
5111;
5112; CHECK-RV64C-LABEL: test_nontemporal_S1_store_float:
5113; CHECK-RV64C:       # %bb.0:
5114; CHECK-RV64C-NEXT:    c.ntl.s1
5115; CHECK-RV64C-NEXT:    fsw fa0, 0(a0)
5116; CHECK-RV64C-NEXT:    ret
5117;
5118; CHECK-RV32C-LABEL: test_nontemporal_S1_store_float:
5119; CHECK-RV32C:       # %bb.0:
5120; CHECK-RV32C-NEXT:    c.ntl.s1
5121; CHECK-RV32C-NEXT:    fsw fa0, 0(a0)
5122; CHECK-RV32C-NEXT:    ret
5123;
5124; CHECK-RV64V-LABEL: test_nontemporal_S1_store_float:
5125; CHECK-RV64V:       # %bb.0:
5126; CHECK-RV64V-NEXT:    ntl.s1
5127; CHECK-RV64V-NEXT:    fsw fa0, 0(a0)
5128; CHECK-RV64V-NEXT:    ret
5129;
5130; CHECK-RV32V-LABEL: test_nontemporal_S1_store_float:
5131; CHECK-RV32V:       # %bb.0:
5132; CHECK-RV32V-NEXT:    ntl.s1
5133; CHECK-RV32V-NEXT:    fsw fa0, 0(a0)
5134; CHECK-RV32V-NEXT:    ret
5135  store float %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !3
5136  ret void
5137}
5138
; Stores the double %v to %p. The store is tagged with !nontemporal !0 and
; !riscv-nontemporal-domain !3 (metadata nodes defined outside this chunk;
; presumably !3 selects the S1 domain -- TODO confirm).
; Expected in all six configurations, including the 32-bit ones: a single fsd
; of fa0 with an ntl.s1 hint directly before it (c.ntl.s1 in the two +c
; configurations).
5139define void @test_nontemporal_S1_store_double(ptr %p, double %v) {
5140; CHECK-RV64-LABEL: test_nontemporal_S1_store_double:
5141; CHECK-RV64:       # %bb.0:
5142; CHECK-RV64-NEXT:    ntl.s1
5143; CHECK-RV64-NEXT:    fsd fa0, 0(a0)
5144; CHECK-RV64-NEXT:    ret
5145;
5146; CHECK-RV32-LABEL: test_nontemporal_S1_store_double:
5147; CHECK-RV32:       # %bb.0:
5148; CHECK-RV32-NEXT:    ntl.s1
5149; CHECK-RV32-NEXT:    fsd fa0, 0(a0)
5150; CHECK-RV32-NEXT:    ret
5151;
5152; CHECK-RV64C-LABEL: test_nontemporal_S1_store_double:
5153; CHECK-RV64C:       # %bb.0:
5154; CHECK-RV64C-NEXT:    c.ntl.s1
5155; CHECK-RV64C-NEXT:    fsd fa0, 0(a0)
5156; CHECK-RV64C-NEXT:    ret
5157;
5158; CHECK-RV32C-LABEL: test_nontemporal_S1_store_double:
5159; CHECK-RV32C:       # %bb.0:
5160; CHECK-RV32C-NEXT:    c.ntl.s1
5161; CHECK-RV32C-NEXT:    fsd fa0, 0(a0)
5162; CHECK-RV32C-NEXT:    ret
5163;
5164; CHECK-RV64V-LABEL: test_nontemporal_S1_store_double:
5165; CHECK-RV64V:       # %bb.0:
5166; CHECK-RV64V-NEXT:    ntl.s1
5167; CHECK-RV64V-NEXT:    fsd fa0, 0(a0)
5168; CHECK-RV64V-NEXT:    ret
5169;
5170; CHECK-RV32V-LABEL: test_nontemporal_S1_store_double:
5171; CHECK-RV32V:       # %bb.0:
5172; CHECK-RV32V-NEXT:    ntl.s1
5173; CHECK-RV32V-NEXT:    fsd fa0, 0(a0)
5174; CHECK-RV32V-NEXT:    ret
5175  store double %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !3
5176  ret void
5177}
5178
5179define void @test_nontemporal_S1_store_v16i8(ptr %p, <16 x i8> %v) {
5180; CHECK-RV64-LABEL: test_nontemporal_S1_store_v16i8:
5181; CHECK-RV64:       # %bb.0:
5182; CHECK-RV64-NEXT:    addi sp, sp, -16
5183; CHECK-RV64-NEXT:    .cfi_def_cfa_offset 16
5184; CHECK-RV64-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
5185; CHECK-RV64-NEXT:    sd s1, 0(sp) # 8-byte Folded Spill
5186; CHECK-RV64-NEXT:    .cfi_offset s0, -8
5187; CHECK-RV64-NEXT:    .cfi_offset s1, -16
5188; CHECK-RV64-NEXT:    lbu a2, 0(a1)
5189; CHECK-RV64-NEXT:    lbu a3, 8(a1)
5190; CHECK-RV64-NEXT:    lbu a4, 16(a1)
5191; CHECK-RV64-NEXT:    lbu a5, 24(a1)
5192; CHECK-RV64-NEXT:    lbu a6, 32(a1)
5193; CHECK-RV64-NEXT:    lbu a7, 40(a1)
5194; CHECK-RV64-NEXT:    lbu t0, 48(a1)
5195; CHECK-RV64-NEXT:    lbu t1, 56(a1)
5196; CHECK-RV64-NEXT:    lbu t2, 64(a1)
5197; CHECK-RV64-NEXT:    lbu t3, 72(a1)
5198; CHECK-RV64-NEXT:    lbu t4, 80(a1)
5199; CHECK-RV64-NEXT:    lbu t5, 88(a1)
5200; CHECK-RV64-NEXT:    lbu t6, 96(a1)
5201; CHECK-RV64-NEXT:    lbu s0, 104(a1)
5202; CHECK-RV64-NEXT:    lbu s1, 112(a1)
5203; CHECK-RV64-NEXT:    lbu a1, 120(a1)
5204; CHECK-RV64-NEXT:    ntl.s1
5205; CHECK-RV64-NEXT:    sb t6, 12(a0)
5206; CHECK-RV64-NEXT:    ntl.s1
5207; CHECK-RV64-NEXT:    sb s0, 13(a0)
5208; CHECK-RV64-NEXT:    ntl.s1
5209; CHECK-RV64-NEXT:    sb s1, 14(a0)
5210; CHECK-RV64-NEXT:    ntl.s1
5211; CHECK-RV64-NEXT:    sb a1, 15(a0)
5212; CHECK-RV64-NEXT:    ntl.s1
5213; CHECK-RV64-NEXT:    sb t2, 8(a0)
5214; CHECK-RV64-NEXT:    ntl.s1
5215; CHECK-RV64-NEXT:    sb t3, 9(a0)
5216; CHECK-RV64-NEXT:    ntl.s1
5217; CHECK-RV64-NEXT:    sb t4, 10(a0)
5218; CHECK-RV64-NEXT:    ntl.s1
5219; CHECK-RV64-NEXT:    sb t5, 11(a0)
5220; CHECK-RV64-NEXT:    ntl.s1
5221; CHECK-RV64-NEXT:    sb a6, 4(a0)
5222; CHECK-RV64-NEXT:    ntl.s1
5223; CHECK-RV64-NEXT:    sb a7, 5(a0)
5224; CHECK-RV64-NEXT:    ntl.s1
5225; CHECK-RV64-NEXT:    sb t0, 6(a0)
5226; CHECK-RV64-NEXT:    ntl.s1
5227; CHECK-RV64-NEXT:    sb t1, 7(a0)
5228; CHECK-RV64-NEXT:    ntl.s1
5229; CHECK-RV64-NEXT:    sb a2, 0(a0)
5230; CHECK-RV64-NEXT:    ntl.s1
5231; CHECK-RV64-NEXT:    sb a3, 1(a0)
5232; CHECK-RV64-NEXT:    ntl.s1
5233; CHECK-RV64-NEXT:    sb a4, 2(a0)
5234; CHECK-RV64-NEXT:    ntl.s1
5235; CHECK-RV64-NEXT:    sb a5, 3(a0)
5236; CHECK-RV64-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
5237; CHECK-RV64-NEXT:    ld s1, 0(sp) # 8-byte Folded Reload
5238; CHECK-RV64-NEXT:    .cfi_restore s0
5239; CHECK-RV64-NEXT:    .cfi_restore s1
5240; CHECK-RV64-NEXT:    addi sp, sp, 16
5241; CHECK-RV64-NEXT:    .cfi_def_cfa_offset 0
5242; CHECK-RV64-NEXT:    ret
5243;
5244; CHECK-RV32-LABEL: test_nontemporal_S1_store_v16i8:
5245; CHECK-RV32:       # %bb.0:
5246; CHECK-RV32-NEXT:    addi sp, sp, -16
5247; CHECK-RV32-NEXT:    .cfi_def_cfa_offset 16
5248; CHECK-RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
5249; CHECK-RV32-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
5250; CHECK-RV32-NEXT:    .cfi_offset s0, -4
5251; CHECK-RV32-NEXT:    .cfi_offset s1, -8
5252; CHECK-RV32-NEXT:    lbu a2, 0(a1)
5253; CHECK-RV32-NEXT:    lbu a3, 4(a1)
5254; CHECK-RV32-NEXT:    lbu a4, 8(a1)
5255; CHECK-RV32-NEXT:    lbu a5, 12(a1)
5256; CHECK-RV32-NEXT:    lbu a6, 16(a1)
5257; CHECK-RV32-NEXT:    lbu a7, 20(a1)
5258; CHECK-RV32-NEXT:    lbu t0, 24(a1)
5259; CHECK-RV32-NEXT:    lbu t1, 28(a1)
5260; CHECK-RV32-NEXT:    lbu t2, 32(a1)
5261; CHECK-RV32-NEXT:    lbu t3, 36(a1)
5262; CHECK-RV32-NEXT:    lbu t4, 40(a1)
5263; CHECK-RV32-NEXT:    lbu t5, 44(a1)
5264; CHECK-RV32-NEXT:    lbu t6, 48(a1)
5265; CHECK-RV32-NEXT:    lbu s0, 52(a1)
5266; CHECK-RV32-NEXT:    lbu s1, 56(a1)
5267; CHECK-RV32-NEXT:    lbu a1, 60(a1)
5268; CHECK-RV32-NEXT:    ntl.s1
5269; CHECK-RV32-NEXT:    sb t6, 12(a0)
5270; CHECK-RV32-NEXT:    ntl.s1
5271; CHECK-RV32-NEXT:    sb s0, 13(a0)
5272; CHECK-RV32-NEXT:    ntl.s1
5273; CHECK-RV32-NEXT:    sb s1, 14(a0)
5274; CHECK-RV32-NEXT:    ntl.s1
5275; CHECK-RV32-NEXT:    sb a1, 15(a0)
5276; CHECK-RV32-NEXT:    ntl.s1
5277; CHECK-RV32-NEXT:    sb t2, 8(a0)
5278; CHECK-RV32-NEXT:    ntl.s1
5279; CHECK-RV32-NEXT:    sb t3, 9(a0)
5280; CHECK-RV32-NEXT:    ntl.s1
5281; CHECK-RV32-NEXT:    sb t4, 10(a0)
5282; CHECK-RV32-NEXT:    ntl.s1
5283; CHECK-RV32-NEXT:    sb t5, 11(a0)
5284; CHECK-RV32-NEXT:    ntl.s1
5285; CHECK-RV32-NEXT:    sb a6, 4(a0)
5286; CHECK-RV32-NEXT:    ntl.s1
5287; CHECK-RV32-NEXT:    sb a7, 5(a0)
5288; CHECK-RV32-NEXT:    ntl.s1
5289; CHECK-RV32-NEXT:    sb t0, 6(a0)
5290; CHECK-RV32-NEXT:    ntl.s1
5291; CHECK-RV32-NEXT:    sb t1, 7(a0)
5292; CHECK-RV32-NEXT:    ntl.s1
5293; CHECK-RV32-NEXT:    sb a2, 0(a0)
5294; CHECK-RV32-NEXT:    ntl.s1
5295; CHECK-RV32-NEXT:    sb a3, 1(a0)
5296; CHECK-RV32-NEXT:    ntl.s1
5297; CHECK-RV32-NEXT:    sb a4, 2(a0)
5298; CHECK-RV32-NEXT:    ntl.s1
5299; CHECK-RV32-NEXT:    sb a5, 3(a0)
5300; CHECK-RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
5301; CHECK-RV32-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
5302; CHECK-RV32-NEXT:    .cfi_restore s0
5303; CHECK-RV32-NEXT:    .cfi_restore s1
5304; CHECK-RV32-NEXT:    addi sp, sp, 16
5305; CHECK-RV32-NEXT:    .cfi_def_cfa_offset 0
5306; CHECK-RV32-NEXT:    ret
5307;
5308; CHECK-RV64C-LABEL: test_nontemporal_S1_store_v16i8:
5309; CHECK-RV64C:       # %bb.0:
5310; CHECK-RV64C-NEXT:    addi sp, sp, -16
5311; CHECK-RV64C-NEXT:    .cfi_def_cfa_offset 16
5312; CHECK-RV64C-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
5313; CHECK-RV64C-NEXT:    sd s1, 0(sp) # 8-byte Folded Spill
5314; CHECK-RV64C-NEXT:    .cfi_offset s0, -8
5315; CHECK-RV64C-NEXT:    .cfi_offset s1, -16
5316; CHECK-RV64C-NEXT:    lbu a6, 0(a1)
5317; CHECK-RV64C-NEXT:    lbu a7, 8(a1)
5318; CHECK-RV64C-NEXT:    lbu t0, 16(a1)
5319; CHECK-RV64C-NEXT:    lbu t1, 24(a1)
5320; CHECK-RV64C-NEXT:    lbu t2, 32(a1)
5321; CHECK-RV64C-NEXT:    lbu t3, 40(a1)
5322; CHECK-RV64C-NEXT:    lbu t4, 48(a1)
5323; CHECK-RV64C-NEXT:    lbu t5, 56(a1)
5324; CHECK-RV64C-NEXT:    lbu t6, 64(a1)
5325; CHECK-RV64C-NEXT:    lbu a3, 72(a1)
5326; CHECK-RV64C-NEXT:    lbu a4, 80(a1)
5327; CHECK-RV64C-NEXT:    lbu a5, 88(a1)
5328; CHECK-RV64C-NEXT:    lbu a2, 96(a1)
5329; CHECK-RV64C-NEXT:    lbu s0, 104(a1)
5330; CHECK-RV64C-NEXT:    lbu s1, 112(a1)
5331; CHECK-RV64C-NEXT:    lbu a1, 120(a1)
5332; CHECK-RV64C-NEXT:    c.ntl.s1
5333; CHECK-RV64C-NEXT:    sb a2, 12(a0)
5334; CHECK-RV64C-NEXT:    c.ntl.s1
5335; CHECK-RV64C-NEXT:    sb s0, 13(a0)
5336; CHECK-RV64C-NEXT:    c.ntl.s1
5337; CHECK-RV64C-NEXT:    sb s1, 14(a0)
5338; CHECK-RV64C-NEXT:    c.ntl.s1
5339; CHECK-RV64C-NEXT:    sb a1, 15(a0)
5340; CHECK-RV64C-NEXT:    c.ntl.s1
5341; CHECK-RV64C-NEXT:    sb t6, 8(a0)
5342; CHECK-RV64C-NEXT:    c.ntl.s1
5343; CHECK-RV64C-NEXT:    sb a3, 9(a0)
5344; CHECK-RV64C-NEXT:    c.ntl.s1
5345; CHECK-RV64C-NEXT:    sb a4, 10(a0)
5346; CHECK-RV64C-NEXT:    c.ntl.s1
5347; CHECK-RV64C-NEXT:    sb a5, 11(a0)
5348; CHECK-RV64C-NEXT:    c.ntl.s1
5349; CHECK-RV64C-NEXT:    sb t2, 4(a0)
5350; CHECK-RV64C-NEXT:    c.ntl.s1
5351; CHECK-RV64C-NEXT:    sb t3, 5(a0)
5352; CHECK-RV64C-NEXT:    c.ntl.s1
5353; CHECK-RV64C-NEXT:    sb t4, 6(a0)
5354; CHECK-RV64C-NEXT:    c.ntl.s1
5355; CHECK-RV64C-NEXT:    sb t5, 7(a0)
5356; CHECK-RV64C-NEXT:    c.ntl.s1
5357; CHECK-RV64C-NEXT:    sb a6, 0(a0)
5358; CHECK-RV64C-NEXT:    c.ntl.s1
5359; CHECK-RV64C-NEXT:    sb a7, 1(a0)
5360; CHECK-RV64C-NEXT:    c.ntl.s1
5361; CHECK-RV64C-NEXT:    sb t0, 2(a0)
5362; CHECK-RV64C-NEXT:    c.ntl.s1
5363; CHECK-RV64C-NEXT:    sb t1, 3(a0)
5364; CHECK-RV64C-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
5365; CHECK-RV64C-NEXT:    ld s1, 0(sp) # 8-byte Folded Reload
5366; CHECK-RV64C-NEXT:    .cfi_restore s0
5367; CHECK-RV64C-NEXT:    .cfi_restore s1
5368; CHECK-RV64C-NEXT:    addi sp, sp, 16
5369; CHECK-RV64C-NEXT:    .cfi_def_cfa_offset 0
5370; CHECK-RV64C-NEXT:    ret
5371;
5372; CHECK-RV32C-LABEL: test_nontemporal_S1_store_v16i8:
5373; CHECK-RV32C:       # %bb.0:
5374; CHECK-RV32C-NEXT:    addi sp, sp, -16
5375; CHECK-RV32C-NEXT:    .cfi_def_cfa_offset 16
5376; CHECK-RV32C-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
5377; CHECK-RV32C-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
5378; CHECK-RV32C-NEXT:    .cfi_offset s0, -4
5379; CHECK-RV32C-NEXT:    .cfi_offset s1, -8
5380; CHECK-RV32C-NEXT:    lbu a6, 0(a1)
5381; CHECK-RV32C-NEXT:    lbu a7, 4(a1)
5382; CHECK-RV32C-NEXT:    lbu t0, 8(a1)
5383; CHECK-RV32C-NEXT:    lbu t1, 12(a1)
5384; CHECK-RV32C-NEXT:    lbu t2, 16(a1)
5385; CHECK-RV32C-NEXT:    lbu t3, 20(a1)
5386; CHECK-RV32C-NEXT:    lbu t4, 24(a1)
5387; CHECK-RV32C-NEXT:    lbu t5, 28(a1)
5388; CHECK-RV32C-NEXT:    lbu t6, 32(a1)
5389; CHECK-RV32C-NEXT:    lbu a3, 36(a1)
5390; CHECK-RV32C-NEXT:    lbu a4, 40(a1)
5391; CHECK-RV32C-NEXT:    lbu a5, 44(a1)
5392; CHECK-RV32C-NEXT:    lbu a2, 48(a1)
5393; CHECK-RV32C-NEXT:    lbu s0, 52(a1)
5394; CHECK-RV32C-NEXT:    lbu s1, 56(a1)
5395; CHECK-RV32C-NEXT:    lbu a1, 60(a1)
5396; CHECK-RV32C-NEXT:    c.ntl.s1
5397; CHECK-RV32C-NEXT:    sb a2, 12(a0)
5398; CHECK-RV32C-NEXT:    c.ntl.s1
5399; CHECK-RV32C-NEXT:    sb s0, 13(a0)
5400; CHECK-RV32C-NEXT:    c.ntl.s1
5401; CHECK-RV32C-NEXT:    sb s1, 14(a0)
5402; CHECK-RV32C-NEXT:    c.ntl.s1
5403; CHECK-RV32C-NEXT:    sb a1, 15(a0)
5404; CHECK-RV32C-NEXT:    c.ntl.s1
5405; CHECK-RV32C-NEXT:    sb t6, 8(a0)
5406; CHECK-RV32C-NEXT:    c.ntl.s1
5407; CHECK-RV32C-NEXT:    sb a3, 9(a0)
5408; CHECK-RV32C-NEXT:    c.ntl.s1
5409; CHECK-RV32C-NEXT:    sb a4, 10(a0)
5410; CHECK-RV32C-NEXT:    c.ntl.s1
5411; CHECK-RV32C-NEXT:    sb a5, 11(a0)
5412; CHECK-RV32C-NEXT:    c.ntl.s1
5413; CHECK-RV32C-NEXT:    sb t2, 4(a0)
5414; CHECK-RV32C-NEXT:    c.ntl.s1
5415; CHECK-RV32C-NEXT:    sb t3, 5(a0)
5416; CHECK-RV32C-NEXT:    c.ntl.s1
5417; CHECK-RV32C-NEXT:    sb t4, 6(a0)
5418; CHECK-RV32C-NEXT:    c.ntl.s1
5419; CHECK-RV32C-NEXT:    sb t5, 7(a0)
5420; CHECK-RV32C-NEXT:    c.ntl.s1
5421; CHECK-RV32C-NEXT:    sb a6, 0(a0)
5422; CHECK-RV32C-NEXT:    c.ntl.s1
5423; CHECK-RV32C-NEXT:    sb a7, 1(a0)
5424; CHECK-RV32C-NEXT:    c.ntl.s1
5425; CHECK-RV32C-NEXT:    sb t0, 2(a0)
5426; CHECK-RV32C-NEXT:    c.ntl.s1
5427; CHECK-RV32C-NEXT:    sb t1, 3(a0)
5428; CHECK-RV32C-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
5429; CHECK-RV32C-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
5430; CHECK-RV32C-NEXT:    .cfi_restore s0
5431; CHECK-RV32C-NEXT:    .cfi_restore s1
5432; CHECK-RV32C-NEXT:    addi sp, sp, 16
5433; CHECK-RV32C-NEXT:    .cfi_def_cfa_offset 0
5434; CHECK-RV32C-NEXT:    ret
5435;
5436; CHECK-RV64V-LABEL: test_nontemporal_S1_store_v16i8:
5437; CHECK-RV64V:       # %bb.0:
5438; CHECK-RV64V-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
5439; CHECK-RV64V-NEXT:    ntl.s1
5440; CHECK-RV64V-NEXT:    vse8.v v8, (a0)
5441; CHECK-RV64V-NEXT:    ret
5442;
5443; CHECK-RV32V-LABEL: test_nontemporal_S1_store_v16i8:
5444; CHECK-RV32V:       # %bb.0:
5445; CHECK-RV32V-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
5446; CHECK-RV32V-NEXT:    ntl.s1
5447; CHECK-RV32V-NEXT:    vse8.v v8, (a0)
5448; CHECK-RV32V-NEXT:    ret
5449  store <16 x i8> %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !3
5450  ret void
5451}
5452
; Nontemporal store of an <8 x i16> value tagged with
; !riscv-nontemporal-domain !3 (the definition of !3 is outside this excerpt).
; Without +v the expected code stores the eight elements one by one with sh,
; each store preceded by its own ntl.s1 hint (c.ntl.s1 when +c is enabled).
; With +v a single vse16.v preceded by one ntl.s1 is expected.
5453define void @test_nontemporal_S1_store_v8i16(ptr %p, <8 x i16> %v) {
5454; CHECK-RV64-LABEL: test_nontemporal_S1_store_v8i16:
5455; CHECK-RV64:       # %bb.0:
5456; CHECK-RV64-NEXT:    lh a2, 0(a1)
5457; CHECK-RV64-NEXT:    lh a3, 8(a1)
5458; CHECK-RV64-NEXT:    lh a4, 16(a1)
5459; CHECK-RV64-NEXT:    lh a5, 24(a1)
5460; CHECK-RV64-NEXT:    lh a6, 32(a1)
5461; CHECK-RV64-NEXT:    lh a7, 40(a1)
5462; CHECK-RV64-NEXT:    lh t0, 48(a1)
5463; CHECK-RV64-NEXT:    lh a1, 56(a1)
5464; CHECK-RV64-NEXT:    ntl.s1
5465; CHECK-RV64-NEXT:    sh a6, 8(a0)
5466; CHECK-RV64-NEXT:    ntl.s1
5467; CHECK-RV64-NEXT:    sh a7, 10(a0)
5468; CHECK-RV64-NEXT:    ntl.s1
5469; CHECK-RV64-NEXT:    sh t0, 12(a0)
5470; CHECK-RV64-NEXT:    ntl.s1
5471; CHECK-RV64-NEXT:    sh a1, 14(a0)
5472; CHECK-RV64-NEXT:    ntl.s1
5473; CHECK-RV64-NEXT:    sh a2, 0(a0)
5474; CHECK-RV64-NEXT:    ntl.s1
5475; CHECK-RV64-NEXT:    sh a3, 2(a0)
5476; CHECK-RV64-NEXT:    ntl.s1
5477; CHECK-RV64-NEXT:    sh a4, 4(a0)
5478; CHECK-RV64-NEXT:    ntl.s1
5479; CHECK-RV64-NEXT:    sh a5, 6(a0)
5480; CHECK-RV64-NEXT:    ret
5481;
5482; CHECK-RV32-LABEL: test_nontemporal_S1_store_v8i16:
5483; CHECK-RV32:       # %bb.0:
5484; CHECK-RV32-NEXT:    lh a2, 0(a1)
5485; CHECK-RV32-NEXT:    lh a3, 4(a1)
5486; CHECK-RV32-NEXT:    lh a4, 8(a1)
5487; CHECK-RV32-NEXT:    lh a5, 12(a1)
5488; CHECK-RV32-NEXT:    lh a6, 16(a1)
5489; CHECK-RV32-NEXT:    lh a7, 20(a1)
5490; CHECK-RV32-NEXT:    lh t0, 24(a1)
5491; CHECK-RV32-NEXT:    lh a1, 28(a1)
5492; CHECK-RV32-NEXT:    ntl.s1
5493; CHECK-RV32-NEXT:    sh a6, 8(a0)
5494; CHECK-RV32-NEXT:    ntl.s1
5495; CHECK-RV32-NEXT:    sh a7, 10(a0)
5496; CHECK-RV32-NEXT:    ntl.s1
5497; CHECK-RV32-NEXT:    sh t0, 12(a0)
5498; CHECK-RV32-NEXT:    ntl.s1
5499; CHECK-RV32-NEXT:    sh a1, 14(a0)
5500; CHECK-RV32-NEXT:    ntl.s1
5501; CHECK-RV32-NEXT:    sh a2, 0(a0)
5502; CHECK-RV32-NEXT:    ntl.s1
5503; CHECK-RV32-NEXT:    sh a3, 2(a0)
5504; CHECK-RV32-NEXT:    ntl.s1
5505; CHECK-RV32-NEXT:    sh a4, 4(a0)
5506; CHECK-RV32-NEXT:    ntl.s1
5507; CHECK-RV32-NEXT:    sh a5, 6(a0)
5508; CHECK-RV32-NEXT:    ret
5509;
5510; CHECK-RV64C-LABEL: test_nontemporal_S1_store_v8i16:
5511; CHECK-RV64C:       # %bb.0:
5512; CHECK-RV64C-NEXT:    lh a6, 0(a1)
5513; CHECK-RV64C-NEXT:    lh a7, 8(a1)
5514; CHECK-RV64C-NEXT:    lh t0, 16(a1)
5515; CHECK-RV64C-NEXT:    lh a5, 24(a1)
5516; CHECK-RV64C-NEXT:    lh a2, 32(a1)
5517; CHECK-RV64C-NEXT:    lh a3, 40(a1)
5518; CHECK-RV64C-NEXT:    lh a4, 48(a1)
5519; CHECK-RV64C-NEXT:    lh a1, 56(a1)
5520; CHECK-RV64C-NEXT:    c.ntl.s1
5521; CHECK-RV64C-NEXT:    sh a2, 8(a0)
5522; CHECK-RV64C-NEXT:    c.ntl.s1
5523; CHECK-RV64C-NEXT:    sh a3, 10(a0)
5524; CHECK-RV64C-NEXT:    c.ntl.s1
5525; CHECK-RV64C-NEXT:    sh a4, 12(a0)
5526; CHECK-RV64C-NEXT:    c.ntl.s1
5527; CHECK-RV64C-NEXT:    sh a1, 14(a0)
5528; CHECK-RV64C-NEXT:    c.ntl.s1
5529; CHECK-RV64C-NEXT:    sh a6, 0(a0)
5530; CHECK-RV64C-NEXT:    c.ntl.s1
5531; CHECK-RV64C-NEXT:    sh a7, 2(a0)
5532; CHECK-RV64C-NEXT:    c.ntl.s1
5533; CHECK-RV64C-NEXT:    sh t0, 4(a0)
5534; CHECK-RV64C-NEXT:    c.ntl.s1
5535; CHECK-RV64C-NEXT:    sh a5, 6(a0)
5536; CHECK-RV64C-NEXT:    ret
5537;
5538; CHECK-RV32C-LABEL: test_nontemporal_S1_store_v8i16:
5539; CHECK-RV32C:       # %bb.0:
5540; CHECK-RV32C-NEXT:    lh a6, 0(a1)
5541; CHECK-RV32C-NEXT:    lh a7, 4(a1)
5542; CHECK-RV32C-NEXT:    lh t0, 8(a1)
5543; CHECK-RV32C-NEXT:    lh a5, 12(a1)
5544; CHECK-RV32C-NEXT:    lh a2, 16(a1)
5545; CHECK-RV32C-NEXT:    lh a3, 20(a1)
5546; CHECK-RV32C-NEXT:    lh a4, 24(a1)
5547; CHECK-RV32C-NEXT:    lh a1, 28(a1)
5548; CHECK-RV32C-NEXT:    c.ntl.s1
5549; CHECK-RV32C-NEXT:    sh a2, 8(a0)
5550; CHECK-RV32C-NEXT:    c.ntl.s1
5551; CHECK-RV32C-NEXT:    sh a3, 10(a0)
5552; CHECK-RV32C-NEXT:    c.ntl.s1
5553; CHECK-RV32C-NEXT:    sh a4, 12(a0)
5554; CHECK-RV32C-NEXT:    c.ntl.s1
5555; CHECK-RV32C-NEXT:    sh a1, 14(a0)
5556; CHECK-RV32C-NEXT:    c.ntl.s1
5557; CHECK-RV32C-NEXT:    sh a6, 0(a0)
5558; CHECK-RV32C-NEXT:    c.ntl.s1
5559; CHECK-RV32C-NEXT:    sh a7, 2(a0)
5560; CHECK-RV32C-NEXT:    c.ntl.s1
5561; CHECK-RV32C-NEXT:    sh t0, 4(a0)
5562; CHECK-RV32C-NEXT:    c.ntl.s1
5563; CHECK-RV32C-NEXT:    sh a5, 6(a0)
5564; CHECK-RV32C-NEXT:    ret
5565;
5566; CHECK-RV64V-LABEL: test_nontemporal_S1_store_v8i16:
5567; CHECK-RV64V:       # %bb.0:
5568; CHECK-RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
5569; CHECK-RV64V-NEXT:    ntl.s1
5570; CHECK-RV64V-NEXT:    vse16.v v8, (a0)
5571; CHECK-RV64V-NEXT:    ret
5572;
5573; CHECK-RV32V-LABEL: test_nontemporal_S1_store_v8i16:
5574; CHECK-RV32V:       # %bb.0:
5575; CHECK-RV32V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
5576; CHECK-RV32V-NEXT:    ntl.s1
5577; CHECK-RV32V-NEXT:    vse16.v v8, (a0)
5578; CHECK-RV32V-NEXT:    ret
5579  store <8 x i16> %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !3
5580  ret void
5581}
5582
; Nontemporal store of a <4 x i32> value tagged with
; !riscv-nontemporal-domain !3 (the definition of !3 is outside this excerpt).
; Without +v the expected code emits four sw stores, each preceded by its own
; ntl.s1 hint (c.ntl.s1 when +c is enabled). With +v a single vse32.v preceded
; by one ntl.s1 is expected.
5583define void @test_nontemporal_S1_store_v4i32(ptr %p, <4 x i32> %v) {
5584; CHECK-RV64-LABEL: test_nontemporal_S1_store_v4i32:
5585; CHECK-RV64:       # %bb.0:
5586; CHECK-RV64-NEXT:    lw a2, 0(a1)
5587; CHECK-RV64-NEXT:    lw a3, 8(a1)
5588; CHECK-RV64-NEXT:    lw a4, 16(a1)
5589; CHECK-RV64-NEXT:    lw a1, 24(a1)
5590; CHECK-RV64-NEXT:    ntl.s1
5591; CHECK-RV64-NEXT:    sw a2, 0(a0)
5592; CHECK-RV64-NEXT:    ntl.s1
5593; CHECK-RV64-NEXT:    sw a3, 4(a0)
5594; CHECK-RV64-NEXT:    ntl.s1
5595; CHECK-RV64-NEXT:    sw a4, 8(a0)
5596; CHECK-RV64-NEXT:    ntl.s1
5597; CHECK-RV64-NEXT:    sw a1, 12(a0)
5598; CHECK-RV64-NEXT:    ret
5599;
5600; CHECK-RV32-LABEL: test_nontemporal_S1_store_v4i32:
5601; CHECK-RV32:       # %bb.0:
5602; CHECK-RV32-NEXT:    lw a2, 0(a1)
5603; CHECK-RV32-NEXT:    lw a3, 4(a1)
5604; CHECK-RV32-NEXT:    lw a4, 8(a1)
5605; CHECK-RV32-NEXT:    lw a1, 12(a1)
5606; CHECK-RV32-NEXT:    ntl.s1
5607; CHECK-RV32-NEXT:    sw a2, 0(a0)
5608; CHECK-RV32-NEXT:    ntl.s1
5609; CHECK-RV32-NEXT:    sw a3, 4(a0)
5610; CHECK-RV32-NEXT:    ntl.s1
5611; CHECK-RV32-NEXT:    sw a4, 8(a0)
5612; CHECK-RV32-NEXT:    ntl.s1
5613; CHECK-RV32-NEXT:    sw a1, 12(a0)
5614; CHECK-RV32-NEXT:    ret
5615;
5616; CHECK-RV64C-LABEL: test_nontemporal_S1_store_v4i32:
5617; CHECK-RV64C:       # %bb.0:
5618; CHECK-RV64C-NEXT:    lw a2, 0(a1)
5619; CHECK-RV64C-NEXT:    lw a3, 8(a1)
5620; CHECK-RV64C-NEXT:    lw a4, 16(a1)
5621; CHECK-RV64C-NEXT:    lw a1, 24(a1)
5622; CHECK-RV64C-NEXT:    c.ntl.s1
5623; CHECK-RV64C-NEXT:    sw a2, 0(a0)
5624; CHECK-RV64C-NEXT:    c.ntl.s1
5625; CHECK-RV64C-NEXT:    sw a3, 4(a0)
5626; CHECK-RV64C-NEXT:    c.ntl.s1
5627; CHECK-RV64C-NEXT:    sw a4, 8(a0)
5628; CHECK-RV64C-NEXT:    c.ntl.s1
5629; CHECK-RV64C-NEXT:    sw a1, 12(a0)
5630; CHECK-RV64C-NEXT:    ret
5631;
5632; CHECK-RV32C-LABEL: test_nontemporal_S1_store_v4i32:
5633; CHECK-RV32C:       # %bb.0:
5634; CHECK-RV32C-NEXT:    lw a2, 0(a1)
5635; CHECK-RV32C-NEXT:    lw a3, 4(a1)
5636; CHECK-RV32C-NEXT:    lw a4, 8(a1)
5637; CHECK-RV32C-NEXT:    lw a1, 12(a1)
5638; CHECK-RV32C-NEXT:    c.ntl.s1
5639; CHECK-RV32C-NEXT:    sw a2, 0(a0)
5640; CHECK-RV32C-NEXT:    c.ntl.s1
5641; CHECK-RV32C-NEXT:    sw a3, 4(a0)
5642; CHECK-RV32C-NEXT:    c.ntl.s1
5643; CHECK-RV32C-NEXT:    sw a4, 8(a0)
5644; CHECK-RV32C-NEXT:    c.ntl.s1
5645; CHECK-RV32C-NEXT:    sw a1, 12(a0)
5646; CHECK-RV32C-NEXT:    ret
5647;
5648; CHECK-RV64V-LABEL: test_nontemporal_S1_store_v4i32:
5649; CHECK-RV64V:       # %bb.0:
5650; CHECK-RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5651; CHECK-RV64V-NEXT:    ntl.s1
5652; CHECK-RV64V-NEXT:    vse32.v v8, (a0)
5653; CHECK-RV64V-NEXT:    ret
5654;
5655; CHECK-RV32V-LABEL: test_nontemporal_S1_store_v4i32:
5656; CHECK-RV32V:       # %bb.0:
5657; CHECK-RV32V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
5658; CHECK-RV32V-NEXT:    ntl.s1
5659; CHECK-RV32V-NEXT:    vse32.v v8, (a0)
5660; CHECK-RV32V-NEXT:    ret
5661  store <4 x i32> %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !3
5662  ret void
5663}
5664
; Nontemporal store of a <2 x i64> value tagged with
; !riscv-nontemporal-domain !3 (the definition of !3 is outside this excerpt).
; Without +v, riscv64 is expected to emit two sd stores directly from a1/a2,
; while riscv32 first loads four words through a1 and emits four sw stores;
; every scalar store is preceded by ntl.s1 (c.ntl.s1 when +c is enabled).
; With +v a single vse64.v preceded by one ntl.s1 is expected.
5665define void @test_nontemporal_S1_store_v2i64(ptr %p, <2 x i64> %v) {
5666; CHECK-RV64-LABEL: test_nontemporal_S1_store_v2i64:
5667; CHECK-RV64:       # %bb.0:
5668; CHECK-RV64-NEXT:    ntl.s1
5669; CHECK-RV64-NEXT:    sd a1, 0(a0)
5670; CHECK-RV64-NEXT:    ntl.s1
5671; CHECK-RV64-NEXT:    sd a2, 8(a0)
5672; CHECK-RV64-NEXT:    ret
5673;
5674; CHECK-RV32-LABEL: test_nontemporal_S1_store_v2i64:
5675; CHECK-RV32:       # %bb.0:
5676; CHECK-RV32-NEXT:    lw a2, 0(a1)
5677; CHECK-RV32-NEXT:    lw a3, 4(a1)
5678; CHECK-RV32-NEXT:    lw a4, 8(a1)
5679; CHECK-RV32-NEXT:    lw a1, 12(a1)
5680; CHECK-RV32-NEXT:    ntl.s1
5681; CHECK-RV32-NEXT:    sw a2, 0(a0)
5682; CHECK-RV32-NEXT:    ntl.s1
5683; CHECK-RV32-NEXT:    sw a3, 4(a0)
5684; CHECK-RV32-NEXT:    ntl.s1
5685; CHECK-RV32-NEXT:    sw a4, 8(a0)
5686; CHECK-RV32-NEXT:    ntl.s1
5687; CHECK-RV32-NEXT:    sw a1, 12(a0)
5688; CHECK-RV32-NEXT:    ret
5689;
5690; CHECK-RV64C-LABEL: test_nontemporal_S1_store_v2i64:
5691; CHECK-RV64C:       # %bb.0:
5692; CHECK-RV64C-NEXT:    c.ntl.s1
5693; CHECK-RV64C-NEXT:    sd a1, 0(a0)
5694; CHECK-RV64C-NEXT:    c.ntl.s1
5695; CHECK-RV64C-NEXT:    sd a2, 8(a0)
5696; CHECK-RV64C-NEXT:    ret
5697;
5698; CHECK-RV32C-LABEL: test_nontemporal_S1_store_v2i64:
5699; CHECK-RV32C:       # %bb.0:
5700; CHECK-RV32C-NEXT:    lw a2, 0(a1)
5701; CHECK-RV32C-NEXT:    lw a3, 4(a1)
5702; CHECK-RV32C-NEXT:    lw a4, 8(a1)
5703; CHECK-RV32C-NEXT:    lw a1, 12(a1)
5704; CHECK-RV32C-NEXT:    c.ntl.s1
5705; CHECK-RV32C-NEXT:    sw a2, 0(a0)
5706; CHECK-RV32C-NEXT:    c.ntl.s1
5707; CHECK-RV32C-NEXT:    sw a3, 4(a0)
5708; CHECK-RV32C-NEXT:    c.ntl.s1
5709; CHECK-RV32C-NEXT:    sw a4, 8(a0)
5710; CHECK-RV32C-NEXT:    c.ntl.s1
5711; CHECK-RV32C-NEXT:    sw a1, 12(a0)
5712; CHECK-RV32C-NEXT:    ret
5713;
5714; CHECK-RV64V-LABEL: test_nontemporal_S1_store_v2i64:
5715; CHECK-RV64V:       # %bb.0:
5716; CHECK-RV64V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
5717; CHECK-RV64V-NEXT:    ntl.s1
5718; CHECK-RV64V-NEXT:    vse64.v v8, (a0)
5719; CHECK-RV64V-NEXT:    ret
5720;
5721; CHECK-RV32V-LABEL: test_nontemporal_S1_store_v2i64:
5722; CHECK-RV32V:       # %bb.0:
5723; CHECK-RV32V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
5724; CHECK-RV32V-NEXT:    ntl.s1
5725; CHECK-RV32V-NEXT:    vse64.v v8, (a0)
5726; CHECK-RV32V-NEXT:    ret
5727  store <2 x i64> %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !3
5728  ret void
5729}
5730
; Nontemporal i64 load tagged with !riscv-nontemporal-domain !4 (the
; definition of !4 is outside this excerpt). riscv64 is expected to emit one
; ld preceded by ntl.all; riscv32 splits the access into two lw loads, each
; with its own ntl.all. With +c the hint is expected as c.ntl.all.
5731define i64 @test_nontemporal_ALL_load_i64(ptr %p) {
5732; CHECK-RV64-LABEL: test_nontemporal_ALL_load_i64:
5733; CHECK-RV64:       # %bb.0:
5734; CHECK-RV64-NEXT:    ntl.all
5735; CHECK-RV64-NEXT:    ld a0, 0(a0)
5736; CHECK-RV64-NEXT:    ret
5737;
5738; CHECK-RV32-LABEL: test_nontemporal_ALL_load_i64:
5739; CHECK-RV32:       # %bb.0:
5740; CHECK-RV32-NEXT:    ntl.all
5741; CHECK-RV32-NEXT:    lw a2, 0(a0)
5742; CHECK-RV32-NEXT:    ntl.all
5743; CHECK-RV32-NEXT:    lw a1, 4(a0)
5744; CHECK-RV32-NEXT:    mv a0, a2
5745; CHECK-RV32-NEXT:    ret
5746;
5747; CHECK-RV64C-LABEL: test_nontemporal_ALL_load_i64:
5748; CHECK-RV64C:       # %bb.0:
5749; CHECK-RV64C-NEXT:    c.ntl.all
5750; CHECK-RV64C-NEXT:    ld a0, 0(a0)
5751; CHECK-RV64C-NEXT:    ret
5752;
5753; CHECK-RV32C-LABEL: test_nontemporal_ALL_load_i64:
5754; CHECK-RV32C:       # %bb.0:
5755; CHECK-RV32C-NEXT:    c.ntl.all
5756; CHECK-RV32C-NEXT:    lw a2, 0(a0)
5757; CHECK-RV32C-NEXT:    c.ntl.all
5758; CHECK-RV32C-NEXT:    lw a1, 4(a0)
5759; CHECK-RV32C-NEXT:    mv a0, a2
5760; CHECK-RV32C-NEXT:    ret
5761;
5762; CHECK-RV64V-LABEL: test_nontemporal_ALL_load_i64:
5763; CHECK-RV64V:       # %bb.0:
5764; CHECK-RV64V-NEXT:    ntl.all
5765; CHECK-RV64V-NEXT:    ld a0, 0(a0)
5766; CHECK-RV64V-NEXT:    ret
5767;
5768; CHECK-RV32V-LABEL: test_nontemporal_ALL_load_i64:
5769; CHECK-RV32V:       # %bb.0:
5770; CHECK-RV32V-NEXT:    ntl.all
5771; CHECK-RV32V-NEXT:    lw a2, 0(a0)
5772; CHECK-RV32V-NEXT:    ntl.all
5773; CHECK-RV32V-NEXT:    lw a1, 4(a0)
5774; CHECK-RV32V-NEXT:    mv a0, a2
5775; CHECK-RV32V-NEXT:    ret
5776  %1 = load i64, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !4
5777  ret i64 %1
5778}
5779
; Nontemporal i32 load tagged with !riscv-nontemporal-domain !4 (the
; definition of !4 is outside this excerpt). Every configuration is expected
; to emit a single lw preceded by ntl.all (c.ntl.all when +c is enabled).
5780define i32 @test_nontemporal_ALL_load_i32(ptr %p) {
5781; CHECK-RV64-LABEL: test_nontemporal_ALL_load_i32:
5782; CHECK-RV64:       # %bb.0:
5783; CHECK-RV64-NEXT:    ntl.all
5784; CHECK-RV64-NEXT:    lw a0, 0(a0)
5785; CHECK-RV64-NEXT:    ret
5786;
5787; CHECK-RV32-LABEL: test_nontemporal_ALL_load_i32:
5788; CHECK-RV32:       # %bb.0:
5789; CHECK-RV32-NEXT:    ntl.all
5790; CHECK-RV32-NEXT:    lw a0, 0(a0)
5791; CHECK-RV32-NEXT:    ret
5792;
5793; CHECK-RV64C-LABEL: test_nontemporal_ALL_load_i32:
5794; CHECK-RV64C:       # %bb.0:
5795; CHECK-RV64C-NEXT:    c.ntl.all
5796; CHECK-RV64C-NEXT:    lw a0, 0(a0)
5797; CHECK-RV64C-NEXT:    ret
5798;
5799; CHECK-RV32C-LABEL: test_nontemporal_ALL_load_i32:
5800; CHECK-RV32C:       # %bb.0:
5801; CHECK-RV32C-NEXT:    c.ntl.all
5802; CHECK-RV32C-NEXT:    lw a0, 0(a0)
5803; CHECK-RV32C-NEXT:    ret
5804;
5805; CHECK-RV64V-LABEL: test_nontemporal_ALL_load_i32:
5806; CHECK-RV64V:       # %bb.0:
5807; CHECK-RV64V-NEXT:    ntl.all
5808; CHECK-RV64V-NEXT:    lw a0, 0(a0)
5809; CHECK-RV64V-NEXT:    ret
5810;
5811; CHECK-RV32V-LABEL: test_nontemporal_ALL_load_i32:
5812; CHECK-RV32V:       # %bb.0:
5813; CHECK-RV32V-NEXT:    ntl.all
5814; CHECK-RV32V-NEXT:    lw a0, 0(a0)
5815; CHECK-RV32V-NEXT:    ret
5816  %1 = load i32, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !4
5817  ret i32 %1
5818}
5819
; Nontemporal i16 load tagged with !riscv-nontemporal-domain !4 (the
; definition of !4 is outside this excerpt). Every configuration is expected
; to emit a single lh preceded by ntl.all (c.ntl.all when +c is enabled).
5820define i16 @test_nontemporal_ALL_load_i16(ptr %p) {
5821; CHECK-RV64-LABEL: test_nontemporal_ALL_load_i16:
5822; CHECK-RV64:       # %bb.0:
5823; CHECK-RV64-NEXT:    ntl.all
5824; CHECK-RV64-NEXT:    lh a0, 0(a0)
5825; CHECK-RV64-NEXT:    ret
5826;
5827; CHECK-RV32-LABEL: test_nontemporal_ALL_load_i16:
5828; CHECK-RV32:       # %bb.0:
5829; CHECK-RV32-NEXT:    ntl.all
5830; CHECK-RV32-NEXT:    lh a0, 0(a0)
5831; CHECK-RV32-NEXT:    ret
5832;
5833; CHECK-RV64C-LABEL: test_nontemporal_ALL_load_i16:
5834; CHECK-RV64C:       # %bb.0:
5835; CHECK-RV64C-NEXT:    c.ntl.all
5836; CHECK-RV64C-NEXT:    lh a0, 0(a0)
5837; CHECK-RV64C-NEXT:    ret
5838;
5839; CHECK-RV32C-LABEL: test_nontemporal_ALL_load_i16:
5840; CHECK-RV32C:       # %bb.0:
5841; CHECK-RV32C-NEXT:    c.ntl.all
5842; CHECK-RV32C-NEXT:    lh a0, 0(a0)
5843; CHECK-RV32C-NEXT:    ret
5844;
5845; CHECK-RV64V-LABEL: test_nontemporal_ALL_load_i16:
5846; CHECK-RV64V:       # %bb.0:
5847; CHECK-RV64V-NEXT:    ntl.all
5848; CHECK-RV64V-NEXT:    lh a0, 0(a0)
5849; CHECK-RV64V-NEXT:    ret
5850;
5851; CHECK-RV32V-LABEL: test_nontemporal_ALL_load_i16:
5852; CHECK-RV32V:       # %bb.0:
5853; CHECK-RV32V-NEXT:    ntl.all
5854; CHECK-RV32V-NEXT:    lh a0, 0(a0)
5855; CHECK-RV32V-NEXT:    ret
5856  %1 = load i16, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !4
5857  ret i16 %1
5858}
5859
; Nontemporal i8 load tagged with !riscv-nontemporal-domain !4 (the
; definition of !4 is outside this excerpt). Every configuration is expected
; to emit a single lbu preceded by ntl.all (c.ntl.all when +c is enabled).
5860define i8 @test_nontemporal_ALL_load_i8(ptr %p) {
5861; CHECK-RV64-LABEL: test_nontemporal_ALL_load_i8:
5862; CHECK-RV64:       # %bb.0:
5863; CHECK-RV64-NEXT:    ntl.all
5864; CHECK-RV64-NEXT:    lbu a0, 0(a0)
5865; CHECK-RV64-NEXT:    ret
5866;
5867; CHECK-RV32-LABEL: test_nontemporal_ALL_load_i8:
5868; CHECK-RV32:       # %bb.0:
5869; CHECK-RV32-NEXT:    ntl.all
5870; CHECK-RV32-NEXT:    lbu a0, 0(a0)
5871; CHECK-RV32-NEXT:    ret
5872;
5873; CHECK-RV64C-LABEL: test_nontemporal_ALL_load_i8:
5874; CHECK-RV64C:       # %bb.0:
5875; CHECK-RV64C-NEXT:    c.ntl.all
5876; CHECK-RV64C-NEXT:    lbu a0, 0(a0)
5877; CHECK-RV64C-NEXT:    ret
5878;
5879; CHECK-RV32C-LABEL: test_nontemporal_ALL_load_i8:
5880; CHECK-RV32C:       # %bb.0:
5881; CHECK-RV32C-NEXT:    c.ntl.all
5882; CHECK-RV32C-NEXT:    lbu a0, 0(a0)
5883; CHECK-RV32C-NEXT:    ret
5884;
5885; CHECK-RV64V-LABEL: test_nontemporal_ALL_load_i8:
5886; CHECK-RV64V:       # %bb.0:
5887; CHECK-RV64V-NEXT:    ntl.all
5888; CHECK-RV64V-NEXT:    lbu a0, 0(a0)
5889; CHECK-RV64V-NEXT:    ret
5890;
5891; CHECK-RV32V-LABEL: test_nontemporal_ALL_load_i8:
5892; CHECK-RV32V:       # %bb.0:
5893; CHECK-RV32V-NEXT:    ntl.all
5894; CHECK-RV32V-NEXT:    lbu a0, 0(a0)
5895; CHECK-RV32V-NEXT:    ret
5896  %1 = load i8, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !4
5897  ret i8 %1
5898}
5899
; Two nontemporal half loads tagged with !riscv-nontemporal-domain !4 (the
; definition of !4 is outside this excerpt): one at %p and one at element
; index 3 (byte offset 6 in the expected code), whose results are added.
; Every configuration is expected to emit two flh loads, each preceded by its
; own ntl.all (c.ntl.all when +c is enabled), followed by fadd.h.
5900define half @test_nontemporal_ALL_load_half(ptr %p) nounwind {
5901; CHECK-RV64-LABEL: test_nontemporal_ALL_load_half:
5902; CHECK-RV64:       # %bb.0:
5903; CHECK-RV64-NEXT:    ntl.all
5904; CHECK-RV64-NEXT:    flh fa5, 0(a0)
5905; CHECK-RV64-NEXT:    ntl.all
5906; CHECK-RV64-NEXT:    flh fa4, 6(a0)
5907; CHECK-RV64-NEXT:    fadd.h fa0, fa5, fa4
5908; CHECK-RV64-NEXT:    ret
5909;
5910; CHECK-RV32-LABEL: test_nontemporal_ALL_load_half:
5911; CHECK-RV32:       # %bb.0:
5912; CHECK-RV32-NEXT:    ntl.all
5913; CHECK-RV32-NEXT:    flh fa5, 0(a0)
5914; CHECK-RV32-NEXT:    ntl.all
5915; CHECK-RV32-NEXT:    flh fa4, 6(a0)
5916; CHECK-RV32-NEXT:    fadd.h fa0, fa5, fa4
5917; CHECK-RV32-NEXT:    ret
5918;
5919; CHECK-RV64C-LABEL: test_nontemporal_ALL_load_half:
5920; CHECK-RV64C:       # %bb.0:
5921; CHECK-RV64C-NEXT:    c.ntl.all
5922; CHECK-RV64C-NEXT:    flh fa5, 0(a0)
5923; CHECK-RV64C-NEXT:    c.ntl.all
5924; CHECK-RV64C-NEXT:    flh fa4, 6(a0)
5925; CHECK-RV64C-NEXT:    fadd.h fa0, fa5, fa4
5926; CHECK-RV64C-NEXT:    ret
5927;
5928; CHECK-RV32C-LABEL: test_nontemporal_ALL_load_half:
5929; CHECK-RV32C:       # %bb.0:
5930; CHECK-RV32C-NEXT:    c.ntl.all
5931; CHECK-RV32C-NEXT:    flh fa5, 0(a0)
5932; CHECK-RV32C-NEXT:    c.ntl.all
5933; CHECK-RV32C-NEXT:    flh fa4, 6(a0)
5934; CHECK-RV32C-NEXT:    fadd.h fa0, fa5, fa4
5935; CHECK-RV32C-NEXT:    ret
5936;
5937; CHECK-RV64V-LABEL: test_nontemporal_ALL_load_half:
5938; CHECK-RV64V:       # %bb.0:
5939; CHECK-RV64V-NEXT:    ntl.all
5940; CHECK-RV64V-NEXT:    flh fa5, 0(a0)
5941; CHECK-RV64V-NEXT:    ntl.all
5942; CHECK-RV64V-NEXT:    flh fa4, 6(a0)
5943; CHECK-RV64V-NEXT:    fadd.h fa0, fa5, fa4
5944; CHECK-RV64V-NEXT:    ret
5945;
5946; CHECK-RV32V-LABEL: test_nontemporal_ALL_load_half:
5947; CHECK-RV32V:       # %bb.0:
5948; CHECK-RV32V-NEXT:    ntl.all
5949; CHECK-RV32V-NEXT:    flh fa5, 0(a0)
5950; CHECK-RV32V-NEXT:    ntl.all
5951; CHECK-RV32V-NEXT:    flh fa4, 6(a0)
5952; CHECK-RV32V-NEXT:    fadd.h fa0, fa5, fa4
5953; CHECK-RV32V-NEXT:    ret
5954  %1 = load half, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !4
5955  %2 = getelementptr half, ptr %p, i32 3
5956  %3 = load half, ptr %2, !nontemporal !0, !riscv-nontemporal-domain !4
5957  %4 = fadd half %1, %3
5958  ret half %4
5959}
5960
; Nontemporal float load tagged with !riscv-nontemporal-domain !4 (the
; definition of !4 is outside this excerpt). Every configuration is expected
; to emit a single flw preceded by ntl.all (c.ntl.all when +c is enabled).
5961define float @test_nontemporal_ALL_load_float(ptr %p) {
5962; CHECK-RV64-LABEL: test_nontemporal_ALL_load_float:
5963; CHECK-RV64:       # %bb.0:
5964; CHECK-RV64-NEXT:    ntl.all
5965; CHECK-RV64-NEXT:    flw fa0, 0(a0)
5966; CHECK-RV64-NEXT:    ret
5967;
5968; CHECK-RV32-LABEL: test_nontemporal_ALL_load_float:
5969; CHECK-RV32:       # %bb.0:
5970; CHECK-RV32-NEXT:    ntl.all
5971; CHECK-RV32-NEXT:    flw fa0, 0(a0)
5972; CHECK-RV32-NEXT:    ret
5973;
5974; CHECK-RV64C-LABEL: test_nontemporal_ALL_load_float:
5975; CHECK-RV64C:       # %bb.0:
5976; CHECK-RV64C-NEXT:    c.ntl.all
5977; CHECK-RV64C-NEXT:    flw fa0, 0(a0)
5978; CHECK-RV64C-NEXT:    ret
5979;
5980; CHECK-RV32C-LABEL: test_nontemporal_ALL_load_float:
5981; CHECK-RV32C:       # %bb.0:
5982; CHECK-RV32C-NEXT:    c.ntl.all
5983; CHECK-RV32C-NEXT:    flw fa0, 0(a0)
5984; CHECK-RV32C-NEXT:    ret
5985;
5986; CHECK-RV64V-LABEL: test_nontemporal_ALL_load_float:
5987; CHECK-RV64V:       # %bb.0:
5988; CHECK-RV64V-NEXT:    ntl.all
5989; CHECK-RV64V-NEXT:    flw fa0, 0(a0)
5990; CHECK-RV64V-NEXT:    ret
5991;
5992; CHECK-RV32V-LABEL: test_nontemporal_ALL_load_float:
5993; CHECK-RV32V:       # %bb.0:
5994; CHECK-RV32V-NEXT:    ntl.all
5995; CHECK-RV32V-NEXT:    flw fa0, 0(a0)
5996; CHECK-RV32V-NEXT:    ret
5997  %1 = load float, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !4
5998  ret float %1
5999}
6000
; Nontemporal double load tagged with !riscv-nontemporal-domain !4 (the
; definition of !4 is outside this excerpt). Every configuration, riscv32
; included, is expected to emit a single fld preceded by ntl.all (c.ntl.all
; when +c is enabled).
6001define double @test_nontemporal_ALL_load_double(ptr %p) {
6002; CHECK-RV64-LABEL: test_nontemporal_ALL_load_double:
6003; CHECK-RV64:       # %bb.0:
6004; CHECK-RV64-NEXT:    ntl.all
6005; CHECK-RV64-NEXT:    fld fa0, 0(a0)
6006; CHECK-RV64-NEXT:    ret
6007;
6008; CHECK-RV32-LABEL: test_nontemporal_ALL_load_double:
6009; CHECK-RV32:       # %bb.0:
6010; CHECK-RV32-NEXT:    ntl.all
6011; CHECK-RV32-NEXT:    fld fa0, 0(a0)
6012; CHECK-RV32-NEXT:    ret
6013;
6014; CHECK-RV64C-LABEL: test_nontemporal_ALL_load_double:
6015; CHECK-RV64C:       # %bb.0:
6016; CHECK-RV64C-NEXT:    c.ntl.all
6017; CHECK-RV64C-NEXT:    fld fa0, 0(a0)
6018; CHECK-RV64C-NEXT:    ret
6019;
6020; CHECK-RV32C-LABEL: test_nontemporal_ALL_load_double:
6021; CHECK-RV32C:       # %bb.0:
6022; CHECK-RV32C-NEXT:    c.ntl.all
6023; CHECK-RV32C-NEXT:    fld fa0, 0(a0)
6024; CHECK-RV32C-NEXT:    ret
6025;
6026; CHECK-RV64V-LABEL: test_nontemporal_ALL_load_double:
6027; CHECK-RV64V:       # %bb.0:
6028; CHECK-RV64V-NEXT:    ntl.all
6029; CHECK-RV64V-NEXT:    fld fa0, 0(a0)
6030; CHECK-RV64V-NEXT:    ret
6031;
6032; CHECK-RV32V-LABEL: test_nontemporal_ALL_load_double:
6033; CHECK-RV32V:       # %bb.0:
6034; CHECK-RV32V-NEXT:    ntl.all
6035; CHECK-RV32V-NEXT:    fld fa0, 0(a0)
6036; CHECK-RV32V-NEXT:    ret
6037  %1 = load double, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !4
6038  ret double %1
6039}
6040
; Nontemporal <16 x i8> load tagged with !riscv-nontemporal-domain !4 (the
; definition of !4 is outside this excerpt). Without +v the expected code
; reads the 16 bytes through a1 as two ld (riscv64) or four lw (riscv32)
; loads, each preceded by ntl.all (c.ntl.all when +c is enabled), and writes
; them through a0 with plain stores that carry no hint. With +v a single
; vle8.v preceded by one ntl.all is expected.
6041define <16 x i8> @test_nontemporal_ALL_load_v16i8(ptr %p) {
6042; CHECK-RV64-LABEL: test_nontemporal_ALL_load_v16i8:
6043; CHECK-RV64:       # %bb.0:
6044; CHECK-RV64-NEXT:    ntl.all
6045; CHECK-RV64-NEXT:    ld a2, 0(a1)
6046; CHECK-RV64-NEXT:    ntl.all
6047; CHECK-RV64-NEXT:    ld a1, 8(a1)
6048; CHECK-RV64-NEXT:    sd a2, 0(a0)
6049; CHECK-RV64-NEXT:    sd a1, 8(a0)
6050; CHECK-RV64-NEXT:    ret
6051;
6052; CHECK-RV32-LABEL: test_nontemporal_ALL_load_v16i8:
6053; CHECK-RV32:       # %bb.0:
6054; CHECK-RV32-NEXT:    ntl.all
6055; CHECK-RV32-NEXT:    lw a2, 0(a1)
6056; CHECK-RV32-NEXT:    ntl.all
6057; CHECK-RV32-NEXT:    lw a3, 4(a1)
6058; CHECK-RV32-NEXT:    ntl.all
6059; CHECK-RV32-NEXT:    lw a4, 8(a1)
6060; CHECK-RV32-NEXT:    ntl.all
6061; CHECK-RV32-NEXT:    lw a1, 12(a1)
6062; CHECK-RV32-NEXT:    sw a2, 0(a0)
6063; CHECK-RV32-NEXT:    sw a3, 4(a0)
6064; CHECK-RV32-NEXT:    sw a4, 8(a0)
6065; CHECK-RV32-NEXT:    sw a1, 12(a0)
6066; CHECK-RV32-NEXT:    ret
6067;
6068; CHECK-RV64C-LABEL: test_nontemporal_ALL_load_v16i8:
6069; CHECK-RV64C:       # %bb.0:
6070; CHECK-RV64C-NEXT:    c.ntl.all
6071; CHECK-RV64C-NEXT:    ld a2, 0(a1)
6072; CHECK-RV64C-NEXT:    c.ntl.all
6073; CHECK-RV64C-NEXT:    ld a1, 8(a1)
6074; CHECK-RV64C-NEXT:    sd a2, 0(a0)
6075; CHECK-RV64C-NEXT:    sd a1, 8(a0)
6076; CHECK-RV64C-NEXT:    ret
6077;
6078; CHECK-RV32C-LABEL: test_nontemporal_ALL_load_v16i8:
6079; CHECK-RV32C:       # %bb.0:
6080; CHECK-RV32C-NEXT:    c.ntl.all
6081; CHECK-RV32C-NEXT:    lw a2, 0(a1)
6082; CHECK-RV32C-NEXT:    c.ntl.all
6083; CHECK-RV32C-NEXT:    lw a3, 4(a1)
6084; CHECK-RV32C-NEXT:    c.ntl.all
6085; CHECK-RV32C-NEXT:    lw a4, 8(a1)
6086; CHECK-RV32C-NEXT:    c.ntl.all
6087; CHECK-RV32C-NEXT:    lw a1, 12(a1)
6088; CHECK-RV32C-NEXT:    sw a2, 0(a0)
6089; CHECK-RV32C-NEXT:    sw a3, 4(a0)
6090; CHECK-RV32C-NEXT:    sw a4, 8(a0)
6091; CHECK-RV32C-NEXT:    sw a1, 12(a0)
6092; CHECK-RV32C-NEXT:    ret
6093;
6094; CHECK-RV64V-LABEL: test_nontemporal_ALL_load_v16i8:
6095; CHECK-RV64V:       # %bb.0:
6096; CHECK-RV64V-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
6097; CHECK-RV64V-NEXT:    ntl.all
6098; CHECK-RV64V-NEXT:    vle8.v v8, (a0)
6099; CHECK-RV64V-NEXT:    ret
6100;
6101; CHECK-RV32V-LABEL: test_nontemporal_ALL_load_v16i8:
6102; CHECK-RV32V:       # %bb.0:
6103; CHECK-RV32V-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
6104; CHECK-RV32V-NEXT:    ntl.all
6105; CHECK-RV32V-NEXT:    vle8.v v8, (a0)
6106; CHECK-RV32V-NEXT:    ret
6107  %1 = load <16 x i8>, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !4
6108  ret <16 x i8> %1
6109}
6110
; Nontemporal <8 x i16> load tagged with !riscv-nontemporal-domain !4 (the
; definition of !4 is outside this excerpt). Without +v the expected code
; reads the 16 bytes through a1 as two ld (riscv64) or four lw (riscv32)
; loads, each preceded by ntl.all (c.ntl.all when +c is enabled), and writes
; them through a0 with plain stores that carry no hint. With +v a single
; vle16.v preceded by one ntl.all is expected.
6111define <8 x i16> @test_nontemporal_ALL_load_v8i16(ptr %p) {
6112; CHECK-RV64-LABEL: test_nontemporal_ALL_load_v8i16:
6113; CHECK-RV64:       # %bb.0:
6114; CHECK-RV64-NEXT:    ntl.all
6115; CHECK-RV64-NEXT:    ld a2, 0(a1)
6116; CHECK-RV64-NEXT:    ntl.all
6117; CHECK-RV64-NEXT:    ld a1, 8(a1)
6118; CHECK-RV64-NEXT:    sd a2, 0(a0)
6119; CHECK-RV64-NEXT:    sd a1, 8(a0)
6120; CHECK-RV64-NEXT:    ret
6121;
6122; CHECK-RV32-LABEL: test_nontemporal_ALL_load_v8i16:
6123; CHECK-RV32:       # %bb.0:
6124; CHECK-RV32-NEXT:    ntl.all
6125; CHECK-RV32-NEXT:    lw a2, 0(a1)
6126; CHECK-RV32-NEXT:    ntl.all
6127; CHECK-RV32-NEXT:    lw a3, 4(a1)
6128; CHECK-RV32-NEXT:    ntl.all
6129; CHECK-RV32-NEXT:    lw a4, 8(a1)
6130; CHECK-RV32-NEXT:    ntl.all
6131; CHECK-RV32-NEXT:    lw a1, 12(a1)
6132; CHECK-RV32-NEXT:    sw a2, 0(a0)
6133; CHECK-RV32-NEXT:    sw a3, 4(a0)
6134; CHECK-RV32-NEXT:    sw a4, 8(a0)
6135; CHECK-RV32-NEXT:    sw a1, 12(a0)
6136; CHECK-RV32-NEXT:    ret
6137;
6138; CHECK-RV64C-LABEL: test_nontemporal_ALL_load_v8i16:
6139; CHECK-RV64C:       # %bb.0:
6140; CHECK-RV64C-NEXT:    c.ntl.all
6141; CHECK-RV64C-NEXT:    ld a2, 0(a1)
6142; CHECK-RV64C-NEXT:    c.ntl.all
6143; CHECK-RV64C-NEXT:    ld a1, 8(a1)
6144; CHECK-RV64C-NEXT:    sd a2, 0(a0)
6145; CHECK-RV64C-NEXT:    sd a1, 8(a0)
6146; CHECK-RV64C-NEXT:    ret
6147;
6148; CHECK-RV32C-LABEL: test_nontemporal_ALL_load_v8i16:
6149; CHECK-RV32C:       # %bb.0:
6150; CHECK-RV32C-NEXT:    c.ntl.all
6151; CHECK-RV32C-NEXT:    lw a2, 0(a1)
6152; CHECK-RV32C-NEXT:    c.ntl.all
6153; CHECK-RV32C-NEXT:    lw a3, 4(a1)
6154; CHECK-RV32C-NEXT:    c.ntl.all
6155; CHECK-RV32C-NEXT:    lw a4, 8(a1)
6156; CHECK-RV32C-NEXT:    c.ntl.all
6157; CHECK-RV32C-NEXT:    lw a1, 12(a1)
6158; CHECK-RV32C-NEXT:    sw a2, 0(a0)
6159; CHECK-RV32C-NEXT:    sw a3, 4(a0)
6160; CHECK-RV32C-NEXT:    sw a4, 8(a0)
6161; CHECK-RV32C-NEXT:    sw a1, 12(a0)
6162; CHECK-RV32C-NEXT:    ret
6163;
6164; CHECK-RV64V-LABEL: test_nontemporal_ALL_load_v8i16:
6165; CHECK-RV64V:       # %bb.0:
6166; CHECK-RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
6167; CHECK-RV64V-NEXT:    ntl.all
6168; CHECK-RV64V-NEXT:    vle16.v v8, (a0)
6169; CHECK-RV64V-NEXT:    ret
6170;
6171; CHECK-RV32V-LABEL: test_nontemporal_ALL_load_v8i16:
6172; CHECK-RV32V:       # %bb.0:
6173; CHECK-RV32V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
6174; CHECK-RV32V-NEXT:    ntl.all
6175; CHECK-RV32V-NEXT:    vle16.v v8, (a0)
6176; CHECK-RV32V-NEXT:    ret
6177  %1 = load <8 x i16>, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !4
6178  ret <8 x i16> %1
6179}
6180
; Nontemporal load of <4 x i32> tagged with !riscv-nontemporal-domain !4 (the
; metadata node itself is defined outside this function). The expectations
; below require an ntl.all hint (c.ntl.all in the +c runs) directly before
; every scalar load the vector is split into, and a single ntl.all directly
; before the vle32.v in the +v runs.
6181define <4 x i32> @test_nontemporal_ALL_load_v4i32(ptr %p) {
6182; CHECK-RV64-LABEL: test_nontemporal_ALL_load_v4i32:
6183; CHECK-RV64:       # %bb.0:
6184; CHECK-RV64-NEXT:    ntl.all
6185; CHECK-RV64-NEXT:    ld a2, 0(a1)
6186; CHECK-RV64-NEXT:    ntl.all
6187; CHECK-RV64-NEXT:    ld a1, 8(a1)
6188; CHECK-RV64-NEXT:    sd a2, 0(a0)
6189; CHECK-RV64-NEXT:    sd a1, 8(a0)
6190; CHECK-RV64-NEXT:    ret
6191;
6192; CHECK-RV32-LABEL: test_nontemporal_ALL_load_v4i32:
6193; CHECK-RV32:       # %bb.0:
6194; CHECK-RV32-NEXT:    ntl.all
6195; CHECK-RV32-NEXT:    lw a2, 0(a1)
6196; CHECK-RV32-NEXT:    ntl.all
6197; CHECK-RV32-NEXT:    lw a3, 4(a1)
6198; CHECK-RV32-NEXT:    ntl.all
6199; CHECK-RV32-NEXT:    lw a4, 8(a1)
6200; CHECK-RV32-NEXT:    ntl.all
6201; CHECK-RV32-NEXT:    lw a1, 12(a1)
6202; CHECK-RV32-NEXT:    sw a2, 0(a0)
6203; CHECK-RV32-NEXT:    sw a3, 4(a0)
6204; CHECK-RV32-NEXT:    sw a4, 8(a0)
6205; CHECK-RV32-NEXT:    sw a1, 12(a0)
6206; CHECK-RV32-NEXT:    ret
6207;
6208; CHECK-RV64C-LABEL: test_nontemporal_ALL_load_v4i32:
6209; CHECK-RV64C:       # %bb.0:
6210; CHECK-RV64C-NEXT:    c.ntl.all
6211; CHECK-RV64C-NEXT:    ld a2, 0(a1)
6212; CHECK-RV64C-NEXT:    c.ntl.all
6213; CHECK-RV64C-NEXT:    ld a1, 8(a1)
6214; CHECK-RV64C-NEXT:    sd a2, 0(a0)
6215; CHECK-RV64C-NEXT:    sd a1, 8(a0)
6216; CHECK-RV64C-NEXT:    ret
6217;
6218; CHECK-RV32C-LABEL: test_nontemporal_ALL_load_v4i32:
6219; CHECK-RV32C:       # %bb.0:
6220; CHECK-RV32C-NEXT:    c.ntl.all
6221; CHECK-RV32C-NEXT:    lw a2, 0(a1)
6222; CHECK-RV32C-NEXT:    c.ntl.all
6223; CHECK-RV32C-NEXT:    lw a3, 4(a1)
6224; CHECK-RV32C-NEXT:    c.ntl.all
6225; CHECK-RV32C-NEXT:    lw a4, 8(a1)
6226; CHECK-RV32C-NEXT:    c.ntl.all
6227; CHECK-RV32C-NEXT:    lw a1, 12(a1)
6228; CHECK-RV32C-NEXT:    sw a2, 0(a0)
6229; CHECK-RV32C-NEXT:    sw a3, 4(a0)
6230; CHECK-RV32C-NEXT:    sw a4, 8(a0)
6231; CHECK-RV32C-NEXT:    sw a1, 12(a0)
6232; CHECK-RV32C-NEXT:    ret
6233;
6234; CHECK-RV64V-LABEL: test_nontemporal_ALL_load_v4i32:
6235; CHECK-RV64V:       # %bb.0:
6236; CHECK-RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
6237; CHECK-RV64V-NEXT:    ntl.all
6238; CHECK-RV64V-NEXT:    vle32.v v8, (a0)
6239; CHECK-RV64V-NEXT:    ret
6240;
6241; CHECK-RV32V-LABEL: test_nontemporal_ALL_load_v4i32:
6242; CHECK-RV32V:       # %bb.0:
6243; CHECK-RV32V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
6244; CHECK-RV32V-NEXT:    ntl.all
6245; CHECK-RV32V-NEXT:    vle32.v v8, (a0)
6246; CHECK-RV32V-NEXT:    ret
6247  %1 = load <4 x i32>, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !4
6248  ret <4 x i32> %1
6249}
6250
; Nontemporal load of <2 x i64> tagged with !riscv-nontemporal-domain !4 (the
; metadata node itself is defined outside this function). Without +v the
; 64-bit runs expect two hinted ld instructions with the result left in a0/a1,
; while the 32-bit runs expect four hinted lw instructions followed by plain
; stores through a0. The hint is ntl.all (c.ntl.all in the +c runs). With +v a
; single ntl.all is expected directly before the vle64.v.
6251define <2 x i64> @test_nontemporal_ALL_load_v2i64(ptr %p) {
6252; CHECK-RV64-LABEL: test_nontemporal_ALL_load_v2i64:
6253; CHECK-RV64:       # %bb.0:
6254; CHECK-RV64-NEXT:    ntl.all
6255; CHECK-RV64-NEXT:    ld a2, 0(a0)
6256; CHECK-RV64-NEXT:    ntl.all
6257; CHECK-RV64-NEXT:    ld a1, 8(a0)
6258; CHECK-RV64-NEXT:    mv a0, a2
6259; CHECK-RV64-NEXT:    ret
6260;
6261; CHECK-RV32-LABEL: test_nontemporal_ALL_load_v2i64:
6262; CHECK-RV32:       # %bb.0:
6263; CHECK-RV32-NEXT:    ntl.all
6264; CHECK-RV32-NEXT:    lw a2, 0(a1)
6265; CHECK-RV32-NEXT:    ntl.all
6266; CHECK-RV32-NEXT:    lw a3, 4(a1)
6267; CHECK-RV32-NEXT:    ntl.all
6268; CHECK-RV32-NEXT:    lw a4, 8(a1)
6269; CHECK-RV32-NEXT:    ntl.all
6270; CHECK-RV32-NEXT:    lw a1, 12(a1)
6271; CHECK-RV32-NEXT:    sw a2, 0(a0)
6272; CHECK-RV32-NEXT:    sw a3, 4(a0)
6273; CHECK-RV32-NEXT:    sw a4, 8(a0)
6274; CHECK-RV32-NEXT:    sw a1, 12(a0)
6275; CHECK-RV32-NEXT:    ret
6276;
6277; CHECK-RV64C-LABEL: test_nontemporal_ALL_load_v2i64:
6278; CHECK-RV64C:       # %bb.0:
6279; CHECK-RV64C-NEXT:    c.ntl.all
6280; CHECK-RV64C-NEXT:    ld a2, 0(a0)
6281; CHECK-RV64C-NEXT:    c.ntl.all
6282; CHECK-RV64C-NEXT:    ld a1, 8(a0)
6283; CHECK-RV64C-NEXT:    mv a0, a2
6284; CHECK-RV64C-NEXT:    ret
6285;
6286; CHECK-RV32C-LABEL: test_nontemporal_ALL_load_v2i64:
6287; CHECK-RV32C:       # %bb.0:
6288; CHECK-RV32C-NEXT:    c.ntl.all
6289; CHECK-RV32C-NEXT:    lw a2, 0(a1)
6290; CHECK-RV32C-NEXT:    c.ntl.all
6291; CHECK-RV32C-NEXT:    lw a3, 4(a1)
6292; CHECK-RV32C-NEXT:    c.ntl.all
6293; CHECK-RV32C-NEXT:    lw a4, 8(a1)
6294; CHECK-RV32C-NEXT:    c.ntl.all
6295; CHECK-RV32C-NEXT:    lw a1, 12(a1)
6296; CHECK-RV32C-NEXT:    sw a2, 0(a0)
6297; CHECK-RV32C-NEXT:    sw a3, 4(a0)
6298; CHECK-RV32C-NEXT:    sw a4, 8(a0)
6299; CHECK-RV32C-NEXT:    sw a1, 12(a0)
6300; CHECK-RV32C-NEXT:    ret
6301;
6302; CHECK-RV64V-LABEL: test_nontemporal_ALL_load_v2i64:
6303; CHECK-RV64V:       # %bb.0:
6304; CHECK-RV64V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
6305; CHECK-RV64V-NEXT:    ntl.all
6306; CHECK-RV64V-NEXT:    vle64.v v8, (a0)
6307; CHECK-RV64V-NEXT:    ret
6308;
6309; CHECK-RV32V-LABEL: test_nontemporal_ALL_load_v2i64:
6310; CHECK-RV32V:       # %bb.0:
6311; CHECK-RV32V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
6312; CHECK-RV32V-NEXT:    ntl.all
6313; CHECK-RV32V-NEXT:    vle64.v v8, (a0)
6314; CHECK-RV32V-NEXT:    ret
6315  %1 = load <2 x i64>, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !4
6316  ret <2 x i64> %1
6317}
6318
; Nontemporal store of an i64 tagged with !riscv-nontemporal-domain !4 (the
; metadata node itself is defined outside this function). The 64-bit runs
; expect one hinted sd. The 32-bit runs, including the +v one, expect the
; value to be written as two sw halves, each with its own hint. The hint is
; ntl.all, or c.ntl.all in the +c runs.
6319define void @test_nontemporal_ALL_store_i64(ptr %p, i64 %v) {
6320; CHECK-RV64-LABEL: test_nontemporal_ALL_store_i64:
6321; CHECK-RV64:       # %bb.0:
6322; CHECK-RV64-NEXT:    ntl.all
6323; CHECK-RV64-NEXT:    sd a1, 0(a0)
6324; CHECK-RV64-NEXT:    ret
6325;
6326; CHECK-RV32-LABEL: test_nontemporal_ALL_store_i64:
6327; CHECK-RV32:       # %bb.0:
6328; CHECK-RV32-NEXT:    ntl.all
6329; CHECK-RV32-NEXT:    sw a1, 0(a0)
6330; CHECK-RV32-NEXT:    ntl.all
6331; CHECK-RV32-NEXT:    sw a2, 4(a0)
6332; CHECK-RV32-NEXT:    ret
6333;
6334; CHECK-RV64C-LABEL: test_nontemporal_ALL_store_i64:
6335; CHECK-RV64C:       # %bb.0:
6336; CHECK-RV64C-NEXT:    c.ntl.all
6337; CHECK-RV64C-NEXT:    sd a1, 0(a0)
6338; CHECK-RV64C-NEXT:    ret
6339;
6340; CHECK-RV32C-LABEL: test_nontemporal_ALL_store_i64:
6341; CHECK-RV32C:       # %bb.0:
6342; CHECK-RV32C-NEXT:    c.ntl.all
6343; CHECK-RV32C-NEXT:    sw a1, 0(a0)
6344; CHECK-RV32C-NEXT:    c.ntl.all
6345; CHECK-RV32C-NEXT:    sw a2, 4(a0)
6346; CHECK-RV32C-NEXT:    ret
6347;
6348; CHECK-RV64V-LABEL: test_nontemporal_ALL_store_i64:
6349; CHECK-RV64V:       # %bb.0:
6350; CHECK-RV64V-NEXT:    ntl.all
6351; CHECK-RV64V-NEXT:    sd a1, 0(a0)
6352; CHECK-RV64V-NEXT:    ret
6353;
6354; CHECK-RV32V-LABEL: test_nontemporal_ALL_store_i64:
6355; CHECK-RV32V:       # %bb.0:
6356; CHECK-RV32V-NEXT:    ntl.all
6357; CHECK-RV32V-NEXT:    sw a1, 0(a0)
6358; CHECK-RV32V-NEXT:    ntl.all
6359; CHECK-RV32V-NEXT:    sw a2, 4(a0)
6360; CHECK-RV32V-NEXT:    ret
6361  store i64 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !4
6362  ret void
6363}
6364
; Nontemporal store of an i32 tagged with !riscv-nontemporal-domain !4 (the
; metadata node itself is defined outside this function). Every run expects a
; single sw preceded directly by ntl.all, or by c.ntl.all in the +c runs.
6365define void @test_nontemporal_ALL_store_i32(ptr %p, i32 %v) {
6366; CHECK-RV64-LABEL: test_nontemporal_ALL_store_i32:
6367; CHECK-RV64:       # %bb.0:
6368; CHECK-RV64-NEXT:    ntl.all
6369; CHECK-RV64-NEXT:    sw a1, 0(a0)
6370; CHECK-RV64-NEXT:    ret
6371;
6372; CHECK-RV32-LABEL: test_nontemporal_ALL_store_i32:
6373; CHECK-RV32:       # %bb.0:
6374; CHECK-RV32-NEXT:    ntl.all
6375; CHECK-RV32-NEXT:    sw a1, 0(a0)
6376; CHECK-RV32-NEXT:    ret
6377;
6378; CHECK-RV64C-LABEL: test_nontemporal_ALL_store_i32:
6379; CHECK-RV64C:       # %bb.0:
6380; CHECK-RV64C-NEXT:    c.ntl.all
6381; CHECK-RV64C-NEXT:    sw a1, 0(a0)
6382; CHECK-RV64C-NEXT:    ret
6383;
6384; CHECK-RV32C-LABEL: test_nontemporal_ALL_store_i32:
6385; CHECK-RV32C:       # %bb.0:
6386; CHECK-RV32C-NEXT:    c.ntl.all
6387; CHECK-RV32C-NEXT:    sw a1, 0(a0)
6388; CHECK-RV32C-NEXT:    ret
6389;
6390; CHECK-RV64V-LABEL: test_nontemporal_ALL_store_i32:
6391; CHECK-RV64V:       # %bb.0:
6392; CHECK-RV64V-NEXT:    ntl.all
6393; CHECK-RV64V-NEXT:    sw a1, 0(a0)
6394; CHECK-RV64V-NEXT:    ret
6395;
6396; CHECK-RV32V-LABEL: test_nontemporal_ALL_store_i32:
6397; CHECK-RV32V:       # %bb.0:
6398; CHECK-RV32V-NEXT:    ntl.all
6399; CHECK-RV32V-NEXT:    sw a1, 0(a0)
6400; CHECK-RV32V-NEXT:    ret
6401  store i32 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !4
6402  ret void
6403}
6404
; Nontemporal store of an i16 tagged with !riscv-nontemporal-domain !4 (the
; metadata node itself is defined outside this function). Every run expects a
; single sh preceded directly by ntl.all, or by c.ntl.all in the +c runs.
6405define void @test_nontemporal_ALL_store_i16(ptr %p, i16 %v) {
6406; CHECK-RV64-LABEL: test_nontemporal_ALL_store_i16:
6407; CHECK-RV64:       # %bb.0:
6408; CHECK-RV64-NEXT:    ntl.all
6409; CHECK-RV64-NEXT:    sh a1, 0(a0)
6410; CHECK-RV64-NEXT:    ret
6411;
6412; CHECK-RV32-LABEL: test_nontemporal_ALL_store_i16:
6413; CHECK-RV32:       # %bb.0:
6414; CHECK-RV32-NEXT:    ntl.all
6415; CHECK-RV32-NEXT:    sh a1, 0(a0)
6416; CHECK-RV32-NEXT:    ret
6417;
6418; CHECK-RV64C-LABEL: test_nontemporal_ALL_store_i16:
6419; CHECK-RV64C:       # %bb.0:
6420; CHECK-RV64C-NEXT:    c.ntl.all
6421; CHECK-RV64C-NEXT:    sh a1, 0(a0)
6422; CHECK-RV64C-NEXT:    ret
6423;
6424; CHECK-RV32C-LABEL: test_nontemporal_ALL_store_i16:
6425; CHECK-RV32C:       # %bb.0:
6426; CHECK-RV32C-NEXT:    c.ntl.all
6427; CHECK-RV32C-NEXT:    sh a1, 0(a0)
6428; CHECK-RV32C-NEXT:    ret
6429;
6430; CHECK-RV64V-LABEL: test_nontemporal_ALL_store_i16:
6431; CHECK-RV64V:       # %bb.0:
6432; CHECK-RV64V-NEXT:    ntl.all
6433; CHECK-RV64V-NEXT:    sh a1, 0(a0)
6434; CHECK-RV64V-NEXT:    ret
6435;
6436; CHECK-RV32V-LABEL: test_nontemporal_ALL_store_i16:
6437; CHECK-RV32V:       # %bb.0:
6438; CHECK-RV32V-NEXT:    ntl.all
6439; CHECK-RV32V-NEXT:    sh a1, 0(a0)
6440; CHECK-RV32V-NEXT:    ret
6441  store i16 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !4
6442  ret void
6443}
6444
; Nontemporal store of an i8 tagged with !riscv-nontemporal-domain !4 (the
; metadata node itself is defined outside this function). Every run expects a
; single sb preceded directly by ntl.all, or by c.ntl.all in the +c runs.
6445define void @test_nontemporal_ALL_store_i8(ptr %p, i8 %v) {
6446; CHECK-RV64-LABEL: test_nontemporal_ALL_store_i8:
6447; CHECK-RV64:       # %bb.0:
6448; CHECK-RV64-NEXT:    ntl.all
6449; CHECK-RV64-NEXT:    sb a1, 0(a0)
6450; CHECK-RV64-NEXT:    ret
6451;
6452; CHECK-RV32-LABEL: test_nontemporal_ALL_store_i8:
6453; CHECK-RV32:       # %bb.0:
6454; CHECK-RV32-NEXT:    ntl.all
6455; CHECK-RV32-NEXT:    sb a1, 0(a0)
6456; CHECK-RV32-NEXT:    ret
6457;
6458; CHECK-RV64C-LABEL: test_nontemporal_ALL_store_i8:
6459; CHECK-RV64C:       # %bb.0:
6460; CHECK-RV64C-NEXT:    c.ntl.all
6461; CHECK-RV64C-NEXT:    sb a1, 0(a0)
6462; CHECK-RV64C-NEXT:    ret
6463;
6464; CHECK-RV32C-LABEL: test_nontemporal_ALL_store_i8:
6465; CHECK-RV32C:       # %bb.0:
6466; CHECK-RV32C-NEXT:    c.ntl.all
6467; CHECK-RV32C-NEXT:    sb a1, 0(a0)
6468; CHECK-RV32C-NEXT:    ret
6469;
6470; CHECK-RV64V-LABEL: test_nontemporal_ALL_store_i8:
6471; CHECK-RV64V:       # %bb.0:
6472; CHECK-RV64V-NEXT:    ntl.all
6473; CHECK-RV64V-NEXT:    sb a1, 0(a0)
6474; CHECK-RV64V-NEXT:    ret
6475;
6476; CHECK-RV32V-LABEL: test_nontemporal_ALL_store_i8:
6477; CHECK-RV32V:       # %bb.0:
6478; CHECK-RV32V-NEXT:    ntl.all
6479; CHECK-RV32V-NEXT:    sb a1, 0(a0)
6480; CHECK-RV32V-NEXT:    ret
6481  store i8 %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !4
6482  ret void
6483}
6484
; Nontemporal store of a half tagged with !riscv-nontemporal-domain !4 (the
; metadata node itself is defined outside this function). Every run expects a
; single fsh of fa0 preceded directly by ntl.all, or by c.ntl.all in the +c
; runs.
6485define void @test_nontemporal_ALL_store_half(ptr %p, half %v) {
6486; CHECK-RV64-LABEL: test_nontemporal_ALL_store_half:
6487; CHECK-RV64:       # %bb.0:
6488; CHECK-RV64-NEXT:    ntl.all
6489; CHECK-RV64-NEXT:    fsh fa0, 0(a0)
6490; CHECK-RV64-NEXT:    ret
6491;
6492; CHECK-RV32-LABEL: test_nontemporal_ALL_store_half:
6493; CHECK-RV32:       # %bb.0:
6494; CHECK-RV32-NEXT:    ntl.all
6495; CHECK-RV32-NEXT:    fsh fa0, 0(a0)
6496; CHECK-RV32-NEXT:    ret
6497;
6498; CHECK-RV64C-LABEL: test_nontemporal_ALL_store_half:
6499; CHECK-RV64C:       # %bb.0:
6500; CHECK-RV64C-NEXT:    c.ntl.all
6501; CHECK-RV64C-NEXT:    fsh fa0, 0(a0)
6502; CHECK-RV64C-NEXT:    ret
6503;
6504; CHECK-RV32C-LABEL: test_nontemporal_ALL_store_half:
6505; CHECK-RV32C:       # %bb.0:
6506; CHECK-RV32C-NEXT:    c.ntl.all
6507; CHECK-RV32C-NEXT:    fsh fa0, 0(a0)
6508; CHECK-RV32C-NEXT:    ret
6509;
6510; CHECK-RV64V-LABEL: test_nontemporal_ALL_store_half:
6511; CHECK-RV64V:       # %bb.0:
6512; CHECK-RV64V-NEXT:    ntl.all
6513; CHECK-RV64V-NEXT:    fsh fa0, 0(a0)
6514; CHECK-RV64V-NEXT:    ret
6515;
6516; CHECK-RV32V-LABEL: test_nontemporal_ALL_store_half:
6517; CHECK-RV32V:       # %bb.0:
6518; CHECK-RV32V-NEXT:    ntl.all
6519; CHECK-RV32V-NEXT:    fsh fa0, 0(a0)
6520; CHECK-RV32V-NEXT:    ret
6521  store half %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !4
6522  ret void
6523}
6524
; Nontemporal store of a float tagged with !riscv-nontemporal-domain !4 (the
; metadata node itself is defined outside this function). Every run expects a
; single fsw of fa0 preceded directly by ntl.all, or by c.ntl.all in the +c
; runs.
6525define void @test_nontemporal_ALL_store_float(ptr %p, float %v) {
6526; CHECK-RV64-LABEL: test_nontemporal_ALL_store_float:
6527; CHECK-RV64:       # %bb.0:
6528; CHECK-RV64-NEXT:    ntl.all
6529; CHECK-RV64-NEXT:    fsw fa0, 0(a0)
6530; CHECK-RV64-NEXT:    ret
6531;
6532; CHECK-RV32-LABEL: test_nontemporal_ALL_store_float:
6533; CHECK-RV32:       # %bb.0:
6534; CHECK-RV32-NEXT:    ntl.all
6535; CHECK-RV32-NEXT:    fsw fa0, 0(a0)
6536; CHECK-RV32-NEXT:    ret
6537;
6538; CHECK-RV64C-LABEL: test_nontemporal_ALL_store_float:
6539; CHECK-RV64C:       # %bb.0:
6540; CHECK-RV64C-NEXT:    c.ntl.all
6541; CHECK-RV64C-NEXT:    fsw fa0, 0(a0)
6542; CHECK-RV64C-NEXT:    ret
6543;
6544; CHECK-RV32C-LABEL: test_nontemporal_ALL_store_float:
6545; CHECK-RV32C:       # %bb.0:
6546; CHECK-RV32C-NEXT:    c.ntl.all
6547; CHECK-RV32C-NEXT:    fsw fa0, 0(a0)
6548; CHECK-RV32C-NEXT:    ret
6549;
6550; CHECK-RV64V-LABEL: test_nontemporal_ALL_store_float:
6551; CHECK-RV64V:       # %bb.0:
6552; CHECK-RV64V-NEXT:    ntl.all
6553; CHECK-RV64V-NEXT:    fsw fa0, 0(a0)
6554; CHECK-RV64V-NEXT:    ret
6555;
6556; CHECK-RV32V-LABEL: test_nontemporal_ALL_store_float:
6557; CHECK-RV32V:       # %bb.0:
6558; CHECK-RV32V-NEXT:    ntl.all
6559; CHECK-RV32V-NEXT:    fsw fa0, 0(a0)
6560; CHECK-RV32V-NEXT:    ret
6561  store float %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !4
6562  ret void
6563}
6564
; Nontemporal store of a double tagged with !riscv-nontemporal-domain !4 (the
; metadata node itself is defined outside this function). Every run, 32-bit
; ones included, expects a single fsd of fa0 preceded directly by ntl.all, or
; by c.ntl.all in the +c runs.
6565define void @test_nontemporal_ALL_store_double(ptr %p, double %v) {
6566; CHECK-RV64-LABEL: test_nontemporal_ALL_store_double:
6567; CHECK-RV64:       # %bb.0:
6568; CHECK-RV64-NEXT:    ntl.all
6569; CHECK-RV64-NEXT:    fsd fa0, 0(a0)
6570; CHECK-RV64-NEXT:    ret
6571;
6572; CHECK-RV32-LABEL: test_nontemporal_ALL_store_double:
6573; CHECK-RV32:       # %bb.0:
6574; CHECK-RV32-NEXT:    ntl.all
6575; CHECK-RV32-NEXT:    fsd fa0, 0(a0)
6576; CHECK-RV32-NEXT:    ret
6577;
6578; CHECK-RV64C-LABEL: test_nontemporal_ALL_store_double:
6579; CHECK-RV64C:       # %bb.0:
6580; CHECK-RV64C-NEXT:    c.ntl.all
6581; CHECK-RV64C-NEXT:    fsd fa0, 0(a0)
6582; CHECK-RV64C-NEXT:    ret
6583;
6584; CHECK-RV32C-LABEL: test_nontemporal_ALL_store_double:
6585; CHECK-RV32C:       # %bb.0:
6586; CHECK-RV32C-NEXT:    c.ntl.all
6587; CHECK-RV32C-NEXT:    fsd fa0, 0(a0)
6588; CHECK-RV32C-NEXT:    ret
6589;
6590; CHECK-RV64V-LABEL: test_nontemporal_ALL_store_double:
6591; CHECK-RV64V:       # %bb.0:
6592; CHECK-RV64V-NEXT:    ntl.all
6593; CHECK-RV64V-NEXT:    fsd fa0, 0(a0)
6594; CHECK-RV64V-NEXT:    ret
6595;
6596; CHECK-RV32V-LABEL: test_nontemporal_ALL_store_double:
6597; CHECK-RV32V:       # %bb.0:
6598; CHECK-RV32V-NEXT:    ntl.all
6599; CHECK-RV32V-NEXT:    fsd fa0, 0(a0)
6600; CHECK-RV32V-NEXT:    ret
6601  store double %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !4
6602  ret void
6603}
6604
; Nontemporal store of <16 x i8> tagged with !riscv-nontemporal-domain !4 (the
; metadata node itself is defined outside this function). Without +v the
; expectations show the sixteen elements being fetched through a1 with plain
; (unhinted) lbu instructions, s0/s1 being spilled and reloaded around the
; body, and sixteen sb stores through a0, each preceded directly by ntl.all
; (c.ntl.all in the +c runs). With +v a single ntl.all is expected directly
; before the vse8.v.
6605define void @test_nontemporal_ALL_store_v16i8(ptr %p, <16 x i8> %v) {
6606; CHECK-RV64-LABEL: test_nontemporal_ALL_store_v16i8:
6607; CHECK-RV64:       # %bb.0:
6608; CHECK-RV64-NEXT:    addi sp, sp, -16
6609; CHECK-RV64-NEXT:    .cfi_def_cfa_offset 16
6610; CHECK-RV64-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
6611; CHECK-RV64-NEXT:    sd s1, 0(sp) # 8-byte Folded Spill
6612; CHECK-RV64-NEXT:    .cfi_offset s0, -8
6613; CHECK-RV64-NEXT:    .cfi_offset s1, -16
6614; CHECK-RV64-NEXT:    lbu a2, 0(a1)
6615; CHECK-RV64-NEXT:    lbu a3, 8(a1)
6616; CHECK-RV64-NEXT:    lbu a4, 16(a1)
6617; CHECK-RV64-NEXT:    lbu a5, 24(a1)
6618; CHECK-RV64-NEXT:    lbu a6, 32(a1)
6619; CHECK-RV64-NEXT:    lbu a7, 40(a1)
6620; CHECK-RV64-NEXT:    lbu t0, 48(a1)
6621; CHECK-RV64-NEXT:    lbu t1, 56(a1)
6622; CHECK-RV64-NEXT:    lbu t2, 64(a1)
6623; CHECK-RV64-NEXT:    lbu t3, 72(a1)
6624; CHECK-RV64-NEXT:    lbu t4, 80(a1)
6625; CHECK-RV64-NEXT:    lbu t5, 88(a1)
6626; CHECK-RV64-NEXT:    lbu t6, 96(a1)
6627; CHECK-RV64-NEXT:    lbu s0, 104(a1)
6628; CHECK-RV64-NEXT:    lbu s1, 112(a1)
6629; CHECK-RV64-NEXT:    lbu a1, 120(a1)
6630; CHECK-RV64-NEXT:    ntl.all
6631; CHECK-RV64-NEXT:    sb t6, 12(a0)
6632; CHECK-RV64-NEXT:    ntl.all
6633; CHECK-RV64-NEXT:    sb s0, 13(a0)
6634; CHECK-RV64-NEXT:    ntl.all
6635; CHECK-RV64-NEXT:    sb s1, 14(a0)
6636; CHECK-RV64-NEXT:    ntl.all
6637; CHECK-RV64-NEXT:    sb a1, 15(a0)
6638; CHECK-RV64-NEXT:    ntl.all
6639; CHECK-RV64-NEXT:    sb t2, 8(a0)
6640; CHECK-RV64-NEXT:    ntl.all
6641; CHECK-RV64-NEXT:    sb t3, 9(a0)
6642; CHECK-RV64-NEXT:    ntl.all
6643; CHECK-RV64-NEXT:    sb t4, 10(a0)
6644; CHECK-RV64-NEXT:    ntl.all
6645; CHECK-RV64-NEXT:    sb t5, 11(a0)
6646; CHECK-RV64-NEXT:    ntl.all
6647; CHECK-RV64-NEXT:    sb a6, 4(a0)
6648; CHECK-RV64-NEXT:    ntl.all
6649; CHECK-RV64-NEXT:    sb a7, 5(a0)
6650; CHECK-RV64-NEXT:    ntl.all
6651; CHECK-RV64-NEXT:    sb t0, 6(a0)
6652; CHECK-RV64-NEXT:    ntl.all
6653; CHECK-RV64-NEXT:    sb t1, 7(a0)
6654; CHECK-RV64-NEXT:    ntl.all
6655; CHECK-RV64-NEXT:    sb a2, 0(a0)
6656; CHECK-RV64-NEXT:    ntl.all
6657; CHECK-RV64-NEXT:    sb a3, 1(a0)
6658; CHECK-RV64-NEXT:    ntl.all
6659; CHECK-RV64-NEXT:    sb a4, 2(a0)
6660; CHECK-RV64-NEXT:    ntl.all
6661; CHECK-RV64-NEXT:    sb a5, 3(a0)
6662; CHECK-RV64-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
6663; CHECK-RV64-NEXT:    ld s1, 0(sp) # 8-byte Folded Reload
6664; CHECK-RV64-NEXT:    .cfi_restore s0
6665; CHECK-RV64-NEXT:    .cfi_restore s1
6666; CHECK-RV64-NEXT:    addi sp, sp, 16
6667; CHECK-RV64-NEXT:    .cfi_def_cfa_offset 0
6668; CHECK-RV64-NEXT:    ret
6669;
6670; CHECK-RV32-LABEL: test_nontemporal_ALL_store_v16i8:
6671; CHECK-RV32:       # %bb.0:
6672; CHECK-RV32-NEXT:    addi sp, sp, -16
6673; CHECK-RV32-NEXT:    .cfi_def_cfa_offset 16
6674; CHECK-RV32-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
6675; CHECK-RV32-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
6676; CHECK-RV32-NEXT:    .cfi_offset s0, -4
6677; CHECK-RV32-NEXT:    .cfi_offset s1, -8
6678; CHECK-RV32-NEXT:    lbu a2, 0(a1)
6679; CHECK-RV32-NEXT:    lbu a3, 4(a1)
6680; CHECK-RV32-NEXT:    lbu a4, 8(a1)
6681; CHECK-RV32-NEXT:    lbu a5, 12(a1)
6682; CHECK-RV32-NEXT:    lbu a6, 16(a1)
6683; CHECK-RV32-NEXT:    lbu a7, 20(a1)
6684; CHECK-RV32-NEXT:    lbu t0, 24(a1)
6685; CHECK-RV32-NEXT:    lbu t1, 28(a1)
6686; CHECK-RV32-NEXT:    lbu t2, 32(a1)
6687; CHECK-RV32-NEXT:    lbu t3, 36(a1)
6688; CHECK-RV32-NEXT:    lbu t4, 40(a1)
6689; CHECK-RV32-NEXT:    lbu t5, 44(a1)
6690; CHECK-RV32-NEXT:    lbu t6, 48(a1)
6691; CHECK-RV32-NEXT:    lbu s0, 52(a1)
6692; CHECK-RV32-NEXT:    lbu s1, 56(a1)
6693; CHECK-RV32-NEXT:    lbu a1, 60(a1)
6694; CHECK-RV32-NEXT:    ntl.all
6695; CHECK-RV32-NEXT:    sb t6, 12(a0)
6696; CHECK-RV32-NEXT:    ntl.all
6697; CHECK-RV32-NEXT:    sb s0, 13(a0)
6698; CHECK-RV32-NEXT:    ntl.all
6699; CHECK-RV32-NEXT:    sb s1, 14(a0)
6700; CHECK-RV32-NEXT:    ntl.all
6701; CHECK-RV32-NEXT:    sb a1, 15(a0)
6702; CHECK-RV32-NEXT:    ntl.all
6703; CHECK-RV32-NEXT:    sb t2, 8(a0)
6704; CHECK-RV32-NEXT:    ntl.all
6705; CHECK-RV32-NEXT:    sb t3, 9(a0)
6706; CHECK-RV32-NEXT:    ntl.all
6707; CHECK-RV32-NEXT:    sb t4, 10(a0)
6708; CHECK-RV32-NEXT:    ntl.all
6709; CHECK-RV32-NEXT:    sb t5, 11(a0)
6710; CHECK-RV32-NEXT:    ntl.all
6711; CHECK-RV32-NEXT:    sb a6, 4(a0)
6712; CHECK-RV32-NEXT:    ntl.all
6713; CHECK-RV32-NEXT:    sb a7, 5(a0)
6714; CHECK-RV32-NEXT:    ntl.all
6715; CHECK-RV32-NEXT:    sb t0, 6(a0)
6716; CHECK-RV32-NEXT:    ntl.all
6717; CHECK-RV32-NEXT:    sb t1, 7(a0)
6718; CHECK-RV32-NEXT:    ntl.all
6719; CHECK-RV32-NEXT:    sb a2, 0(a0)
6720; CHECK-RV32-NEXT:    ntl.all
6721; CHECK-RV32-NEXT:    sb a3, 1(a0)
6722; CHECK-RV32-NEXT:    ntl.all
6723; CHECK-RV32-NEXT:    sb a4, 2(a0)
6724; CHECK-RV32-NEXT:    ntl.all
6725; CHECK-RV32-NEXT:    sb a5, 3(a0)
6726; CHECK-RV32-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
6727; CHECK-RV32-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
6728; CHECK-RV32-NEXT:    .cfi_restore s0
6729; CHECK-RV32-NEXT:    .cfi_restore s1
6730; CHECK-RV32-NEXT:    addi sp, sp, 16
6731; CHECK-RV32-NEXT:    .cfi_def_cfa_offset 0
6732; CHECK-RV32-NEXT:    ret
6733;
6734; CHECK-RV64C-LABEL: test_nontemporal_ALL_store_v16i8:
6735; CHECK-RV64C:       # %bb.0:
6736; CHECK-RV64C-NEXT:    addi sp, sp, -16
6737; CHECK-RV64C-NEXT:    .cfi_def_cfa_offset 16
6738; CHECK-RV64C-NEXT:    sd s0, 8(sp) # 8-byte Folded Spill
6739; CHECK-RV64C-NEXT:    sd s1, 0(sp) # 8-byte Folded Spill
6740; CHECK-RV64C-NEXT:    .cfi_offset s0, -8
6741; CHECK-RV64C-NEXT:    .cfi_offset s1, -16
6742; CHECK-RV64C-NEXT:    lbu a6, 0(a1)
6743; CHECK-RV64C-NEXT:    lbu a7, 8(a1)
6744; CHECK-RV64C-NEXT:    lbu t0, 16(a1)
6745; CHECK-RV64C-NEXT:    lbu t1, 24(a1)
6746; CHECK-RV64C-NEXT:    lbu t2, 32(a1)
6747; CHECK-RV64C-NEXT:    lbu t3, 40(a1)
6748; CHECK-RV64C-NEXT:    lbu t4, 48(a1)
6749; CHECK-RV64C-NEXT:    lbu t5, 56(a1)
6750; CHECK-RV64C-NEXT:    lbu t6, 64(a1)
6751; CHECK-RV64C-NEXT:    lbu a3, 72(a1)
6752; CHECK-RV64C-NEXT:    lbu a4, 80(a1)
6753; CHECK-RV64C-NEXT:    lbu a5, 88(a1)
6754; CHECK-RV64C-NEXT:    lbu a2, 96(a1)
6755; CHECK-RV64C-NEXT:    lbu s0, 104(a1)
6756; CHECK-RV64C-NEXT:    lbu s1, 112(a1)
6757; CHECK-RV64C-NEXT:    lbu a1, 120(a1)
6758; CHECK-RV64C-NEXT:    c.ntl.all
6759; CHECK-RV64C-NEXT:    sb a2, 12(a0)
6760; CHECK-RV64C-NEXT:    c.ntl.all
6761; CHECK-RV64C-NEXT:    sb s0, 13(a0)
6762; CHECK-RV64C-NEXT:    c.ntl.all
6763; CHECK-RV64C-NEXT:    sb s1, 14(a0)
6764; CHECK-RV64C-NEXT:    c.ntl.all
6765; CHECK-RV64C-NEXT:    sb a1, 15(a0)
6766; CHECK-RV64C-NEXT:    c.ntl.all
6767; CHECK-RV64C-NEXT:    sb t6, 8(a0)
6768; CHECK-RV64C-NEXT:    c.ntl.all
6769; CHECK-RV64C-NEXT:    sb a3, 9(a0)
6770; CHECK-RV64C-NEXT:    c.ntl.all
6771; CHECK-RV64C-NEXT:    sb a4, 10(a0)
6772; CHECK-RV64C-NEXT:    c.ntl.all
6773; CHECK-RV64C-NEXT:    sb a5, 11(a0)
6774; CHECK-RV64C-NEXT:    c.ntl.all
6775; CHECK-RV64C-NEXT:    sb t2, 4(a0)
6776; CHECK-RV64C-NEXT:    c.ntl.all
6777; CHECK-RV64C-NEXT:    sb t3, 5(a0)
6778; CHECK-RV64C-NEXT:    c.ntl.all
6779; CHECK-RV64C-NEXT:    sb t4, 6(a0)
6780; CHECK-RV64C-NEXT:    c.ntl.all
6781; CHECK-RV64C-NEXT:    sb t5, 7(a0)
6782; CHECK-RV64C-NEXT:    c.ntl.all
6783; CHECK-RV64C-NEXT:    sb a6, 0(a0)
6784; CHECK-RV64C-NEXT:    c.ntl.all
6785; CHECK-RV64C-NEXT:    sb a7, 1(a0)
6786; CHECK-RV64C-NEXT:    c.ntl.all
6787; CHECK-RV64C-NEXT:    sb t0, 2(a0)
6788; CHECK-RV64C-NEXT:    c.ntl.all
6789; CHECK-RV64C-NEXT:    sb t1, 3(a0)
6790; CHECK-RV64C-NEXT:    ld s0, 8(sp) # 8-byte Folded Reload
6791; CHECK-RV64C-NEXT:    ld s1, 0(sp) # 8-byte Folded Reload
6792; CHECK-RV64C-NEXT:    .cfi_restore s0
6793; CHECK-RV64C-NEXT:    .cfi_restore s1
6794; CHECK-RV64C-NEXT:    addi sp, sp, 16
6795; CHECK-RV64C-NEXT:    .cfi_def_cfa_offset 0
6796; CHECK-RV64C-NEXT:    ret
6797;
6798; CHECK-RV32C-LABEL: test_nontemporal_ALL_store_v16i8:
6799; CHECK-RV32C:       # %bb.0:
6800; CHECK-RV32C-NEXT:    addi sp, sp, -16
6801; CHECK-RV32C-NEXT:    .cfi_def_cfa_offset 16
6802; CHECK-RV32C-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
6803; CHECK-RV32C-NEXT:    sw s1, 8(sp) # 4-byte Folded Spill
6804; CHECK-RV32C-NEXT:    .cfi_offset s0, -4
6805; CHECK-RV32C-NEXT:    .cfi_offset s1, -8
6806; CHECK-RV32C-NEXT:    lbu a6, 0(a1)
6807; CHECK-RV32C-NEXT:    lbu a7, 4(a1)
6808; CHECK-RV32C-NEXT:    lbu t0, 8(a1)
6809; CHECK-RV32C-NEXT:    lbu t1, 12(a1)
6810; CHECK-RV32C-NEXT:    lbu t2, 16(a1)
6811; CHECK-RV32C-NEXT:    lbu t3, 20(a1)
6812; CHECK-RV32C-NEXT:    lbu t4, 24(a1)
6813; CHECK-RV32C-NEXT:    lbu t5, 28(a1)
6814; CHECK-RV32C-NEXT:    lbu t6, 32(a1)
6815; CHECK-RV32C-NEXT:    lbu a3, 36(a1)
6816; CHECK-RV32C-NEXT:    lbu a4, 40(a1)
6817; CHECK-RV32C-NEXT:    lbu a5, 44(a1)
6818; CHECK-RV32C-NEXT:    lbu a2, 48(a1)
6819; CHECK-RV32C-NEXT:    lbu s0, 52(a1)
6820; CHECK-RV32C-NEXT:    lbu s1, 56(a1)
6821; CHECK-RV32C-NEXT:    lbu a1, 60(a1)
6822; CHECK-RV32C-NEXT:    c.ntl.all
6823; CHECK-RV32C-NEXT:    sb a2, 12(a0)
6824; CHECK-RV32C-NEXT:    c.ntl.all
6825; CHECK-RV32C-NEXT:    sb s0, 13(a0)
6826; CHECK-RV32C-NEXT:    c.ntl.all
6827; CHECK-RV32C-NEXT:    sb s1, 14(a0)
6828; CHECK-RV32C-NEXT:    c.ntl.all
6829; CHECK-RV32C-NEXT:    sb a1, 15(a0)
6830; CHECK-RV32C-NEXT:    c.ntl.all
6831; CHECK-RV32C-NEXT:    sb t6, 8(a0)
6832; CHECK-RV32C-NEXT:    c.ntl.all
6833; CHECK-RV32C-NEXT:    sb a3, 9(a0)
6834; CHECK-RV32C-NEXT:    c.ntl.all
6835; CHECK-RV32C-NEXT:    sb a4, 10(a0)
6836; CHECK-RV32C-NEXT:    c.ntl.all
6837; CHECK-RV32C-NEXT:    sb a5, 11(a0)
6838; CHECK-RV32C-NEXT:    c.ntl.all
6839; CHECK-RV32C-NEXT:    sb t2, 4(a0)
6840; CHECK-RV32C-NEXT:    c.ntl.all
6841; CHECK-RV32C-NEXT:    sb t3, 5(a0)
6842; CHECK-RV32C-NEXT:    c.ntl.all
6843; CHECK-RV32C-NEXT:    sb t4, 6(a0)
6844; CHECK-RV32C-NEXT:    c.ntl.all
6845; CHECK-RV32C-NEXT:    sb t5, 7(a0)
6846; CHECK-RV32C-NEXT:    c.ntl.all
6847; CHECK-RV32C-NEXT:    sb a6, 0(a0)
6848; CHECK-RV32C-NEXT:    c.ntl.all
6849; CHECK-RV32C-NEXT:    sb a7, 1(a0)
6850; CHECK-RV32C-NEXT:    c.ntl.all
6851; CHECK-RV32C-NEXT:    sb t0, 2(a0)
6852; CHECK-RV32C-NEXT:    c.ntl.all
6853; CHECK-RV32C-NEXT:    sb t1, 3(a0)
6854; CHECK-RV32C-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
6855; CHECK-RV32C-NEXT:    lw s1, 8(sp) # 4-byte Folded Reload
6856; CHECK-RV32C-NEXT:    .cfi_restore s0
6857; CHECK-RV32C-NEXT:    .cfi_restore s1
6858; CHECK-RV32C-NEXT:    addi sp, sp, 16
6859; CHECK-RV32C-NEXT:    .cfi_def_cfa_offset 0
6860; CHECK-RV32C-NEXT:    ret
6861;
6862; CHECK-RV64V-LABEL: test_nontemporal_ALL_store_v16i8:
6863; CHECK-RV64V:       # %bb.0:
6864; CHECK-RV64V-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
6865; CHECK-RV64V-NEXT:    ntl.all
6866; CHECK-RV64V-NEXT:    vse8.v v8, (a0)
6867; CHECK-RV64V-NEXT:    ret
6868;
6869; CHECK-RV32V-LABEL: test_nontemporal_ALL_store_v16i8:
6870; CHECK-RV32V:       # %bb.0:
6871; CHECK-RV32V-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
6872; CHECK-RV32V-NEXT:    ntl.all
6873; CHECK-RV32V-NEXT:    vse8.v v8, (a0)
6874; CHECK-RV32V-NEXT:    ret
6875  store <16 x i8> %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !4
6876  ret void
6877}
6878
; Nontemporal store of <8 x i16> tagged with !riscv-nontemporal-domain !4 (the
; metadata node itself is defined outside this function). Without +v the
; expectations show the eight elements being fetched through a1 with plain
; (unhinted) lh instructions, then eight sh stores through a0, each preceded
; directly by ntl.all (c.ntl.all in the +c runs). With +v a single ntl.all is
; expected directly before the vse16.v.
6879define void @test_nontemporal_ALL_store_v8i16(ptr %p, <8 x i16> %v) {
6880; CHECK-RV64-LABEL: test_nontemporal_ALL_store_v8i16:
6881; CHECK-RV64:       # %bb.0:
6882; CHECK-RV64-NEXT:    lh a2, 0(a1)
6883; CHECK-RV64-NEXT:    lh a3, 8(a1)
6884; CHECK-RV64-NEXT:    lh a4, 16(a1)
6885; CHECK-RV64-NEXT:    lh a5, 24(a1)
6886; CHECK-RV64-NEXT:    lh a6, 32(a1)
6887; CHECK-RV64-NEXT:    lh a7, 40(a1)
6888; CHECK-RV64-NEXT:    lh t0, 48(a1)
6889; CHECK-RV64-NEXT:    lh a1, 56(a1)
6890; CHECK-RV64-NEXT:    ntl.all
6891; CHECK-RV64-NEXT:    sh a6, 8(a0)
6892; CHECK-RV64-NEXT:    ntl.all
6893; CHECK-RV64-NEXT:    sh a7, 10(a0)
6894; CHECK-RV64-NEXT:    ntl.all
6895; CHECK-RV64-NEXT:    sh t0, 12(a0)
6896; CHECK-RV64-NEXT:    ntl.all
6897; CHECK-RV64-NEXT:    sh a1, 14(a0)
6898; CHECK-RV64-NEXT:    ntl.all
6899; CHECK-RV64-NEXT:    sh a2, 0(a0)
6900; CHECK-RV64-NEXT:    ntl.all
6901; CHECK-RV64-NEXT:    sh a3, 2(a0)
6902; CHECK-RV64-NEXT:    ntl.all
6903; CHECK-RV64-NEXT:    sh a4, 4(a0)
6904; CHECK-RV64-NEXT:    ntl.all
6905; CHECK-RV64-NEXT:    sh a5, 6(a0)
6906; CHECK-RV64-NEXT:    ret
6907;
6908; CHECK-RV32-LABEL: test_nontemporal_ALL_store_v8i16:
6909; CHECK-RV32:       # %bb.0:
6910; CHECK-RV32-NEXT:    lh a2, 0(a1)
6911; CHECK-RV32-NEXT:    lh a3, 4(a1)
6912; CHECK-RV32-NEXT:    lh a4, 8(a1)
6913; CHECK-RV32-NEXT:    lh a5, 12(a1)
6914; CHECK-RV32-NEXT:    lh a6, 16(a1)
6915; CHECK-RV32-NEXT:    lh a7, 20(a1)
6916; CHECK-RV32-NEXT:    lh t0, 24(a1)
6917; CHECK-RV32-NEXT:    lh a1, 28(a1)
6918; CHECK-RV32-NEXT:    ntl.all
6919; CHECK-RV32-NEXT:    sh a6, 8(a0)
6920; CHECK-RV32-NEXT:    ntl.all
6921; CHECK-RV32-NEXT:    sh a7, 10(a0)
6922; CHECK-RV32-NEXT:    ntl.all
6923; CHECK-RV32-NEXT:    sh t0, 12(a0)
6924; CHECK-RV32-NEXT:    ntl.all
6925; CHECK-RV32-NEXT:    sh a1, 14(a0)
6926; CHECK-RV32-NEXT:    ntl.all
6927; CHECK-RV32-NEXT:    sh a2, 0(a0)
6928; CHECK-RV32-NEXT:    ntl.all
6929; CHECK-RV32-NEXT:    sh a3, 2(a0)
6930; CHECK-RV32-NEXT:    ntl.all
6931; CHECK-RV32-NEXT:    sh a4, 4(a0)
6932; CHECK-RV32-NEXT:    ntl.all
6933; CHECK-RV32-NEXT:    sh a5, 6(a0)
6934; CHECK-RV32-NEXT:    ret
6935;
6936; CHECK-RV64C-LABEL: test_nontemporal_ALL_store_v8i16:
6937; CHECK-RV64C:       # %bb.0:
6938; CHECK-RV64C-NEXT:    lh a6, 0(a1)
6939; CHECK-RV64C-NEXT:    lh a7, 8(a1)
6940; CHECK-RV64C-NEXT:    lh t0, 16(a1)
6941; CHECK-RV64C-NEXT:    lh a5, 24(a1)
6942; CHECK-RV64C-NEXT:    lh a2, 32(a1)
6943; CHECK-RV64C-NEXT:    lh a3, 40(a1)
6944; CHECK-RV64C-NEXT:    lh a4, 48(a1)
6945; CHECK-RV64C-NEXT:    lh a1, 56(a1)
6946; CHECK-RV64C-NEXT:    c.ntl.all
6947; CHECK-RV64C-NEXT:    sh a2, 8(a0)
6948; CHECK-RV64C-NEXT:    c.ntl.all
6949; CHECK-RV64C-NEXT:    sh a3, 10(a0)
6950; CHECK-RV64C-NEXT:    c.ntl.all
6951; CHECK-RV64C-NEXT:    sh a4, 12(a0)
6952; CHECK-RV64C-NEXT:    c.ntl.all
6953; CHECK-RV64C-NEXT:    sh a1, 14(a0)
6954; CHECK-RV64C-NEXT:    c.ntl.all
6955; CHECK-RV64C-NEXT:    sh a6, 0(a0)
6956; CHECK-RV64C-NEXT:    c.ntl.all
6957; CHECK-RV64C-NEXT:    sh a7, 2(a0)
6958; CHECK-RV64C-NEXT:    c.ntl.all
6959; CHECK-RV64C-NEXT:    sh t0, 4(a0)
6960; CHECK-RV64C-NEXT:    c.ntl.all
6961; CHECK-RV64C-NEXT:    sh a5, 6(a0)
6962; CHECK-RV64C-NEXT:    ret
6963;
6964; CHECK-RV32C-LABEL: test_nontemporal_ALL_store_v8i16:
6965; CHECK-RV32C:       # %bb.0:
6966; CHECK-RV32C-NEXT:    lh a6, 0(a1)
6967; CHECK-RV32C-NEXT:    lh a7, 4(a1)
6968; CHECK-RV32C-NEXT:    lh t0, 8(a1)
6969; CHECK-RV32C-NEXT:    lh a5, 12(a1)
6970; CHECK-RV32C-NEXT:    lh a2, 16(a1)
6971; CHECK-RV32C-NEXT:    lh a3, 20(a1)
6972; CHECK-RV32C-NEXT:    lh a4, 24(a1)
6973; CHECK-RV32C-NEXT:    lh a1, 28(a1)
6974; CHECK-RV32C-NEXT:    c.ntl.all
6975; CHECK-RV32C-NEXT:    sh a2, 8(a0)
6976; CHECK-RV32C-NEXT:    c.ntl.all
6977; CHECK-RV32C-NEXT:    sh a3, 10(a0)
6978; CHECK-RV32C-NEXT:    c.ntl.all
6979; CHECK-RV32C-NEXT:    sh a4, 12(a0)
6980; CHECK-RV32C-NEXT:    c.ntl.all
6981; CHECK-RV32C-NEXT:    sh a1, 14(a0)
6982; CHECK-RV32C-NEXT:    c.ntl.all
6983; CHECK-RV32C-NEXT:    sh a6, 0(a0)
6984; CHECK-RV32C-NEXT:    c.ntl.all
6985; CHECK-RV32C-NEXT:    sh a7, 2(a0)
6986; CHECK-RV32C-NEXT:    c.ntl.all
6987; CHECK-RV32C-NEXT:    sh t0, 4(a0)
6988; CHECK-RV32C-NEXT:    c.ntl.all
6989; CHECK-RV32C-NEXT:    sh a5, 6(a0)
6990; CHECK-RV32C-NEXT:    ret
6991;
6992; CHECK-RV64V-LABEL: test_nontemporal_ALL_store_v8i16:
6993; CHECK-RV64V:       # %bb.0:
6994; CHECK-RV64V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
6995; CHECK-RV64V-NEXT:    ntl.all
6996; CHECK-RV64V-NEXT:    vse16.v v8, (a0)
6997; CHECK-RV64V-NEXT:    ret
6998;
6999; CHECK-RV32V-LABEL: test_nontemporal_ALL_store_v8i16:
7000; CHECK-RV32V:       # %bb.0:
7001; CHECK-RV32V-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
7002; CHECK-RV32V-NEXT:    ntl.all
7003; CHECK-RV32V-NEXT:    vse16.v v8, (a0)
7004; CHECK-RV32V-NEXT:    ret
7005  store <8 x i16> %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !4
7006  ret void
7007}
7008
; Nontemporal store of the <4 x i32> argument %v to %p, tagged with
; !nontemporal !0 and !riscv-nontemporal-domain !4.
; Per the autogenerated assertions below:
;   - the configurations without +v first load the four elements through a1,
;     then emit four scalar `sw` stores, each preceded by its own ntl.all hint
;     (c.ntl.all when +c is enabled);
;   - the +v configurations emit one ntl.all ahead of a single vse32.v.
7009define void @test_nontemporal_ALL_store_v4i32(ptr %p, <4 x i32> %v) {
7010; CHECK-RV64-LABEL: test_nontemporal_ALL_store_v4i32:
7011; CHECK-RV64:       # %bb.0:
7012; CHECK-RV64-NEXT:    lw a2, 0(a1)
7013; CHECK-RV64-NEXT:    lw a3, 8(a1)
7014; CHECK-RV64-NEXT:    lw a4, 16(a1)
7015; CHECK-RV64-NEXT:    lw a1, 24(a1)
7016; CHECK-RV64-NEXT:    ntl.all
7017; CHECK-RV64-NEXT:    sw a2, 0(a0)
7018; CHECK-RV64-NEXT:    ntl.all
7019; CHECK-RV64-NEXT:    sw a3, 4(a0)
7020; CHECK-RV64-NEXT:    ntl.all
7021; CHECK-RV64-NEXT:    sw a4, 8(a0)
7022; CHECK-RV64-NEXT:    ntl.all
7023; CHECK-RV64-NEXT:    sw a1, 12(a0)
7024; CHECK-RV64-NEXT:    ret
7025;
7026; CHECK-RV32-LABEL: test_nontemporal_ALL_store_v4i32:
7027; CHECK-RV32:       # %bb.0:
7028; CHECK-RV32-NEXT:    lw a2, 0(a1)
7029; CHECK-RV32-NEXT:    lw a3, 4(a1)
7030; CHECK-RV32-NEXT:    lw a4, 8(a1)
7031; CHECK-RV32-NEXT:    lw a1, 12(a1)
7032; CHECK-RV32-NEXT:    ntl.all
7033; CHECK-RV32-NEXT:    sw a2, 0(a0)
7034; CHECK-RV32-NEXT:    ntl.all
7035; CHECK-RV32-NEXT:    sw a3, 4(a0)
7036; CHECK-RV32-NEXT:    ntl.all
7037; CHECK-RV32-NEXT:    sw a4, 8(a0)
7038; CHECK-RV32-NEXT:    ntl.all
7039; CHECK-RV32-NEXT:    sw a1, 12(a0)
7040; CHECK-RV32-NEXT:    ret
7041;
7042; CHECK-RV64C-LABEL: test_nontemporal_ALL_store_v4i32:
7043; CHECK-RV64C:       # %bb.0:
7044; CHECK-RV64C-NEXT:    lw a2, 0(a1)
7045; CHECK-RV64C-NEXT:    lw a3, 8(a1)
7046; CHECK-RV64C-NEXT:    lw a4, 16(a1)
7047; CHECK-RV64C-NEXT:    lw a1, 24(a1)
7048; CHECK-RV64C-NEXT:    c.ntl.all
7049; CHECK-RV64C-NEXT:    sw a2, 0(a0)
7050; CHECK-RV64C-NEXT:    c.ntl.all
7051; CHECK-RV64C-NEXT:    sw a3, 4(a0)
7052; CHECK-RV64C-NEXT:    c.ntl.all
7053; CHECK-RV64C-NEXT:    sw a4, 8(a0)
7054; CHECK-RV64C-NEXT:    c.ntl.all
7055; CHECK-RV64C-NEXT:    sw a1, 12(a0)
7056; CHECK-RV64C-NEXT:    ret
7057;
7058; CHECK-RV32C-LABEL: test_nontemporal_ALL_store_v4i32:
7059; CHECK-RV32C:       # %bb.0:
7060; CHECK-RV32C-NEXT:    lw a2, 0(a1)
7061; CHECK-RV32C-NEXT:    lw a3, 4(a1)
7062; CHECK-RV32C-NEXT:    lw a4, 8(a1)
7063; CHECK-RV32C-NEXT:    lw a1, 12(a1)
7064; CHECK-RV32C-NEXT:    c.ntl.all
7065; CHECK-RV32C-NEXT:    sw a2, 0(a0)
7066; CHECK-RV32C-NEXT:    c.ntl.all
7067; CHECK-RV32C-NEXT:    sw a3, 4(a0)
7068; CHECK-RV32C-NEXT:    c.ntl.all
7069; CHECK-RV32C-NEXT:    sw a4, 8(a0)
7070; CHECK-RV32C-NEXT:    c.ntl.all
7071; CHECK-RV32C-NEXT:    sw a1, 12(a0)
7072; CHECK-RV32C-NEXT:    ret
7073;
7074; CHECK-RV64V-LABEL: test_nontemporal_ALL_store_v4i32:
7075; CHECK-RV64V:       # %bb.0:
7076; CHECK-RV64V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
7077; CHECK-RV64V-NEXT:    ntl.all
7078; CHECK-RV64V-NEXT:    vse32.v v8, (a0)
7079; CHECK-RV64V-NEXT:    ret
7080;
7081; CHECK-RV32V-LABEL: test_nontemporal_ALL_store_v4i32:
7082; CHECK-RV32V:       # %bb.0:
7083; CHECK-RV32V-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
7084; CHECK-RV32V-NEXT:    ntl.all
7085; CHECK-RV32V-NEXT:    vse32.v v8, (a0)
7086; CHECK-RV32V-NEXT:    ret
7087  store <4 x i32> %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !4
7088  ret void
7089}
7090
; Nontemporal store of the <2 x i64> argument %v to %p, tagged with
; !nontemporal !0 and !riscv-nontemporal-domain !4.
; Per the autogenerated assertions below:
;   - riscv64 without +v: two `sd` stores taken directly from a1 and a2, each
;     preceded by ntl.all (c.ntl.all when +c is enabled);
;   - riscv32 without +v: four `lw` loads through a1, then four `sw` stores,
;     each preceded by its own hint;
;   - with +v: one ntl.all ahead of a single vse64.v.
7091define void @test_nontemporal_ALL_store_v2i64(ptr %p, <2 x i64> %v) {
7092; CHECK-RV64-LABEL: test_nontemporal_ALL_store_v2i64:
7093; CHECK-RV64:       # %bb.0:
7094; CHECK-RV64-NEXT:    ntl.all
7095; CHECK-RV64-NEXT:    sd a1, 0(a0)
7096; CHECK-RV64-NEXT:    ntl.all
7097; CHECK-RV64-NEXT:    sd a2, 8(a0)
7098; CHECK-RV64-NEXT:    ret
7099;
7100; CHECK-RV32-LABEL: test_nontemporal_ALL_store_v2i64:
7101; CHECK-RV32:       # %bb.0:
7102; CHECK-RV32-NEXT:    lw a2, 0(a1)
7103; CHECK-RV32-NEXT:    lw a3, 4(a1)
7104; CHECK-RV32-NEXT:    lw a4, 8(a1)
7105; CHECK-RV32-NEXT:    lw a1, 12(a1)
7106; CHECK-RV32-NEXT:    ntl.all
7107; CHECK-RV32-NEXT:    sw a2, 0(a0)
7108; CHECK-RV32-NEXT:    ntl.all
7109; CHECK-RV32-NEXT:    sw a3, 4(a0)
7110; CHECK-RV32-NEXT:    ntl.all
7111; CHECK-RV32-NEXT:    sw a4, 8(a0)
7112; CHECK-RV32-NEXT:    ntl.all
7113; CHECK-RV32-NEXT:    sw a1, 12(a0)
7114; CHECK-RV32-NEXT:    ret
7115;
7116; CHECK-RV64C-LABEL: test_nontemporal_ALL_store_v2i64:
7117; CHECK-RV64C:       # %bb.0:
7118; CHECK-RV64C-NEXT:    c.ntl.all
7119; CHECK-RV64C-NEXT:    sd a1, 0(a0)
7120; CHECK-RV64C-NEXT:    c.ntl.all
7121; CHECK-RV64C-NEXT:    sd a2, 8(a0)
7122; CHECK-RV64C-NEXT:    ret
7123;
7124; CHECK-RV32C-LABEL: test_nontemporal_ALL_store_v2i64:
7125; CHECK-RV32C:       # %bb.0:
7126; CHECK-RV32C-NEXT:    lw a2, 0(a1)
7127; CHECK-RV32C-NEXT:    lw a3, 4(a1)
7128; CHECK-RV32C-NEXT:    lw a4, 8(a1)
7129; CHECK-RV32C-NEXT:    lw a1, 12(a1)
7130; CHECK-RV32C-NEXT:    c.ntl.all
7131; CHECK-RV32C-NEXT:    sw a2, 0(a0)
7132; CHECK-RV32C-NEXT:    c.ntl.all
7133; CHECK-RV32C-NEXT:    sw a3, 4(a0)
7134; CHECK-RV32C-NEXT:    c.ntl.all
7135; CHECK-RV32C-NEXT:    sw a4, 8(a0)
7136; CHECK-RV32C-NEXT:    c.ntl.all
7137; CHECK-RV32C-NEXT:    sw a1, 12(a0)
7138; CHECK-RV32C-NEXT:    ret
7139;
7140; CHECK-RV64V-LABEL: test_nontemporal_ALL_store_v2i64:
7141; CHECK-RV64V:       # %bb.0:
7142; CHECK-RV64V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
7143; CHECK-RV64V-NEXT:    ntl.all
7144; CHECK-RV64V-NEXT:    vse64.v v8, (a0)
7145; CHECK-RV64V-NEXT:    ret
7146;
7147; CHECK-RV32V-LABEL: test_nontemporal_ALL_store_v2i64:
7148; CHECK-RV32V:       # %bb.0:
7149; CHECK-RV32V-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
7150; CHECK-RV32V-NEXT:    ntl.all
7151; CHECK-RV32V-NEXT:    vse64.v v8, (a0)
7152; CHECK-RV32V-NEXT:    ret
7153  store <2 x i64> %v, ptr %p, !nontemporal !0, !riscv-nontemporal-domain !4
7154  ret void
7155}
7156
7157
; Metadata nodes referenced by the stores in this test.
; !0 is the operand of the !nontemporal attachment on the stores above.
7158!0 = !{i32 1}
; NOTE(review): !1, !2 and !3 are not referenced in this part of the file;
; presumably they are the !riscv-nontemporal-domain operands for the other
; domain variants tested earlier (confirm against those tests).
7159!1 = !{i32 2}
7160!2 = !{i32 3}
7161!3 = !{i32 4}
; !4 is the !riscv-nontemporal-domain operand of the *_ALL_* tests above,
; whose assertions expect the ntl.all / c.ntl.all hint.
7162!4 = !{i32 5}
7163