; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

; Range checks: for all the instructions tested in this file, the
; immediate must be within the range [-8, 7] (a signed 4-bit immediate,
; scaled by the vector length via "mul vl"). Out-of-range values are
; tested only in the first case below; valid values are tested
; throughout the rest of the file.
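; As an illustration: with 256-bit vectors VL is 32 bytes, so
; "[x0, #7, mul vl]" addresses x0 + 7 * 32 = x0 + 224 bytes, while an
; offset of 8 vectors (x0 + 256) cannot be encoded in the 4-bit field.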

define void @imm_out_of_range(ptr %base, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: imm_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rdvl x8, #8
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ldnt1d { z0.d }, p0/z, [x8]
; CHECK-NEXT:    rdvl x8, #-9
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    stnt1d { z0.d }, p0, [x8]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 2 x i64>, ptr %base, i64 8
  %base_load_bc = bitcast ptr %base_load to ptr
  %data = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> %mask,
                                                                  ptr %base_load_bc)
  %base_store = getelementptr <vscale x 2 x i64>, ptr %base, i64 -9
  %base_store_bc = bitcast ptr %base_store to ptr
  call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> %data,
                                            <vscale x 2 x i1> %mask,
                                            ptr %base_store_bc)
  ret void
}
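
; Since the offsets 8 and -9 above fall outside [-8, 7], the reg+imm form
; cannot be used: rdvl materialises <imm> * VL bytes into a scratch
; register, which is added to the base so the access uses plain [x8]
; addressing instead.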

; 2-lane non-temporal load/stores.

define void @test_masked_ldst_sv2i64(ptr %base, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: test_masked_ldst_sv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnt1d { z0.d }, p0/z, [x0, #-8, mul vl]
; CHECK-NEXT:    stnt1d { z0.d }, p0, [x0, #-7, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 2 x i64>, ptr %base, i64 -8
  %base_load_bc = bitcast ptr %base_load to ptr
  %data = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> %mask,
                                                                  ptr %base_load_bc)
  %base_store = getelementptr <vscale x 2 x i64>, ptr %base, i64 -7
  %base_store_bc = bitcast ptr %base_store to ptr
  call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> %data,
                                            <vscale x 2 x i1> %mask,
                                            ptr %base_store_bc)
  ret void
}

define void @test_masked_ldst_sv2f64(ptr %base, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: test_masked_ldst_sv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnt1d { z0.d }, p0/z, [x0, #-6, mul vl]
; CHECK-NEXT:    stnt1d { z0.d }, p0, [x0, #-5, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 2 x double>, ptr %base, i64 -6
  %base_load_bc = bitcast ptr %base_load to ptr
  %data = call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1> %mask,
                                                                     ptr %base_load_bc)
  %base_store = getelementptr <vscale x 2 x double>, ptr %base, i64 -5
  %base_store_bc = bitcast ptr %base_store to ptr
  call void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double> %data,
                                            <vscale x 2 x i1> %mask,
                                            ptr %base_store_bc)
  ret void
}

; 4-lane non-temporal load/stores.

define void @test_masked_ldst_sv4i32(ptr %base, <vscale x 4 x i1> %mask) nounwind {
; CHECK-LABEL: test_masked_ldst_sv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnt1w { z0.s }, p0/z, [x0, #6, mul vl]
; CHECK-NEXT:    stnt1w { z0.s }, p0, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 4 x i32>, ptr %base, i64 6
  %base_load_bc = bitcast ptr %base_load to ptr
  %data = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> %mask,
                                                                  ptr %base_load_bc)
  %base_store = getelementptr <vscale x 4 x i32>, ptr %base, i64 7
  %base_store_bc = bitcast ptr %base_store to ptr
  call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> %data,
                                            <vscale x 4 x i1> %mask,
                                            ptr %base_store_bc)
  ret void
}

define void @test_masked_ldst_sv4f32(ptr %base, <vscale x 4 x i1> %mask) nounwind {
; CHECK-LABEL: test_masked_ldst_sv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnt1w { z0.s }, p0/z, [x0, #-1, mul vl]
; CHECK-NEXT:    stnt1w { z0.s }, p0, [x0, #2, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 4 x float>, ptr %base, i64 -1
  %base_load_bc = bitcast ptr %base_load to ptr
  %data = call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1> %mask,
                                                                    ptr %base_load_bc)
  %base_store = getelementptr <vscale x 4 x float>, ptr %base, i64 2
  %base_store_bc = bitcast ptr %base_store to ptr
  call void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float> %data,
                                            <vscale x 4 x i1> %mask,
                                            ptr %base_store_bc)
  ret void
}

; 8-lane non-temporal load/stores.

define void @test_masked_ldst_sv8i16(ptr %base, <vscale x 8 x i1> %mask) nounwind {
; CHECK-LABEL: test_masked_ldst_sv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnt1h { z0.h }, p0/z, [x0, #6, mul vl]
; CHECK-NEXT:    stnt1h { z0.h }, p0, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 8 x i16>, ptr %base, i64 6
  %base_load_bc = bitcast ptr %base_load to ptr
  %data = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> %mask,
                                                                  ptr %base_load_bc)
  %base_store = getelementptr <vscale x 8 x i16>, ptr %base, i64 7
  %base_store_bc = bitcast ptr %base_store to ptr
  call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> %data,
                                            <vscale x 8 x i1> %mask,
                                            ptr %base_store_bc)
  ret void
}

define void @test_masked_ldst_sv8f16(ptr %base, <vscale x 8 x i1> %mask) nounwind {
; CHECK-LABEL: test_masked_ldst_sv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnt1h { z0.h }, p0/z, [x0, #-1, mul vl]
; CHECK-NEXT:    stnt1h { z0.h }, p0, [x0, #2, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 8 x half>, ptr %base, i64 -1
  %base_load_bc = bitcast ptr %base_load to ptr
  %data = call <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1> %mask,
                                                                   ptr %base_load_bc)
  %base_store = getelementptr <vscale x 8 x half>, ptr %base, i64 2
  %base_store_bc = bitcast ptr %base_store to ptr
  call void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half> %data,
                                            <vscale x 8 x i1> %mask,
                                            ptr %base_store_bc)
  ret void
}

define void @test_masked_ldst_sv8bf16(ptr %base, <vscale x 8 x i1> %mask) nounwind #0 {
; CHECK-LABEL: test_masked_ldst_sv8bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnt1h { z0.h }, p0/z, [x0, #-1, mul vl]
; CHECK-NEXT:    stnt1h { z0.h }, p0, [x0, #2, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 8 x bfloat>, ptr %base, i64 -1
  %base_load_bc = bitcast ptr %base_load to ptr
  %data = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnt1.nxv8bf16(<vscale x 8 x i1> %mask,
                                                                      ptr %base_load_bc)
  %base_store = getelementptr <vscale x 8 x bfloat>, ptr %base, i64 2
  %base_store_bc = bitcast ptr %base_store to ptr
  call void @llvm.aarch64.sve.stnt1.nxv8bf16(<vscale x 8 x bfloat> %data,
                                             <vscale x 8 x i1> %mask,
                                             ptr %base_store_bc)
  ret void
}
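
; Note that bfloat shares the 16-bit element size with half, so the bf16
; test generates exactly the same ldnt1h/stnt1h sequence as the f16 test
; above; only the +bf16 target feature (attributes #0 below) differs.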

; 16-lane non-temporal load/stores.

define void @test_masked_ldst_sv16i8(ptr %base, <vscale x 16 x i1> %mask) nounwind {
; CHECK-LABEL: test_masked_ldst_sv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnt1b { z0.b }, p0/z, [x0, #6, mul vl]
; CHECK-NEXT:    stnt1b { z0.b }, p0, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 16 x i8>, ptr %base, i64 6
  %base_load_bc = bitcast ptr %base_load to ptr
  %data = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> %mask,
                                                                  ptr %base_load_bc)
  %base_store = getelementptr <vscale x 16 x i8>, ptr %base, i64 7
  %base_store_bc = bitcast ptr %base_store to ptr
  call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> %data,
                                            <vscale x 16 x i1> %mask,
                                            ptr %base_store_bc)
  ret void
}

; 2-element non-temporal loads.
declare <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1>, ptr)

; 4-element non-temporal loads.
declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1>, ptr)
declare <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1>, ptr)

; 8-element non-temporal loads.
declare <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnt1.nxv8bf16(<vscale x 8 x i1>, ptr)

; 16-element non-temporal loads.
declare <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1>, ptr)

; 2-element non-temporal stores.
declare void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, ptr)
declare void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, ptr)

; 4-element non-temporal stores.
declare void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, ptr)
declare void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, ptr)

; 8-element non-temporal stores.
declare void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, ptr)
declare void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, ptr)
declare void @llvm.aarch64.sve.stnt1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, ptr)

; 16-element non-temporal stores.
declare void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, ptr)

; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }