; xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-intrinsics-ldst-ext.ll (revision 1ee315ae7964c8433b772e0b5d667834994ba753)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=1 < %s | FileCheck %s

;
; LD1SB/LD1B
;

; Sign-extending load nxv16i8 -> nxv16i32: legalized into four predicated
; ld1sb parts at consecutive "mul vl" offsets under a .s ptrue predicate.
define <vscale x 16 x i32> @ld1b_i8_sext_i32(ptr %base) {
; CHECK-LABEL: ld1b_i8_sext_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1sb { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1sb { z1.s }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1sb { z2.s }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT:    ld1sb { z3.s }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT:    ret
  %wide.load = load <vscale x 16 x i8>, ptr %base
  %res = sext <vscale x 16 x i8> %wide.load to <vscale x 16 x i32>
  ret <vscale x 16 x i32> %res
}

; Zero-extending load nxv16i8 -> nxv16i32: same four-way split as the sext
; case, but using the unsigned ld1b form.
define <vscale x 16 x i32> @ld1b_i8_zext_i32(ptr %base) {
; CHECK-LABEL: ld1b_i8_zext_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1b { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.s }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1b { z2.s }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT:    ld1b { z3.s }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT:    ret
  %wide.load = load <vscale x 16 x i8>, ptr %base
  %res = zext <vscale x 16 x i8> %wide.load to <vscale x 16 x i32>
  ret <vscale x 16 x i32> %res
}

; Sign-extending load nxv16i8 -> nxv16i64: widest case, split into eight
; predicated ld1sb parts (z0-z7) under a .d ptrue predicate.
define <vscale x 16 x i64> @ld1b_i8_sext(ptr %base) {
; CHECK-LABEL: ld1b_i8_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1sb { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1sb { z2.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT:    ld1sb { z3.d }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT:    ld1sb { z4.d }, p0/z, [x0, #4, mul vl]
; CHECK-NEXT:    ld1sb { z5.d }, p0/z, [x0, #5, mul vl]
; CHECK-NEXT:    ld1sb { z6.d }, p0/z, [x0, #6, mul vl]
; CHECK-NEXT:    ld1sb { z7.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %wide.load = load <vscale x 16 x i8>, ptr %base
  %res = sext <vscale x 16 x i8> %wide.load to <vscale x 16 x i64>
  ret <vscale x 16 x i64> %res
}

; Zero-extending load nxv16i8 -> nxv16i64: eight-way split using the
; unsigned ld1b form under a .d ptrue predicate.
define <vscale x 16 x i64> @ld1b_i8_zext(ptr %base) {
; CHECK-LABEL: ld1b_i8_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1b { z2.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT:    ld1b { z3.d }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT:    ld1b { z4.d }, p0/z, [x0, #4, mul vl]
; CHECK-NEXT:    ld1b { z5.d }, p0/z, [x0, #5, mul vl]
; CHECK-NEXT:    ld1b { z6.d }, p0/z, [x0, #6, mul vl]
; CHECK-NEXT:    ld1b { z7.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %wide.load = load <vscale x 16 x i8>, ptr %base
  %res = zext <vscale x 16 x i8> %wide.load to <vscale x 16 x i64>
  ret <vscale x 16 x i64> %res
}

;
; LD1H
;

; Sign-extending load nxv8i16 -> nxv8i64: split into four predicated ld1sh
; parts at consecutive "mul vl" offsets.
define <vscale x 8 x i64> @ld1h_i16_sext(ptr %base) {
; CHECK-LABEL: ld1h_i16_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1sh { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1sh { z2.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT:    ld1sh { z3.d }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT:    ret
  %wide.load = load <vscale x 8 x i16>, ptr %base
  %res = sext <vscale x 8 x i16> %wide.load to <vscale x 8 x i64>
  ret <vscale x 8 x i64> %res
}

; Zero-extending load nxv8i16 -> nxv8i64: four-way split using the unsigned
; ld1h form.
define <vscale x 8 x i64> @ld1h_i16_zext(ptr %base) {
; CHECK-LABEL: ld1h_i16_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1h { z2.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT:    ld1h { z3.d }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT:    ret
  %wide.load = load <vscale x 8 x i16>, ptr %base
  %res = zext <vscale x 8 x i16> %wide.load to <vscale x 8 x i64>
  ret <vscale x 8 x i64> %res
}

;
; LD1W
;

; Sign-extending load nxv4i32 -> nxv4i64: split into two predicated ld1sw
; parts.
define <vscale x 4 x i64> @ld1w_i32_sext(ptr %base) {
; CHECK-LABEL: ld1w_i32_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1sw { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %wide.load = load <vscale x 4 x i32>, ptr %base
  %res = sext <vscale x 4 x i32> %wide.load to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %res
}

; Zero-extending load nxv4i32 -> nxv4i64: two-way split using the unsigned
; ld1w form.
define <vscale x 4 x i64> @ld1w_i32_zext(ptr %base) {
; CHECK-LABEL: ld1w_i32_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %wide.load = load <vscale x 4 x i32>, ptr %base
  %res = zext <vscale x 4 x i32> %wide.load to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %res
}


; Extending loads from unpacked to wide illegal types

; Unpacked nxv4i8 zero-extended to illegal nxv4i64: split into two
; predicated ld1b loads.
define <vscale x 4 x i64> @zload_4i8_4i64(ptr %a) {
; CHECK-LABEL: zload_4i8_4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %aval = load <vscale x 4 x i8>, ptr %a
  %aext = zext <vscale x 4 x i8> %aval to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %aext
}

; Unpacked nxv4i16 zero-extended to illegal nxv4i64: split into two
; predicated ld1h loads.
define <vscale x 4 x i64> @zload_4i16_4i64(ptr %a) {
; CHECK-LABEL: zload_4i16_4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1h { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %aval = load <vscale x 4 x i16>, ptr %a
  %aext = zext <vscale x 4 x i16> %aval to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %aext
}

; Unpacked nxv8i8 zero-extended to illegal nxv8i32: split into two
; predicated ld1b loads under a .s predicate.
define <vscale x 8 x i32> @zload_8i8_8i32(ptr %a) {
; CHECK-LABEL: zload_8i8_8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1b { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.s }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %aval = load <vscale x 8 x i8>, ptr %a
  %aext = zext <vscale x 8 x i8> %aval to <vscale x 8 x i32>
  ret <vscale x 8 x i32> %aext
}

; Unpacked nxv8i8 zero-extended to illegal nxv8i64: split into four
; predicated ld1b loads under a .d predicate.
define <vscale x 8 x i64> @zload_8i8_8i64(ptr %a) {
; CHECK-LABEL: zload_8i8_8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1b { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1b { z2.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT:    ld1b { z3.d }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT:    ret
  %aval = load <vscale x 8 x i8>, ptr %a
  %aext = zext <vscale x 8 x i8> %aval to <vscale x 8 x i64>
  ret <vscale x 8 x i64> %aext
}

; Unpacked nxv4i8 sign-extended to illegal nxv4i64: split into two
; predicated ld1sb loads.
define <vscale x 4 x i64> @sload_4i8_4i64(ptr %a) {
; CHECK-LABEL: sload_4i8_4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1sb { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %aval = load <vscale x 4 x i8>, ptr %a
  %aext = sext <vscale x 4 x i8> %aval to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %aext
}

; Unpacked nxv4i16 sign-extended to illegal nxv4i64: split into two
; predicated ld1sh loads.
define <vscale x 4 x i64> @sload_4i16_4i64(ptr %a) {
; CHECK-LABEL: sload_4i16_4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1sh { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %aval = load <vscale x 4 x i16>, ptr %a
  %aext = sext <vscale x 4 x i16> %aval to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %aext
}

; Unpacked nxv8i8 sign-extended to illegal nxv8i32: split into two
; predicated ld1sb loads under a .s predicate.
define <vscale x 8 x i32> @sload_8i8_8i32(ptr %a) {
; CHECK-LABEL: sload_8i8_8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1sb { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1sb { z1.s }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %aval = load <vscale x 8 x i8>, ptr %a
  %aext = sext <vscale x 8 x i8> %aval to <vscale x 8 x i32>
  ret <vscale x 8 x i32> %aext
}

; Unpacked nxv8i8 sign-extended to illegal nxv8i64: split into four
; predicated ld1sb loads under a .d predicate.
define <vscale x 8 x i64> @sload_8i8_8i64(ptr %a) {
; CHECK-LABEL: sload_8i8_8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ld1sb { z1.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ld1sb { z2.d }, p0/z, [x0, #2, mul vl]
; CHECK-NEXT:    ld1sb { z3.d }, p0/z, [x0, #3, mul vl]
; CHECK-NEXT:    ret
  %aval = load <vscale x 8 x i8>, ptr %a
  %aext = sext <vscale x 8 x i8> %aval to <vscale x 8 x i64>
  ret <vscale x 8 x i64> %aext
}
