xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-intrinsics-mask-ldst-ext.ll (revision 1ee315ae7964c8433b772e0b5d667834994ba753)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=1 < %s | FileCheck %s
3
4;
5; LD1B
6;
7
; Masked load of <vscale x 16 x i8> sign-extended to <vscale x 16 x i32>.
; The checks expect one ld1b of .b elements followed by two levels of
; sunpklo/sunpkhi, leaving the result in z0-z3.
; The passthru operand is poison (matching the nxv8i8/nxv4i8 tests in this
; file) rather than the discouraged undef.
8define <vscale x 16 x i32> @masked_ld1b_i8_sext_i32(ptr %base, <vscale x 16 x i1> %mask) {
9; CHECK-LABEL: masked_ld1b_i8_sext_i32:
10; CHECK:       // %bb.0:
11; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
12; CHECK-NEXT:    sunpklo z1.h, z0.b
13; CHECK-NEXT:    sunpkhi z3.h, z0.b
14; CHECK-NEXT:    sunpklo z0.s, z1.h
15; CHECK-NEXT:    sunpkhi z1.s, z1.h
16; CHECK-NEXT:    sunpklo z2.s, z3.h
17; CHECK-NEXT:    sunpkhi z3.s, z3.h
18; CHECK-NEXT:    ret
19  %wide.masked.load = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr %base, i32 2, <vscale x 16 x i1> %mask, <vscale x 16 x i8> poison)
20  %res = sext <vscale x 16 x i8> %wide.masked.load to <vscale x 16 x i32>
21  ret <vscale x 16 x i32> %res
22}
23
; Masked load of <vscale x 8 x i8> sign-extended to <vscale x 8 x i32>.
; The checks expect an ld1sb into .h elements followed by a single
; sunpklo/sunpkhi pair producing z0/z1.
24define <vscale x 8 x i32> @masked_ld1b_nxv8i8_sext_i32(ptr %a, <vscale x 8 x i1> %mask) {
25; CHECK-LABEL: masked_ld1b_nxv8i8_sext_i32:
26; CHECK:       // %bb.0:
27; CHECK-NEXT:    ld1sb { z1.h }, p0/z, [x0]
28; CHECK-NEXT:    sunpklo z0.s, z1.h
29; CHECK-NEXT:    sunpkhi z1.s, z1.h
30; CHECK-NEXT:    ret
31  %ld = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr %a, i32 1, <vscale x 8 x i1> %mask, <vscale x 8 x i8> poison)
32  %ext = sext <vscale x 8 x i8> %ld to <vscale x 8 x i32>
33  ret <vscale x 8 x i32> %ext
34}
35
; Masked load of <vscale x 16 x i8> zero-extended to <vscale x 16 x i32>.
; The checks expect one ld1b of .b elements followed by two levels of
; uunpklo/uunpkhi, leaving the result in z0-z3.
; The passthru operand is poison (matching the nxv8i8/nxv4i8 tests in this
; file) rather than the discouraged undef.
36define <vscale x 16 x i32> @masked_ld1b_i8_zext_i32(ptr %base, <vscale x 16 x i1> %mask) {
37; CHECK-LABEL: masked_ld1b_i8_zext_i32:
38; CHECK:       // %bb.0:
39; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
40; CHECK-NEXT:    uunpklo z1.h, z0.b
41; CHECK-NEXT:    uunpkhi z3.h, z0.b
42; CHECK-NEXT:    uunpklo z0.s, z1.h
43; CHECK-NEXT:    uunpkhi z1.s, z1.h
44; CHECK-NEXT:    uunpklo z2.s, z3.h
45; CHECK-NEXT:    uunpkhi z3.s, z3.h
46; CHECK-NEXT:    ret
47  %wide.masked.load = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr %base, i32 2, <vscale x 16 x i1> %mask, <vscale x 16 x i8> poison)
48  %res = zext <vscale x 16 x i8> %wide.masked.load to <vscale x 16 x i32>
49  ret <vscale x 16 x i32> %res
50}
51
; Masked load of <vscale x 8 x i8> zero-extended to <vscale x 8 x i32>.
; The checks expect an ld1b into .h elements followed by a single
; uunpklo/uunpkhi pair producing z0/z1.
52define <vscale x 8 x i32> @masked_ld1b_nxv8i8_zext_i32(ptr %a, <vscale x 8 x i1> %mask) {
53; CHECK-LABEL: masked_ld1b_nxv8i8_zext_i32:
54; CHECK:       // %bb.0:
55; CHECK-NEXT:    ld1b { z1.h }, p0/z, [x0]
56; CHECK-NEXT:    uunpklo z0.s, z1.h
57; CHECK-NEXT:    uunpkhi z1.s, z1.h
58; CHECK-NEXT:    ret
59  %ld = call <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr %a, i32 1, <vscale x 8 x i1> %mask, <vscale x 8 x i8> poison)
60  %ext = zext <vscale x 8 x i8> %ld to <vscale x 8 x i32>
61  ret <vscale x 8 x i32> %ext
62}
63
; Masked load of <vscale x 16 x i8> sign-extended to <vscale x 16 x i64>.
; The checks expect one ld1b of .b elements followed by three levels of
; sunpklo/sunpkhi (.b -> .h -> .s -> .d), leaving the result in z0-z7.
; The passthru operand is poison (matching the nxv8i8/nxv4i8 tests in this
; file) rather than the discouraged undef.
64define <vscale x 16 x i64> @masked_ld1b_i8_sext(ptr %base, <vscale x 16 x i1> %mask) {
65; CHECK-LABEL: masked_ld1b_i8_sext:
66; CHECK:       // %bb.0:
67; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
68; CHECK-NEXT:    sunpklo z1.h, z0.b
69; CHECK-NEXT:    sunpkhi z0.h, z0.b
70; CHECK-NEXT:    sunpklo z2.s, z1.h
71; CHECK-NEXT:    sunpkhi z3.s, z1.h
72; CHECK-NEXT:    sunpklo z5.s, z0.h
73; CHECK-NEXT:    sunpkhi z7.s, z0.h
74; CHECK-NEXT:    sunpklo z0.d, z2.s
75; CHECK-NEXT:    sunpkhi z1.d, z2.s
76; CHECK-NEXT:    sunpklo z2.d, z3.s
77; CHECK-NEXT:    sunpkhi z3.d, z3.s
78; CHECK-NEXT:    sunpklo z4.d, z5.s
79; CHECK-NEXT:    sunpkhi z5.d, z5.s
80; CHECK-NEXT:    sunpklo z6.d, z7.s
81; CHECK-NEXT:    sunpkhi z7.d, z7.s
82; CHECK-NEXT:    ret
83  %wide.masked.load = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr %base, i32 2, <vscale x 16 x i1> %mask, <vscale x 16 x i8> poison)
84  %res = sext <vscale x 16 x i8> %wide.masked.load to <vscale x 16 x i64>
85  ret <vscale x 16 x i64> %res
86}
87
; Masked load of <vscale x 4 x i8> sign-extended to <vscale x 4 x i64>.
; The checks expect an ld1sb into .s elements followed by a single
; sunpklo/sunpkhi pair producing z0/z1.
88define <vscale x 4 x i64> @masked_ld1b_nxv4i8_sext_i64(ptr %a, <vscale x 4 x i1> %mask) {
89; CHECK-LABEL: masked_ld1b_nxv4i8_sext_i64:
90; CHECK:       // %bb.0:
91; CHECK-NEXT:    ld1sb { z1.s }, p0/z, [x0]
92; CHECK-NEXT:    sunpklo z0.d, z1.s
93; CHECK-NEXT:    sunpkhi z1.d, z1.s
94; CHECK-NEXT:    ret
95  %ld = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr %a, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i8> poison)
96  %ext = sext <vscale x 4 x i8> %ld to <vscale x 4 x i64>
97  ret <vscale x 4 x i64> %ext
98}
99
; Masked load of <vscale x 16 x i8> zero-extended to <vscale x 16 x i64>.
; The checks expect one ld1b of .b elements followed by three levels of
; uunpklo/uunpkhi (.b -> .h -> .s -> .d), leaving the result in z0-z7.
; The passthru operand is poison (matching the nxv8i8/nxv4i8 tests in this
; file) rather than the discouraged undef.
100define <vscale x 16 x i64> @masked_ld1b_i8_zext(ptr %base, <vscale x 16 x i1> %mask) {
101; CHECK-LABEL: masked_ld1b_i8_zext:
102; CHECK:       // %bb.0:
103; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
104; CHECK-NEXT:    uunpklo z1.h, z0.b
105; CHECK-NEXT:    uunpkhi z0.h, z0.b
106; CHECK-NEXT:    uunpklo z2.s, z1.h
107; CHECK-NEXT:    uunpkhi z3.s, z1.h
108; CHECK-NEXT:    uunpklo z5.s, z0.h
109; CHECK-NEXT:    uunpkhi z7.s, z0.h
110; CHECK-NEXT:    uunpklo z0.d, z2.s
111; CHECK-NEXT:    uunpkhi z1.d, z2.s
112; CHECK-NEXT:    uunpklo z2.d, z3.s
113; CHECK-NEXT:    uunpkhi z3.d, z3.s
114; CHECK-NEXT:    uunpklo z4.d, z5.s
115; CHECK-NEXT:    uunpkhi z5.d, z5.s
116; CHECK-NEXT:    uunpklo z6.d, z7.s
117; CHECK-NEXT:    uunpkhi z7.d, z7.s
118; CHECK-NEXT:    ret
119  %wide.masked.load = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr %base, i32 2, <vscale x 16 x i1> %mask, <vscale x 16 x i8> poison)
120  %res = zext <vscale x 16 x i8> %wide.masked.load to <vscale x 16 x i64>
121  ret <vscale x 16 x i64> %res
122}
123
; Masked load of <vscale x 4 x i8> zero-extended to <vscale x 4 x i64>.
; The checks expect an ld1b into .s elements followed by a single
; uunpklo/uunpkhi pair producing z0/z1.
124define <vscale x 4 x i64> @masked_ld1b_nxv4i8_zext_i64(ptr %a, <vscale x 4 x i1> %mask) {
125; CHECK-LABEL: masked_ld1b_nxv4i8_zext_i64:
126; CHECK:       // %bb.0:
127; CHECK-NEXT:    ld1b { z1.s }, p0/z, [x0]
128; CHECK-NEXT:    uunpklo z0.d, z1.s
129; CHECK-NEXT:    uunpkhi z1.d, z1.s
130; CHECK-NEXT:    ret
131  %ld = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr %a, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i8> poison)
132  %ext = zext <vscale x 4 x i8> %ld to <vscale x 4 x i64>
133  ret <vscale x 4 x i64> %ext
134}
135
136;
137; LD1H
138;
139
; Masked load of <vscale x 8 x i16> sign-extended to <vscale x 8 x i64>.
; The checks expect one ld1h of .h elements followed by two levels of
; sunpklo/sunpkhi (.h -> .s -> .d), leaving the result in z0-z3.
; The passthru operand is poison (matching the nxv4i16 tests in this file)
; rather than the discouraged undef.
140define <vscale x 8 x i64> @masked_ld1h_i16_sext(ptr %base, <vscale x 8 x i1> %mask) {
141; CHECK-LABEL: masked_ld1h_i16_sext:
142; CHECK:       // %bb.0:
143; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
144; CHECK-NEXT:    sunpklo z1.s, z0.h
145; CHECK-NEXT:    sunpkhi z3.s, z0.h
146; CHECK-NEXT:    sunpklo z0.d, z1.s
147; CHECK-NEXT:    sunpkhi z1.d, z1.s
148; CHECK-NEXT:    sunpklo z2.d, z3.s
149; CHECK-NEXT:    sunpkhi z3.d, z3.s
150; CHECK-NEXT:    ret
151  %wide.masked.load = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr %base, i32 2, <vscale x 8 x i1> %mask, <vscale x 8 x i16> poison)
152  %res = sext <vscale x 8 x i16> %wide.masked.load to <vscale x 8 x i64>
153  ret <vscale x 8 x i64> %res
154}
155
; Masked load of <vscale x 4 x i16> sign-extended to <vscale x 4 x i64>.
; The checks expect an ld1sh into .s elements followed by a single
; sunpklo/sunpkhi pair producing z0/z1.
156define <vscale x 4 x i64> @masked_ld1h_nxv4i16_sext(ptr %a, <vscale x 4 x i1> %mask) {
157; CHECK-LABEL: masked_ld1h_nxv4i16_sext:
158; CHECK:       // %bb.0:
159; CHECK-NEXT:    ld1sh { z1.s }, p0/z, [x0]
160; CHECK-NEXT:    sunpklo z0.d, z1.s
161; CHECK-NEXT:    sunpkhi z1.d, z1.s
162; CHECK-NEXT:    ret
163  %ld = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr %a, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i16> poison)
164  %ext = sext <vscale x 4 x i16> %ld to <vscale x 4 x i64>
165  ret <vscale x 4 x i64> %ext
166}
167
; Masked load of <vscale x 8 x i16> zero-extended to <vscale x 8 x i64>.
; The checks expect one ld1h of .h elements followed by two levels of
; uunpklo/uunpkhi (.h -> .s -> .d), leaving the result in z0-z3.
; The passthru operand is poison (matching the nxv4i16 tests in this file)
; rather than the discouraged undef.
168define <vscale x 8 x i64> @masked_ld1h_i16_zext(ptr %base, <vscale x 8 x i1> %mask) {
169; CHECK-LABEL: masked_ld1h_i16_zext:
170; CHECK:       // %bb.0:
171; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
172; CHECK-NEXT:    uunpklo z1.s, z0.h
173; CHECK-NEXT:    uunpkhi z3.s, z0.h
174; CHECK-NEXT:    uunpklo z0.d, z1.s
175; CHECK-NEXT:    uunpkhi z1.d, z1.s
176; CHECK-NEXT:    uunpklo z2.d, z3.s
177; CHECK-NEXT:    uunpkhi z3.d, z3.s
178; CHECK-NEXT:    ret
179  %wide.masked.load = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr %base, i32 2, <vscale x 8 x i1> %mask, <vscale x 8 x i16> poison)
180  %res = zext <vscale x 8 x i16> %wide.masked.load to <vscale x 8 x i64>
181  ret <vscale x 8 x i64> %res
182}
183
; Masked load of <vscale x 4 x i16> zero-extended to <vscale x 4 x i64>.
; The checks expect an ld1h into .s elements followed by a single
; uunpklo/uunpkhi pair producing z0/z1.
184define <vscale x 4 x i64> @masked_ld1h_nxv4i16_zext(ptr %a, <vscale x 4 x i1> %mask) {
185; CHECK-LABEL: masked_ld1h_nxv4i16_zext:
186; CHECK:       // %bb.0:
187; CHECK-NEXT:    ld1h { z1.s }, p0/z, [x0]
188; CHECK-NEXT:    uunpklo z0.d, z1.s
189; CHECK-NEXT:    uunpkhi z1.d, z1.s
190; CHECK-NEXT:    ret
191  %ld = call <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr %a, i32 1, <vscale x 4 x i1> %mask, <vscale x 4 x i16> poison)
192  %ext = zext <vscale x 4 x i16> %ld to <vscale x 4 x i64>
193  ret <vscale x 4 x i64> %ext
194}
195
196;
197; LD1W
198;
199
; Masked load of <vscale x 4 x i32> sign-extended to <vscale x 4 x i64>.
; The checks expect one ld1w of .s elements followed by a single
; sunpklo/sunpkhi pair producing z0/z1.
; The passthru operand is poison (as in the other poison-passthru tests in
; this file) rather than the discouraged undef.
200define <vscale x 4 x i64> @masked_ld1w_i32_sext(ptr %base, <vscale x 4 x i1> %mask) {
201; CHECK-LABEL: masked_ld1w_i32_sext:
202; CHECK:       // %bb.0:
203; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]
204; CHECK-NEXT:    sunpklo z0.d, z1.s
205; CHECK-NEXT:    sunpkhi z1.d, z1.s
206; CHECK-NEXT:    ret
207  %wide.masked.load = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr %base, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> poison)
208  %res = sext <vscale x 4 x i32> %wide.masked.load to <vscale x 4 x i64>
209  ret <vscale x 4 x i64> %res
210}
211
; Masked load of <vscale x 4 x i32> zero-extended to <vscale x 4 x i64>.
; The checks expect one ld1w of .s elements followed by a single
; uunpklo/uunpkhi pair producing z0/z1.
; The passthru operand is poison (as in the other poison-passthru tests in
; this file) rather than the discouraged undef.
212define <vscale x 4 x i64> @masked_ld1w_i32_zext(ptr %base, <vscale x 4 x i1> %mask) {
213; CHECK-LABEL: masked_ld1w_i32_zext:
214; CHECK:       // %bb.0:
215; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]
216; CHECK-NEXT:    uunpklo z0.d, z1.s
217; CHECK-NEXT:    uunpkhi z1.d, z1.s
218; CHECK-NEXT:    ret
219  %wide.masked.load = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr %base, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> poison)
220  %res = zext <vscale x 4 x i32> %wide.masked.load to <vscale x 4 x i64>
221  ret <vscale x 4 x i64> %res
222}
223
; Declarations of the llvm.masked.load overloads called by the tests in this
; file, one per loaded vector type (nxv16i8, nxv8i8, nxv4i8, nxv8i16,
; nxv4i16, nxv4i32). Each takes a pointer, an i32 immediate, an i1 mask
; vector and a vector of the result type (per the LangRef: base pointer,
; alignment, mask, passthru).
224declare <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr, i32 immarg, <vscale x 16 x i1>, <vscale x 16 x i8>)
225declare <vscale x 8 x i8> @llvm.masked.load.nxv8i8.p0(ptr, i32 immarg, <vscale x 8 x i1>, <vscale x 8 x i8>)
226declare <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr, i32 immarg, <vscale x 4 x i1>, <vscale x 4 x i8>)
227declare <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr, i32 immarg, <vscale x 8 x i1>, <vscale x 8 x i16>)
228declare <vscale x 4 x i16> @llvm.masked.load.nxv4i16.p0(ptr, i32 immarg, <vscale x 4 x i1>, <vscale x 4 x i16>)
229declare <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr, i32 immarg, <vscale x 4 x i1>, <vscale x 4 x i32>)
230